# 👗 Fashion-MNIST with ResNet

Welcome to **Residual Networks**! In this notebook, we'll build deep CNNs that remain trainable at depth thanks to residual (skip) connections. We'll classify fashion items with high accuracy, first with a small ResNet trained from scratch and then with a pre-trained ResNet50.

## What you'll learn:
- ResNet architecture and residual blocks
- Skip connections and identity mappings
- Transfer learning with pre-trained models
- Advanced training techniques (early stopping, learning-rate reduction on plateau, fine-tuning)

Let's build some deep networks! 🏗️

In [None]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# Plotting style. The seaborn look is called 'seaborn-v0_8' from Matplotlib 3.6
# onwards; older releases only know it as 'seaborn'. Pick whichever name the
# installed Matplotlib actually provides so this cell runs on both.
plot_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(plot_style)

# Fix the NumPy and TensorFlow global seeds (both 42) for repeatable runs.
np.random.seed(42)
tf.random.set_seed(42)

# Report the environment: TensorFlow version and whether any GPU is visible.
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {len(tf.config.list_physical_devices('GPU')) > 0}")

In [None]:
# Load the Fashion-MNIST train/test splits through Keras
(X_train, y_train), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Human-readable label for each integer class id (list index == class id)
class_names = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]

# Quick summary of what was loaded
for description, value in (
    ("Training data shape", X_train.shape),
    ("Test data shape", X_test.shape),
    ("Number of classes", len(class_names)),
):
    print(f"{description}: {value}")

# Preview the first ten training images on a 2x5 grid, titled with their label
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(15, 8))
for ax, image, label in zip(axes.flat, X_train, y_train):
    ax.imshow(image, cmap='gray')
    ax.set_title(class_names[label])
    ax.axis('off')

plt.suptitle('👗 Fashion-MNIST Dataset Samples', fontsize=16)
plt.tight_layout()
plt.show()

In [None]:
# Scale pixel values from the 0-255 integer range to float32 in [0, 1]
X_train = X_train.astype(np.float32) / 255.0
X_test = X_test.astype(np.float32) / 255.0

# Append a trailing channel axis: (N, 28, 28) -> (N, 28, 28, 1)
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

# Copy the single grey channel three times to get RGB-shaped inputs
# for the transfer-learning model
X_train_rgb = np.concatenate([X_train] * 3, axis=-1)
X_test_rgb = np.concatenate([X_test] * 3, axis=-1)

# Upscale to 32x32 for ResNet (requires 32x32 minimum)
target_size = (32, 32)
X_train_resized = tf.image.resize(X_train_rgb, size=target_size)
X_test_resized = tf.image.resize(X_test_rgb, size=target_size)

# One-hot encode the integer labels over the 10 classes
y_train_cat = keras.utils.to_categorical(y_train, num_classes=10)
y_test_cat = keras.utils.to_categorical(y_test, num_classes=10)

print("✅ Data preprocessing completed!")
print(f"Resized training data shape: {X_train_resized.shape}")

In [None]:
# Define residual block
def residual_block(x, filters, kernel_size=3, stride=1, conv_shortcut=False):
    """Apply a two-convolution residual block with a skip connection.

    Main path: Conv -> BatchNorm -> ReLU -> Conv -> BatchNorm. The result is
    added element-wise to the shortcut and passed through a final ReLU.

    Args:
        x: Input feature map. Assumed to be channels-last (the channel count
            is read from ``x.shape[-1]``).
        filters: Number of output channels of both main-path convolutions.
        kernel_size: Kernel size of the main-path convolutions.
        stride: Stride of the first main-path convolution (int or pair of
            ints). The second convolution always uses stride 1.
        conv_shortcut: If True, always route the shortcut through a strided
            1x1 convolution followed by batch normalisation. If False, the
            projection is still inserted automatically whenever the main path
            changes the spatial size or the channel count, because an identity
            shortcut could not be added to the main path in that case.

    Returns:
        The output tensor of the block, with ``filters`` channels.
    """
    shortcut = x

    # Work out whether an identity shortcut would still match the main path.
    strides = (stride, stride) if isinstance(stride, int) else tuple(stride)
    changes_resolution = any(s != 1 for s in strides)
    changes_channels = x.shape[-1] != filters
    needs_projection = conv_shortcut or changes_resolution or changes_channels

    # Main path
    x = layers.Conv2D(filters, kernel_size, strides=stride, padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)

    x = layers.Conv2D(filters, kernel_size, padding='same')(x)
    x = layers.BatchNormalization()(x)

    # Shortcut path: a 1x1 projection reshapes the input to the main path's
    # output shape; otherwise the input is passed through untouched.
    if needs_projection:
        shortcut = layers.Conv2D(filters, 1, strides=stride, padding='same')(shortcut)
        shortcut = layers.BatchNormalization()(shortcut)

    # Add shortcut to main path
    x = layers.Add()([x, shortcut])
    x = layers.ReLU()(x)

    return x

def create_mini_resnet(input_shape=(28, 28, 1), num_classes=10, dropout_rate=0.5):
    """Build a small ResNet-style classifier.

    The network is a 7x7 stride-2 stem with max pooling, followed by three
    stages of two residual blocks each (64, 128 and 256 filters), global
    average pooling, dropout and a softmax classification layer.

    Args:
        input_shape: Shape of one input image, channels last. Defaults to the
            Fashion-MNIST shape ``(28, 28, 1)``.
        num_classes: Number of output classes (units of the softmax layer).
        dropout_rate: Dropout rate applied before the classification layer.

    Returns:
        An uncompiled Keras ``Model`` mapping images to class probabilities.
    """
    inputs = layers.Input(shape=input_shape)

    # Stem: initial conv layer + pooling
    x = layers.Conv2D(64, 7, strides=2, padding='same')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.MaxPooling2D(3, strides=2, padding='same')(x)

    # Residual stages as (filters, stride of the first block).
    # The first stage keeps the stem's 64 channels and resolution, so its
    # blocks use identity shortcuts; later stages halve the resolution and
    # widen the channels, so their first block needs a projection shortcut.
    stage_config = ((64, 1), (128, 2), (256, 2))
    for filters, first_stride in stage_config:
        x = residual_block(
            x, filters, stride=first_stride, conv_shortcut=first_stride != 1
        )
        x = residual_block(x, filters)

    # Global average pooling and classification
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(dropout_rate)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    return models.Model(inputs, outputs)

# Instantiate the mini ResNet and configure it for training:
# Adam at lr=0.001, categorical cross-entropy (labels are one-hot), accuracy metric
mini_resnet = create_mini_resnet()
mini_optimizer = keras.optimizers.Adam(learning_rate=0.001)
mini_resnet.compile(
    optimizer=mini_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print a layer-by-layer overview of the model
print("🏗️ Mini ResNet Architecture:")
mini_resnet.summary()

In [None]:
# Training callbacks. No `monitor` argument is given, so each callback
# relies on its Keras default monitored quantity.
early_stopping = EarlyStopping(patience=10, restore_best_weights=True)
lr_on_plateau = ReduceLROnPlateau(factor=0.5, patience=5, min_lr=1e-7)
callbacks = [early_stopping, lr_on_plateau]

# Fit on the 28x28x1 images, holding out 20% of the training data for validation
print("🚀 Training Mini ResNet...")
mini_fit_options = dict(
    batch_size=128,
    epochs=30,
    validation_split=0.2,
    callbacks=callbacks,
    verbose=1,
)
history_mini = mini_resnet.fit(X_train, y_train_cat, **mini_fit_options)

# Score the trained model on the test set (returns loss, then accuracy)
mini_test_metrics = mini_resnet.evaluate(X_test, y_test_cat, verbose=0)
test_loss, test_acc = mini_test_metrics
print(f"\n🎯 Mini ResNet Test Accuracy: {test_acc:.4f}")

In [None]:
# Transfer Learning with ResNet50
# Load the ImageNet-pretrained convolutional backbone without its classifier.
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(32, 32, 3)
)

# Freeze base model so only the new head is trained at first
base_model.trainable = False

# Add custom head
inputs = keras.Input(shape=(32, 32, 3))

# The ImageNet weights were trained on inputs passed through
# resnet50.preprocess_input, which takes 0-255 RGB pixels, reorders them to
# BGR and subtracts the ImageNet channel means. The images fed to this model
# were scaled to [0, 1] earlier in the notebook, so scale them back to 0-255
# and apply the expected preprocessing inside the model itself.
x = layers.Rescaling(255.0)(inputs)
x = keras.applications.resnet50.preprocess_input(x)

# training=False keeps the backbone in inference mode when called here
x = base_model(x, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(10, activation='softmax')(x)

transfer_model = keras.Model(inputs, outputs)
transfer_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Sum the element counts as plain Python ints before formatting
trainable_params = sum(int(tf.size(w)) for w in transfer_model.trainable_weights)

print("🔄 Transfer Learning Model:")
print(f"Total parameters: {transfer_model.count_params():,}")
print(f"Trainable parameters: {trainable_params:,}")

In [None]:
# Options common to both training phases below
shared_fit_options = dict(
    batch_size=128,
    validation_split=0.2,
    callbacks=callbacks,
    verbose=1,
)

# Phase 1: train the transfer-learning model on the resized RGB images
print("🚀 Training Transfer Learning Model...")
history_transfer = transfer_model.fit(
    X_train_resized, y_train_cat, epochs=15, **shared_fit_options
)

# Phase 2 (fine-tuning): mark the backbone trainable, then re-freeze
# everything except its last 10 layers
base_model.trainable = True
layers_to_freeze = base_model.layers[:-10]
for layer in layers_to_freeze:
    layer.trainable = False

# Recompile after changing `trainable`, now with a learning rate of 0.0001
finetune_optimizer = keras.optimizers.Adam(learning_rate=0.0001)
transfer_model.compile(
    optimizer=finetune_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

print("🔧 Fine-tuning...")
history_finetune = transfer_model.fit(
    X_train_resized, y_train_cat, epochs=10, **shared_fit_options
)

# Score the fine-tuned model on the resized test images (loss, then accuracy)
transfer_test_metrics = transfer_model.evaluate(X_test_resized, y_test_cat, verbose=0)
transfer_test_loss, transfer_test_acc = transfer_test_metrics
print(f"\n🎯 Transfer Learning Test Accuracy: {transfer_test_acc:.4f}")

In [None]:
# Compare results
def _plot_curves(ax, train_values, val_values, title, ylabel, finetune_start=None):
    """Draw the training and validation curves of one metric on ``ax``.

    Args:
        ax: Matplotlib axes to draw on.
        train_values: Per-epoch values on the training data.
        val_values: Per-epoch values on the validation data.
        title: Axes title.
        ylabel: Label of the y axis.
        finetune_start: Optional index of the first fine-tuning epoch; when
            given, a dashed vertical line is drawn just before it.
    """
    ax.plot(train_values, label='Training')
    ax.plot(val_values, label='Validation')
    if finetune_start is not None:
        # Epochs are plotted at integer positions, so the boundary sits
        # halfway between the last head-only epoch and the first fine-tuned one.
        ax.axvline(finetune_start - 0.5, color='gray', linestyle='--',
                   label='Fine-tuning starts')
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True, alpha=0.3)


# The transfer model was trained in two phases (head only, then fine-tuning).
# Join both histories so the curves cover the whole run that produced the
# reported test accuracy, not just the first phase.
transfer_curves = {
    metric: history_transfer.history[metric] + history_finetune.history[metric]
    for metric in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
}
finetune_start = len(history_transfer.history['loss'])

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

# Mini ResNet training curves
_plot_curves(ax1, history_mini.history['accuracy'], history_mini.history['val_accuracy'],
             '📈 Mini ResNet - Accuracy', 'Accuracy')
_plot_curves(ax2, history_mini.history['loss'], history_mini.history['val_loss'],
             '📉 Mini ResNet - Loss', 'Loss')

# Transfer learning curves (head training followed by fine-tuning)
_plot_curves(ax3, transfer_curves['accuracy'], transfer_curves['val_accuracy'],
             '📈 Transfer Learning - Accuracy', 'Accuracy',
             finetune_start=finetune_start)
_plot_curves(ax4, transfer_curves['loss'], transfer_curves['val_loss'],
             '📉 Transfer Learning - Loss', 'Loss',
             finetune_start=finetune_start)

plt.tight_layout()
plt.show()

print("\n🏆 Final Results Comparison:")
print(f"Mini ResNet Test Accuracy: {test_acc:.4f}")
print(f"Transfer Learning Test Accuracy: {transfer_test_acc:.4f}")
# The difference of two accuracies is an absolute gap, i.e. percentage points,
# not a relative percentage change.
print(f"Improvement: {(transfer_test_acc - test_acc)*100:.2f} percentage points")

## 🎉 Congratulations!

You've mastered ResNet and transfer learning! Here's what you've accomplished:

✅ **ResNet Architecture**: Built deep networks with skip connections  
✅ **Transfer Learning**: Leveraged pre-trained models  
✅ **Fine-tuning**: Optimized for specific tasks  
✅ **Advanced Training**: Used early stopping and learning-rate reduction on plateau  

### 🚀 Next Steps:
1. Try other architectures (DenseNet, EfficientNet)
2. Experiment with attention mechanisms
3. Implement custom residual blocks
4. Move on to **Project 04: Text Classification using RNN**

Ready for NLP? Let's dive into text! 📝