# Transfer Learning with Pre-trained Models

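In this notebook, we demonstrate transfer learning for image classification: ImageNet pre-trained models (VGG16, ResNet50, and MobileNetV2) are adapted to a subset of CIFAR-10 and compared, the best one is fine-tuned, and its features and predictions are analyzed.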

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Seed both NumPy and TensorFlow with the same fixed value
for seed_fn in (np.random.seed, tf.random.set_seed):
    seed_fn(42)

# Report the TensorFlow build and whether any GPU device is visible
print(f"TensorFlow version: {tf.__version__}")
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"GPU available: {bool(gpu_devices)}")

## Load and Prepare Data

In [None]:
# Load the CIFAR-10 train/test splits
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Human-readable names; position i is the name used for integer label i
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Keep only the leading samples of each split for faster training
train_size, test_size = 5000, 1000

x_train_subset, y_train_subset = x_train[:train_size], y_train[:train_size]
x_test_subset, y_test_subset = x_test[:test_size], y_test[:test_size]

print(f"Training samples: {len(x_train_subset)}")
print(f"Test samples: {len(x_test_subset)}")
print(f"Image shape: {x_train_subset.shape[1:]}")

In [None]:
# Preprocess data for pre-trained models
def preprocess_for_pretrained(x_data, target_size=(224, 224)):
    """Resize a batch of images and return it as a float32 tensor.

    Args:
        x_data: Images to resize; passed straight to ``tf.image.resize``.
        target_size: ``(height, width)`` of the returned images.

    Returns:
        The resized images cast to ``tf.float32``. Pixel values are not
        rescaled or normalized by this function.
    """
    return tf.cast(tf.image.resize(x_data, target_size), tf.float32)

# Upscale both subsets to 224x224, the size the backbones below are built for
x_train_resized, x_test_resized = (
    preprocess_for_pretrained(images)
    for images in (x_train_subset, x_test_subset)
)

# One-hot encode the integer labels (10 classes)
y_train_cat, y_test_cat = (
    keras.utils.to_categorical(labels, 10)
    for labels in (y_train_subset, y_test_subset)
)

print(f"Resized training shape: {x_train_resized.shape}")
print(f"Resized test shape: {x_test_resized.shape}")

In [None]:
# Show the first five training images: originals on top, resized versions below
plt.figure(figsize=(15, 5))
for col in range(5):
    label = class_names[y_train_subset[col][0]]
    panels = (
        # (subplot position, image, title)
        (col + 1, x_train_subset[col], f'Original 32x32\n{label}'),
        # The resized tensor is float32; cast back to uint8 for display
        (col + 6, x_train_resized[col].numpy().astype('uint8'), f'Resized 224x224\n{label}'),
    )
    for position, image, title in panels:
        plt.subplot(2, 5, position)
        plt.imshow(image)
        plt.title(title)
        plt.axis('off')

plt.tight_layout()
plt.show()

## Transfer Learning with VGG16

In [None]:
def create_transfer_model(base_model_name='VGG16', num_classes=10, trainable=False,
                          input_shape=(224, 224, 3)):
    """Build a classifier on top of an ImageNet pre-trained backbone.

    The returned model applies the backbone's own ``preprocess_input``,
    runs the backbone without its top, and adds a small dense head
    (global average pooling, dropout, Dense(128, relu), dropout, softmax).

    Args:
        base_model_name: One of ``'VGG16'``, ``'ResNet50'`` or ``'MobileNetV2'``.
        num_classes: Number of units in the final softmax layer.
        trainable: Value assigned to ``base_model.trainable``; ``False``
            freezes the backbone.
        input_shape: ``(height, width, channels)`` of the input images.

    Returns:
        ``(model, base_model)``: the full Sequential model and the backbone
        it wraps.

    Raises:
        ValueError: If ``base_model_name`` is not a supported backbone.
    """
    # Lazy factories: keras.applications is only touched once the name has
    # been validated, and each backbone stays paired with its preprocessing.
    backbones = {
        'VGG16': lambda: (keras.applications.VGG16,
                          keras.applications.vgg16.preprocess_input),
        'ResNet50': lambda: (keras.applications.ResNet50,
                             keras.applications.resnet50.preprocess_input),
        'MobileNetV2': lambda: (keras.applications.MobileNetV2,
                                keras.applications.mobilenet_v2.preprocess_input),
    }
    if base_model_name not in backbones:
        raise ValueError(
            f"Unsupported base_model_name {base_model_name!r}; "
            f"expected one of {sorted(backbones)}"
        )
    backbone_cls, preprocess_func = backbones[base_model_name]()

    # Load pre-trained base model without its ImageNet classification top
    base_model = backbone_cls(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape
    )

    # Freeze (or unfreeze) the base model layers
    base_model.trainable = trainable

    # Declaring the input up front lets summary() and count_params() be
    # called before the first fit().
    model = keras.Sequential([
        keras.Input(shape=input_shape),
        keras.layers.Lambda(preprocess_func),  # Backbone-specific preprocessing
        base_model,
        keras.layers.GlobalAveragePooling2D(),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(num_classes, activation='softmax')
    ])

    return model, base_model

# Build the VGG16-based classifier with a frozen backbone
vgg_model, vgg_base = create_transfer_model('VGG16', trainable=False)

print("VGG16 Transfer Learning Model:")
vgg_model.summary()

# Count weights element-wise, split by whether they are trainable
vgg_trainable_count = sum(tf.size(w).numpy() for w in vgg_model.trainable_weights)
vgg_frozen_count = sum(tf.size(w).numpy() for w in vgg_model.non_trainable_weights)

print(f"\nTotal parameters: {vgg_model.count_params():,}")
print(f"Trainable parameters: {vgg_trainable_count:,}")
print(f"Non-trainable parameters: {vgg_frozen_count:,}")

In [None]:
# Compile the VGG16 model for one-hot targets
vgg_model.compile(
    loss='categorical_crossentropy',
    optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

# Stop early (restoring the best weights) and halve the learning rate on a
# plateau.
# NOTE(review): validation_data below is the test subset, so these callbacks
# react to test-set metrics -- confirm that is acceptable for this demo.
callbacks = [
    keras.callbacks.EarlyStopping(restore_best_weights=True, patience=5),
    keras.callbacks.ReduceLROnPlateau(patience=3, factor=0.5),
]

vgg_fit_options = {
    'batch_size': 32,
    'epochs': 20,
    'validation_data': (x_test_resized, y_test_cat),
    'callbacks': callbacks,
    'verbose': 1,
}

print("Training VGG16 transfer learning model...")
vgg_history = vgg_model.fit(x_train_resized, y_train_cat, **vgg_fit_options)

print("VGG16 training completed!")

## Compare Different Pre-trained Models

In [None]:
# Train one frozen-backbone classifier per architecture and record its results
models_to_compare = ['VGG16', 'ResNet50', 'MobileNetV2']
model_results = {}

for model_name in models_to_compare:
    print(f"\nTraining {model_name}...")

    model, base_model = create_transfer_model(model_name, trainable=False)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=keras.optimizers.Adam(learning_rate=0.0001),
        metrics=['accuracy'],
    )

    # Shorter run than the dedicated VGG16 cell; this is only for comparison
    history = model.fit(
        x_train_resized,
        y_train_cat,
        validation_data=(x_test_resized, y_test_cat),
        epochs=10,
        batch_size=32,
        verbose=0,
    )

    test_loss, test_acc = model.evaluate(x_test_resized, y_test_cat, verbose=0)
    param_count = model.count_params()

    # Everything later cells need: the model itself, its curves and metrics
    model_results[model_name] = dict(
        model=model,
        history=history,
        test_accuracy=test_acc,
        test_loss=test_loss,
        parameters=param_count,
    )

    print(f"{model_name} - Test Accuracy: {test_acc:.4f}, Parameters: {param_count:,}")

print("\nAll models trained!")

In [None]:
# Compare model performance
def compare_models(model_results):
    """Plot and tabulate a side-by-side comparison of trained models.

    Draws a 2x2 figure (test accuracy bars, parameter-count bars, accuracy
    curves, loss curves) and then prints a summary table.

    Args:
        model_results: Dict mapping a model name to a dict with the keys
            ``'test_accuracy'``, ``'parameters'`` and ``'history'``; the
            history object must expose a ``.history`` dict containing
            ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and ``'val_loss'``.
    """
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    (acc_ax, size_ax), (acc_curve_ax, loss_curve_ax) = axes

    names = list(model_results.keys())
    test_accs = [model_results[name]['test_accuracy'] for name in names]
    param_counts = [model_results[name]['parameters'] for name in names]
    palette = ['blue', 'green', 'red']

    # Top-left: test accuracy per model, annotated with the value above each bar
    acc_ax.bar(names, test_accs, color=palette)
    acc_ax.set_title('Test Accuracy Comparison')
    acc_ax.set_ylabel('Accuracy')
    acc_ax.set_ylim(0, 1)
    for bar_pos, acc in enumerate(test_accs):
        acc_ax.text(bar_pos, acc + 0.01, f'{acc:.3f}', ha='center')

    # Top-right: parameter count per model, in millions
    size_ax.bar(names, [count / 1e6 for count in param_counts], color=palette)
    size_ax.set_title('Model Size Comparison')
    size_ax.set_ylabel('Parameters (Millions)')

    # Bottom row: training (solid) and validation (dashed) curves per model
    curve_panels = (
        (acc_curve_ax, 'accuracy', 'val_accuracy', 'Training Accuracy', 'Accuracy'),
        (loss_curve_ax, 'loss', 'val_loss', 'Training Loss', 'Loss'),
    )
    for ax, train_key, val_key, title, y_label in curve_panels:
        for name in names:
            curves = model_results[name]['history'].history
            ax.plot(curves[train_key], label=f'{name} Train')
            ax.plot(curves[val_key], label=f'{name} Val', linestyle='--')
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Text summary table
    rule = "-" * 60
    print("\nModel Comparison Summary:")
    print(rule)
    print(f"{'Model':<12} {'Accuracy':<10} {'Parameters':<12} {'Size (MB)':<10}")
    print(rule)

    for name in names:
        entry = model_results[name]
        acc, params = entry['test_accuracy'], entry['parameters']
        # Approximate size: 4 bytes per parameter, reported in MiB
        size_mb = params * 4 / (1024 * 1024)

        print(f"{name:<12} {acc:<10.4f} {params:<12,} {size_mb:<10.1f}")

# Render the comparison figure and print the summary table for all trained models
compare_models(model_results)

## Fine-tuning

In [None]:
# Pick the architecture with the highest test accuracy from the comparison run
best_model_name = max(model_results.keys(), key=lambda k: model_results[k]['test_accuracy'])
print(f"Best performing model: {best_model_name}")

# Create a new model for fine-tuning with the whole backbone unfrozen.
# NOTE: this is a fresh model, so its classifier head is newly initialized
# and is trained together with the unfrozen backbone layers below.
finetune_model, finetune_base = create_transfer_model(best_model_name, trainable=True)

# Freeze the first 80% of backbone layers; only the tail is fine-tuned
freeze_layers = int(len(finetune_base.layers) * 0.8)
for layer in finetune_base.layers[:freeze_layers]:
    layer.trainable = False

# Keep BatchNormalization layers in the unfrozen tail frozen as well: the
# Keras fine-tuning guide advises keeping them in inference mode so that
# small-batch statistics do not overwrite the pre-trained moving averages.
for layer in finetune_base.layers[freeze_layers:]:
    if isinstance(layer, keras.layers.BatchNormalization):
        layer.trainable = False

# Report the actual frozen/trainable split (includes the frozen BN layers)
frozen_layer_count = sum(1 for layer in finetune_base.layers if not layer.trainable)
print(f"Total layers in base model: {len(finetune_base.layers)}")
print(f"Frozen layers: {frozen_layer_count}")
print(f"Trainable layers: {len(finetune_base.layers) - frozen_layer_count}")

# Compile AFTER changing the trainable flags, with a lower learning rate
finetune_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.00001),  # Lower learning rate
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print(f"\nFine-tuning model parameters:")
print(f"Total: {finetune_model.count_params():,}")
print(f"Trainable: {sum([tf.size(w).numpy() for w in finetune_model.trainable_weights]):,}")

In [None]:
# Run the fine-tuning pass and compare it with the frozen-backbone result
print("Fine-tuning model...")

finetune_early_stopping = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)

finetune_history = finetune_model.fit(
    x_train_resized,
    y_train_cat,
    validation_data=(x_test_resized, y_test_cat),
    epochs=10,
    batch_size=16,  # Smaller batch size for fine-tuning
    callbacks=[finetune_early_stopping],
    verbose=1,
)

# Score the fine-tuned model on the test subset
finetune_loss, finetune_acc = finetune_model.evaluate(x_test_resized, y_test_cat, verbose=0)

# Accuracy of the same architecture from the frozen-backbone comparison run
frozen_backbone_acc = model_results[best_model_name]['test_accuracy']

print("\nFine-tuned model performance:")
print(f"Test Accuracy: {finetune_acc:.4f}")
print(f"Original {best_model_name} Accuracy: {frozen_backbone_acc:.4f}")
print(f"Improvement: {finetune_acc - frozen_backbone_acc:.4f}")

## Feature Extraction and Visualization

In [None]:
# Extract features using pre-trained model
def extract_features(model, x_data, layer_name=None):
    """Return the activations of one layer of ``model`` for ``x_data``.

    Args:
        model: Keras model to read activations from.
        x_data: Input batch forwarded to ``predict``.
        layer_name: Name of the layer to tap. When omitted (or empty), the
            second-to-last layer is used, i.e. the one feeding the final
            classification layer.

    Returns:
        The output of ``predict`` on a sub-model that ends at the chosen layer.
    """
    tapped_layer = model.get_layer(layer_name) if layer_name else model.layers[-2]

    # Sub-model sharing the original input but stopping at the tapped layer
    feature_extractor = keras.Model(inputs=model.input, outputs=tapped_layer.output)
    return feature_extractor.predict(x_data, verbose=0)

# Pull features for the first 100 test images out of the best comparison model
best_model = model_results[best_model_name]['model']
feature_inputs = x_test_resized[:100]
features = extract_features(best_model, feature_inputs)

print(f"Extracted features shape: {features.shape}")
print(f"Feature vector length: {features.shape[1]}")

In [None]:
# Visualize features using t-SNE
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

# Two-stage projection: PCA down to 50 dimensions, then t-SNE down to 2
pca = PCA(n_components=50)
features_pca = pca.fit_transform(features)

tsne = TSNE(n_components=2, random_state=42, perplexity=30)
features_2d = tsne.fit_transform(features_pca)

# Labels of the same first 100 test samples the features came from
labels_subset = y_test_subset[:100].flatten()

# One tab10 colour per class
colors = plt.cm.tab10(np.linspace(0, 1, 10))

plt.figure(figsize=(12, 8))
for class_idx, class_label in enumerate(class_names):
    in_class = labels_subset == class_idx
    class_points = features_2d[in_class]
    plt.scatter(class_points[:, 0], class_points[:, 1],
                c=[colors[class_idx]], label=class_label, alpha=0.7, s=50)

plt.title(f'Feature Visualization using t-SNE\n({best_model_name} features)')
plt.xlabel('t-SNE Component 1')
plt.ylabel('t-SNE Component 2')
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# How much of the feature variance the PCA stage retained
explained = pca.explained_variance_ratio_
print(f"PCA explained variance ratio (first 10 components): {explained[:10]}")
print(f"Total variance explained by 50 components: {explained.sum():.3f}")

## Prediction Analysis

In [None]:
# Analyze predictions from the best model
best_model = model_results[best_model_name]['model']
predictions = best_model.predict(x_test_resized, verbose=0)
pred_classes = np.argmax(predictions, axis=1)
true_classes = y_test_subset.flatten()

# Pin the label set so the matrix and the report always have one row per
# entry of class_names, even if a class is absent from a smaller test subset
# (otherwise the tick labels misalign and target_names no longer matches).
label_ids = np.arange(len(class_names))

# Confusion matrix
cm = confusion_matrix(true_classes, pred_classes, labels=label_ids)

plt.figure(figsize=(12, 10))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.title(f'Confusion Matrix - {best_model_name}')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.xticks(rotation=45)
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()

# Classification report
print(f"Classification Report - {best_model_name}:")
print(classification_report(true_classes, pred_classes,
                            labels=label_ids, target_names=class_names))

In [None]:
# Show examples of correct and incorrect predictions
def show_prediction_examples(x_test, y_true, predictions, class_names, num_examples=10,
                             model_name=None):
    """Plot one row of correctly and one row of incorrectly classified images.

    Args:
        x_test: Indexable batch of images (tensor or array); each image is
            converted to a ``uint8`` NumPy array for display.
        y_true: Integer class labels; flattened, so ``(n,)`` and ``(n, 1)``
            inputs are both accepted.
        predictions: Per-class scores, one row per sample.
        class_names: Sequence mapping a class index to its display name.
        num_examples: Maximum number of images per row; must be at least 1.
        model_name: Optional name shown in the figure title. When omitted,
            the notebook-level ``best_model_name`` is used if it is defined.

    Raises:
        ValueError: If ``num_examples`` is smaller than 1.
    """
    if num_examples < 1:
        raise ValueError(f"num_examples must be at least 1, got {num_examples}")

    # Backward-compatible fallback to the notebook global the title used before
    if model_name is None:
        model_name = globals().get('best_model_name')

    # Flatten labels so an (n, 1) array cannot broadcast into an (n, n) mask
    y_true = np.asarray(y_true).ravel()
    predictions = np.asarray(predictions)
    pred_classes = np.argmax(predictions, axis=1)
    confidences = np.max(predictions, axis=1)

    # Indices of the first few correct and incorrect predictions
    correct_mask = (pred_classes == y_true)
    correct_indices = np.where(correct_mask)[0][:num_examples]
    incorrect_indices = np.where(~correct_mask)[0][:num_examples]

    # squeeze=False keeps `axes` two-dimensional even when num_examples == 1
    fig, axes = plt.subplots(2, num_examples, figsize=(20, 8), squeeze=False)

    def _as_image(sample):
        # Works for both tensors and arrays; images are stored as float32
        return np.asarray(sample).astype('uint8')

    # Top row: correct predictions
    for col, idx in enumerate(correct_indices):
        axes[0, col].imshow(_as_image(x_test[idx]))
        axes[0, col].set_title(f'✓ {class_names[pred_classes[idx]]}\n({confidences[idx]:.2f})',
                               color='green', fontsize=10)

    # Bottom row: incorrect predictions
    for col, idx in enumerate(incorrect_indices):
        axes[1, col].imshow(_as_image(x_test[idx]))
        axes[1, col].set_title(f'✗ Pred: {class_names[pred_classes[idx]]}\nTrue: {class_names[y_true[idx]]}\n({confidences[idx]:.2f})',
                               color='red', fontsize=10)

    # Hide the frame of every subplot, used or not
    for ax in axes.ravel():
        ax.axis('off')

    heading = f'Prediction Examples - {model_name}' if model_name else 'Prediction Examples'
    plt.suptitle(f'{heading}\nTop: Correct Predictions, Bottom: Incorrect Predictions',
                 fontsize=16)
    plt.tight_layout()
    plt.show()

# Show example hits and misses of the best model on the resized test subset
show_prediction_examples(x_test_resized, true_classes, predictions, class_names)

## Summary and Conclusions

In [None]:
# Print a text recap of the whole experiment
banner = "=" * 80
print(banner)
print("TRANSFER LEARNING EXPERIMENT SUMMARY")
print(banner)

print(f"Dataset: CIFAR-10 (subset of {train_size} training, {test_size} test samples)")
print("Input size: 224x224 (resized from 32x32)")
print("Number of classes: 10")
print()

print("Model Performance Comparison:")
print("-" * 50)
for model_name, results in model_results.items():
    param_count = results['parameters']
    print(f"{model_name}:")
    print(f"  Test Accuracy: {results['test_accuracy']:.4f}")
    print(f"  Parameters: {param_count:,}")
    # Approximate size: 4 bytes per parameter, reported in MiB
    print(f"  Model Size: ~{param_count * 4 / (1024*1024):.1f} MB")
    print()

# Fine-tuning gain relative to the frozen-backbone run of the same model
best_frozen_acc = model_results[best_model_name]['test_accuracy']
print(f"Best Model: {best_model_name}")
print(f"Fine-tuned Accuracy: {finetune_acc:.4f}")
print(f"Improvement from fine-tuning: {finetune_acc - best_frozen_acc:.4f}")
print()

print("Key Insights:")
for insight in (
    "- Transfer learning significantly reduces training time",
    "- Pre-trained features work well even for different domains",
    "- Fine-tuning can provide additional performance gains",
    "- Model size vs. accuracy trade-offs are important for deployment",
    "- Feature visualization helps understand model behavior",
):
    print(insight)
print(banner)