In [6]:
# Import Required Libraries
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import json
import pickle
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Import custom modules (force reload)
# Put the current working directory at the front of the module search path so
# the imports that follow can resolve modules located there.
sys.path.insert(0, os.getcwd())

# Drop any cached copies so the next `import` statements load these modules afresh
for _cached_name in ('models', 'data_loading', 'utility', 'config'):
    sys.modules.pop(_cached_name, None)

from data_loading import WaferDataLoader
from utility import (setup_model_and_loaders, hyperparameter_tuning, 
                     evaluate_model, train_model)
from models import WaferMobileNet
from config import TRANSFER_LEARNING_TUNING_GRID

# Setup: run on the GPU when PyTorch reports CUDA as available, else on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("✓ All libraries imported successfully!")
print(f"Device: {device}")

Helper functions defined successfully!
✓ All libraries imported successfully!
Device: cuda
✓ All libraries imported successfully!
Device: cuda


## 1. Load and Prepare Data

In [7]:
# Load data using WaferDataLoader
print("Loading wafer defect dataset...")
loader = WaferDataLoader()

print("✓ Dataset loaded successfully!")
print(f"X shape: {loader.X.shape}")
print(f"y shape: {loader.y.shape}")
print(f"Number of classes: {loader.num_classes}")

# Features are cast to float32; labels are used exactly as loaded
X = loader.X.astype('float32')
y = loader.y

# Min-max scaling with the global extremes of X.
# When every value is identical there is nothing to scale, so X is kept as is.
X_min = X.min()
X_max = X.max()
if X_max > X_min:
    X_normalized = (X - X_min) / (X_max - X_min)
else:
    X_normalized = X

print("\n✓ Normalization:")
print(f"  Original range: [{X_min}, {X_max}]")
print(f"  Normalized range: [{X_normalized.min():.4f}, {X_normalized.max():.4f}]")

# Force the (N, 52, 52) image layout. Flattened (N, 2704) input is folded back
# into images; input that is already (N, 52, 52) passes through unchanged.
X_normalized = X_normalized.reshape(-1, 52, 52)

# MobileNetV2 input here is 3-channel: insert a channel axis and copy the
# single grayscale plane into each channel -> (N, 3, 52, 52)
X_normalized = np.repeat(np.expand_dims(X_normalized, axis=1), 3, axis=1)
print(f"\n✓ Reshaped for MobileNetV2: {X_normalized.shape}")

# Stratified 70 / 15 / 15 split: first carve off 30%, then halve that remainder
X_train, X_temp, y_train, y_temp = train_test_split(
    X_normalized, y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp,
    test_size=0.5,
    random_state=42,
    stratify=y_temp,
)

n_total = len(y)
print("\n✓ Data split complete:")
print(f"  Training set: {X_train.shape[0]} samples ({X_train.shape[0] / n_total * 100:.1f}%)")
print(f"  Validation set: {X_val.shape[0]} samples ({X_val.shape[0] / n_total * 100:.1f}%)")
print(f"  Test set: {X_test.shape[0]} samples ({X_test.shape[0] / n_total * 100:.1f}%)")

Loading wafer defect dataset...
Loading dataset from kagglehub cache...
✓ Dataset loaded successfully
  Wafer maps shape: (38015, 52, 52)
  Raw labels shape: (38015, 8)
✓ Labels converted to class indices
  Number of unique defect classes: 38
✓ Dataset loaded successfully!
X shape: (38015, 52, 52)
y shape: (38015,)
Number of classes: 38
✓ Dataset loaded successfully
  Wafer maps shape: (38015, 52, 52)
  Raw labels shape: (38015, 8)
✓ Labels converted to class indices
  Number of unique defect classes: 38
✓ Dataset loaded successfully!
X shape: (38015, 52, 52)
y shape: (38015,)
Number of classes: 38

✓ Normalization:
  Original range: [0.0, 3.0]
  Normalized range: [0.0000, 1.0000]

✓ Normalization:
  Original range: [0.0, 3.0]
  Normalized range: [0.0000, 1.0000]

✓ Reshaped for MobileNetV2: (38015, 3, 52, 52)

✓ Reshaped for MobileNetV2: (38015, 3, 52, 52)

✓ Data split complete:
  Training set: 26610 samples (70.0%)
  Validation set: 5702 samples (15.0%)
  Test set: 5703 samples (15.

## 2. Hyperparameter Tuning Grid

In [None]:
# Display tuning grid
grid_rule = "=" * 80
print(grid_rule)
print("TRANSFER LEARNING - HYPERPARAMETER TUNING GRID".center(80))
print(grid_rule)
print("\nTuning Grid:")
for param, values in TRANSFER_LEARNING_TUNING_GRID.items():
    print(f"  {param}: {values}")

# The number of grid points is the product of the per-parameter option counts
options_per_param = [len(options) for options in TRANSFER_LEARNING_TUNING_GRID.values()]
total_combinations = np.prod(options_per_param)
print(f"\nTotal combinations to evaluate: {total_combinations}")
print(grid_rule)

## 3. Run Hyperparameter Tuning

In [9]:
# Hyperparameter tuning for Transfer Learning with MobileNetV2
print("=" * 80)
print("HYPERPARAMETER TUNING: Transfer Learning (MobileNetV2)")
print("=" * 80)

# Quick smoke-test grid: one value per hyperparameter, i.e. exactly 1 combination.
# It lives under its own name so that running this cell does not overwrite the
# TRANSFER_LEARNING_TUNING_GRID imported from config.
QUICK_TUNING_GRID = {
    'learning_rate': [0.0001],
    'batch_size': [32],
    'num_epochs': [5],
}

# Only the leading rows of the train/validation splits are used to keep the
# search fast; the test split is passed in full.
TUNING_TRAIN_SAMPLES = 5000
TUNING_VAL_SAMPLES = 1000

print("\n✓ Starting hyperparameter tuning...")
print(f"  Total combinations: {np.prod([len(v) for v in QUICK_TUNING_GRID.values()])}")

tl_results = hyperparameter_tuning(
    model_class=WaferMobileNet,
    X_train=X_train[:TUNING_TRAIN_SAMPLES],
    X_val=X_val[:TUNING_VAL_SAMPLES],
    X_test=X_test,
    y_train=y_train[:TUNING_TRAIN_SAMPLES],
    y_val=y_val[:TUNING_VAL_SAMPLES],
    y_test=y_test,
    param_grid=QUICK_TUNING_GRID,
    input_size=None,
    num_classes=loader.num_classes,
    device=str(device),
    verbose=False,
)

# Extract results dataframe and get top 5.
# 'best_params' being None is treated as "no combination succeeded".
if tl_results['best_params'] is not None:
    print(f"\n✓ Tuning complete! Tested {len(tl_results['results'])} hyperparameter combinations")
    tl_summary_df = tl_results['summary_df']
    # head() keeps the first rows (all of them when fewer than 5 exist).
    # NOTE(review): assumes summary_df is already sorted best-first — confirm
    # against utility.hyperparameter_tuning.
    tl_top5 = tl_summary_df.head(5)

    print(f"\nTop {len(tl_top5)} Best Results:")
    print(tl_top5[['learning_rate', 'batch_size', 'num_epochs', 'Val_Acc', 'Test_Acc']].to_string(index=False))

    print("\n✓ Best Model Performance:")
    print(f"  Validation Accuracy: {tl_results['best_val_acc']:.4f}")
    print(f"  Test Accuracy: {tl_results['best_test_acc']:.4f}")
else:
    # tl_summary_df / tl_top5 are NOT defined on this path, so the later cells
    # that read them cannot run until tuning succeeds.
    print("\nERROR: All hyperparameter combinations failed!")
    if tl_results['results']:
        print(f"First result: {tl_results['results'][0]}")

HYPERPARAMETER TUNING: Transfer Learning (MobileNetV2)

✓ Testing model instantiation...
  Model created successfully!
  Model type: <class 'models.WaferMobileNet'>
  Forward pass successful! Output shape: torch.Size([2, 38])

✓ Starting hyperparameter tuning...
  Total combinations: 8


                                                                      

KeyboardInterrupt: 

## 4. Validation Loss Curves - Top 5

In [None]:
# Retrain top 5 models to get training histories
print("\nRetraining top 5 models to generate loss curves...")

def retrain_with_history(model_class, X_train, X_val, X_test, y_train, y_val, y_test, 
                         params, num_classes, device_str):
    """Train a fresh model for one hyperparameter set and return its history.

    Args:
        model_class: model class forwarded to ``setup_model_and_loaders``.
        X_train, X_val, X_test: feature arrays for the three splits.
        y_train, y_val, y_test: label arrays for the three splits.
        params: mapping that may hold 'learning_rate' (default 0.0001),
            'batch_size' (default 32) and 'num_epochs' (default 20).
        num_classes: number of output classes forwarded to the setup helper.
        device_str: device identifier forwarded to the setup and training helpers.

    Returns:
        Whatever ``train_model`` returns, or ``None`` when any step raises
        (the exception message is printed rather than propagated).
    """
    try:
        # Hyperparameters, with fallbacks for missing keys
        lr = params.get('learning_rate', 0.0001)
        loader_batch_size = int(params.get('batch_size', 32))
        max_epochs = int(params.get('num_epochs', 20))

        # Build the model together with its data loaders
        bundle = setup_model_and_loaders(
            model_class, X_train, X_val, X_test, y_train, y_val, y_test,
            input_size=None, num_classes=num_classes, device=device_str,
            batch_size=loader_batch_size, model_kwargs={}, verbose=False
        )
        net = bundle['model']
        train_batches = bundle['train_loader']
        val_batches = bundle['val_loader']

        # Adam over all model parameters, cross-entropy as the objective
        adam = optim.Adam(net.parameters(), lr=lr)
        loss_fn = nn.CrossEntropyLoss()

        return train_model(
            net, train_batches, val_batches, loss_fn, adam,
            num_epochs=max_epochs, device=device_str, patience=5
        )
    except Exception as err:
        # Best effort: report the failure and let the caller skip this configuration
        print(f"Error: {err}")
        return None

# Get histories for top 5
top5_histories = []
# tl_top5 can hold fewer than 5 rows (small grids), so report the real total
n_to_retrain = len(tl_top5)
for i, (idx, row) in enumerate(tl_top5.iterrows(), 1):
    print(f"  Retraining rank {i}/{n_to_retrain}...")
    
    # Extract parameters from row; cast explicitly because values read from a
    # DataFrame row may not be plain Python int/float
    params = {
        'learning_rate': float(row['learning_rate']),
        'batch_size': int(row['batch_size']),
        'num_epochs': int(row['num_epochs']),
    }
    
    history = retrain_with_history(
        WaferMobileNet, X_train, X_val, X_test, y_train, y_val, y_test,
        params, num_classes=loader.num_classes, device_str=str(device)
    )
    # A failed retrain returns None and is skipped. NOTE: after a skip,
    # positions in top5_histories no longer line up with the rows of tl_top5.
    if history:
        top5_histories.append(history)

print(f"✓ Successfully generated {len(top5_histories)} training histories")

In [None]:
# Plot validation loss curves for top 5
fig, axes = plt.subplots(2, 3, figsize=(16, 10))
fig.suptitle('Transfer Learning (MobileNetV2) - Top 5 Configurations\nTraining vs Validation Loss', 
             fontsize=14, fontweight='bold')

# Walk the 2x3 grid row by row, one panel per available history
panels = axes.ravel()
for idx, (ax, history) in enumerate(zip(panels, top5_histories)):
    epochs = range(1, len(history['train_loss']) + 1)
    ax.plot(epochs, history['train_loss'], 'b-', label='Training Loss', linewidth=2, marker='o', markersize=4)
    ax.plot(epochs, history['val_loss'], 'r-', label='Validation Loss', linewidth=2, marker='s', markersize=4)
    
    rank = idx + 1
    # Looked up by position, so this assumes top5_histories[idx] belongs to row idx of tl_top5
    val_acc = tl_top5.iloc[idx]['Val_Acc']
    ax.set_title(f'Rank {rank} - Val Acc: {val_acc:.4f}', fontweight='bold', fontsize=11)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.legend(loc='upper right')
    ax.grid(True, alpha=0.3)

# Hide every panel that received no curve. With 5 histories this is just the
# last panel; with fewer it also removes the other empty axes.
for unused_ax in panels[len(top5_histories):]:
    unused_ax.set_visible(False)

plt.tight_layout()
# Save before show() so the written file contains the rendered figure
plt.savefig('transfer_learning_validation_loss_curves.png', dpi=300, bbox_inches='tight')
print("✓ Loss curves plot saved as 'transfer_learning_validation_loss_curves.png'")
plt.show()

## 5. Save Results

In [None]:
# Create results directory
results_dir = 'transfer_learning_results'
os.makedirs(results_dir, exist_ok=True)
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')


def _in_results_dir(filename):
    """Return `filename` prefixed with the results directory."""
    return f'{results_dir}/{filename}'


save_rule = "=" * 80
print("\n" + save_rule)
print("SAVING RESULTS".center(80))
print(save_rule)

# 1. Save summary CSV with all results
all_results_file = f'all_results_{timestamp}.csv'
tl_summary_df.to_csv(_in_results_dir(all_results_file), index=False)
print(f"✓ All results saved to: {all_results_file}")

# 2. Save top 5 CSV
top5_file = f'top5_results_{timestamp}.csv'
tl_top5.to_csv(_in_results_dir(top5_file), index=False)
print(f"✓ Top 5 results saved to: {top5_file}")

# 3. Save best model info
best_val_acc = float(tl_results['best_val_acc'])
best_test_acc = float(tl_results['best_test_acc'])
best_model_info = {
    'best_params': tl_results['best_params'],
    'best_val_acc': best_val_acc,
    'best_test_acc': best_test_acc,
}

best_info_file = f'best_model_{timestamp}.json'
with open(_in_results_dir(best_info_file), 'w') as f:
    # default=str stringifies any value the JSON encoder cannot serialise itself
    json.dump(best_model_info, f, indent=4, default=str)
print(f"✓ Best model info saved to: {best_info_file}")

# 4. Save best model itself (weights only, via its state_dict)
weights_file = f'best_model_weights_{timestamp}.pt'
best_state = tl_results['best_model'].state_dict()
torch.save(best_state, _in_results_dir(weights_file))
print(f"✓ Best model weights saved to: {weights_file}")

# 5. Save training history for best model
best_history_file = f'best_model_history_{timestamp}.pkl'
with open(_in_results_dir(best_history_file), 'wb') as f:
    pickle.dump(tl_results['best_history'], f)
print(f"✓ Best model training history saved to: {best_history_file}")

# 6. Save training histories for top 5 (one pickle per retrained history)
for i, history in enumerate(top5_histories, 1):
    with open(_in_results_dir(f'rank_{i:02d}_history_{timestamp}.pkl'), 'wb') as f:
        pickle.dump(history, f)
print("✓ Top 5 training histories saved")

# 7. Save loss curves data as CSV for easy access
for i, history in enumerate(top5_histories, 1):
    n_epochs_run = len(history['train_loss'])
    loss_df = pd.DataFrame({
        'Epoch': range(1, n_epochs_run + 1),
        'Train_Loss': history['train_loss'],
        'Val_Loss': history['val_loss'],
        'Train_Acc': history['train_acc'],
        'Val_Acc': history['val_acc'],
    })
    loss_df.to_csv(_in_results_dir(f'rank_{i:02d}_loss_curves_{timestamp}.csv'), index=False)
print("✓ Loss curves data saved as CSV for all top 5")

# 8. Save summary report
total_combinations = len(tl_results['results'])
summary = {
    'model': 'Transfer Learning (MobileNetV2)',
    'timestamp': timestamp,
    'total_combinations': int(total_combinations),
    'best_val_accuracy': best_val_acc,
    'best_test_accuracy': best_test_acc,
    'best_hyperparameters': tl_results['best_params'],
    'top5_accuracies': tl_top5['Val_Acc'].tolist(),
}

summary_file = f'summary_report_{timestamp}.json'
with open(_in_results_dir(summary_file), 'w') as f:
    json.dump(summary, f, indent=4, default=str)
print(f"✓ Summary report saved to: {summary_file}")

print("\n" + save_rule)
print(f"All results saved to: {results_dir}/".center(80))
print(save_rule)

## 6. Results Summary

In [None]:
# Console recap of the tuning run: best scores, best hyperparameters, top-5 accuracies
summary_rule = "=" * 80
print("\n" + summary_rule)
print("TRANSFER LEARNING (MOBILENETV2) - TUNING RESULTS SUMMARY".center(80))
print(summary_rule)

print("\nBest Model Performance:")
print(f"  Validation Accuracy: {tl_results['best_val_acc']:.4f}")
print(f"  Test Accuracy: {tl_results['best_test_acc']:.4f}")

print("\nBest Hyperparameters:")
best_params = tl_results['best_params']
for key in best_params:
    value = best_params[key]
    print(f"  {key}: {value}")

print("\nTop 5 Validation Accuracies:")
top5_val_accuracies = tl_top5['Val_Acc'].values
for rank, acc in enumerate(top5_val_accuracies, start=1):
    print(f"  Rank {rank}: {acc:.4f}")

total_combinations = len(tl_results['results'])
print(f"\nTotal tuning combinations evaluated: {total_combinations}")
print("\n" + summary_rule)

## 7. Best Model Evaluation - Classification Report & Confusion Matrix

In [None]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_fscore_support, accuracy_score
from datetime import datetime

# Load best model and generate predictions
best_model = tl_results['best_model']
best_model.eval()  # inference mode for layers that behave differently in training

print(f"Original X_test shape: {X_test.shape}")

# X_test is already 3-channel from preprocessing: (N, 3, 52, 52)
# No need to convert - use directly. The tensor stays on the CPU; only one
# batch at a time is moved to the device.
X_test_tensor = torch.FloatTensor(X_test)
print(f"X_test_tensor shape for MobileNet: {X_test_tensor.shape}")

# Predict in fixed-size batches instead of one forward pass over the whole
# test set, so peak device memory does not grow with the number of samples.
EVAL_BATCH_SIZE = 256
batch_predictions = []
with torch.no_grad():
    for batch in torch.split(X_test_tensor, EVAL_BATCH_SIZE):
        logits = best_model(batch.to(device))
        batch_predictions.append(torch.argmax(logits, dim=1).cpu())
predictions = torch.cat(batch_predictions).numpy()

# Generate classification report
# Take the class count from the loader rather than a hard-coded 38, and pass
# `labels` explicitly: without it, classification_report raises when the number
# of classes actually present in y_test/predictions differs from len(target_names).
num_classes = int(loader.num_classes)
class_labels = list(range(num_classes))
class_names = [f'Class_{i:02d}' for i in class_labels]
report = classification_report(
    y_test, predictions,
    labels=class_labels, target_names=class_names, digits=4
)

report_title = f"CLASSIFICATION REPORT - Transfer Learning BEST MODEL (All {num_classes} Classes)"
print("\n" + "="*80)
print(report_title)
print("="*80)
print(report)

# Save classification report.
# `timestamp` is rebound here, so the evaluation artifacts written from this
# point on carry their own time-stamp.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
report_path = f'classification_report_{timestamp}.txt'
with open(report_path, 'w') as f:
    f.write(report_title + "\n")
    f.write("="*80 + "\n")
    f.write(report)
print(f"\n✓ Classification report saved to: {report_path}")

# Compute and visualize confusion matrix
# Pin the label set to the named classes so the matrix always has one row and
# column per tick label, even when a class is missing from y_test/predictions.
cm_labels = list(range(len(class_names)))
cm = confusion_matrix(y_test, predictions, labels=cm_labels)
print(f"\nConfusion Matrix Shape: {cm.shape}")

fig, ax = plt.subplots(figsize=(16, 14))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=True, ax=ax, 
            xticklabels=class_names, yticklabels=class_names)
ax.set_xlabel('Predicted Label', fontsize=12, fontweight='bold')
ax.set_ylabel('True Label', fontsize=12, fontweight='bold')
ax.set_title(
    f'Confusion Matrix - Transfer Learning Best Model ({len(cm_labels)}×{len(cm_labels)})',
    fontsize=14, fontweight='bold'
)
plt.xticks(rotation=45, ha='right', fontsize=8)
plt.yticks(rotation=0, fontsize=8)
plt.tight_layout()

# Save before show() so the written file contains the rendered figure
cm_path = f'confusion_matrix_{timestamp}.png'
plt.savefig(cm_path, dpi=300, bbox_inches='tight')
print(f"✓ Confusion matrix saved to: {cm_path}")
plt.show()

# Extract per-class metrics
# The label list is derived from class_names so the four metric arrays always
# have the same length as the 'Class' column of the DataFrame built below.
metric_labels = list(range(len(class_names)))
precision, recall, f1, support = precision_recall_fscore_support(
    y_test, predictions, labels=metric_labels
)
class_accuracy = recall  # Recall is per-class accuracy

# Create metrics dataframe ('Accuracy' intentionally repeats 'Recall')
metrics_df = pd.DataFrame({
    'Class': class_names,
    'Precision': precision,
    'Recall': recall,
    'F1-Score': f1,
    'Support': support,
    'Accuracy': class_accuracy
})

print("\n" + "="*80)
print("PER-CLASS METRICS SUMMARY")
print("="*80)
print(metrics_df.to_string(index=False))

# Save per-class metrics
metrics_path = f'class_wise_metrics_{timestamp}.csv'
metrics_df.to_csv(metrics_path, index=False)
print(f"\n✓ Per-class metrics saved to: {metrics_path}")

# Print summary statistics
stats_rule = "=" * 80
print("\n" + stats_rule)
print("SUMMARY STATISTICS")
print(stats_rule)

overall_accuracy = accuracy_score(y_test, predictions)
print(f"Overall Accuracy: {overall_accuracy:.4f}")
# Macro averages: unweighted means over the per-class arrays
print(f"Macro-Average Precision: {precision.mean():.4f}")
print(f"Macro-Average Recall: {recall.mean():.4f}")
print(f"Macro-Average F1-Score: {f1.mean():.4f}")

# Columns shown in both ranking tables
ranking_columns = ['Class', 'F1-Score', 'Recall', 'Support']

print("\nBest Performing Classes (Top 5 by F1-Score):")
top_5 = metrics_df.nlargest(5, 'F1-Score')[ranking_columns]
print(top_5.to_string(index=False))

print("\nWorst Performing Classes (Bottom 5 by F1-Score):")
bottom_5 = metrics_df.nsmallest(5, 'F1-Score')[ranking_columns]
print(bottom_5.to_string(index=False))

# Create comprehensive 4-subplot visualization
def _score_colors(scores):
    """Map each score to a colour: >= 0.8 green, >= 0.6 orange, otherwise red."""
    return ['green' if s >= 0.8 else 'orange' if s >= 0.6 else 'red' for s in scores]


def _label_class_axis(ax, n_classes, step=2):
    """Put a 'C<i>' tick label on every `step`-th class position of `ax`."""
    tick_positions = range(0, n_classes, step)
    ax.set_xticks(tick_positions)
    ax.set_xticklabels([f'C{i}' for i in tick_positions], fontsize=9)


# Number of bars comes from the metric arrays instead of a hard-coded 38
n_plot_classes = len(class_accuracy)
class_positions = range(n_plot_classes)

fig, axes = plt.subplots(2, 2, figsize=(18, 12))

# Subplot 1: Per-class Accuracy (Recall) with color coding
ax1 = axes[0, 0]
colors_acc = _score_colors(class_accuracy)
bars1 = ax1.bar(class_positions, class_accuracy, color=colors_acc, edgecolor='black', linewidth=0.5)
ax1.axhline(y=class_accuracy.mean(), color='blue', linestyle='--', linewidth=2, label=f'Mean: {class_accuracy.mean():.3f}')
ax1.set_xlabel('Class', fontsize=12, fontweight='bold')
ax1.set_ylabel('Accuracy (Recall)', fontsize=12, fontweight='bold')
ax1.set_title('Per-Class Accuracy Distribution', fontsize=13, fontweight='bold')
_label_class_axis(ax1, n_plot_classes)
ax1.legend(fontsize=10)
ax1.grid(axis='y', alpha=0.3)

# Subplot 2: Precision vs Recall Scatter (points coloured by per-class accuracy)
ax2 = axes[0, 1]
colors_pr = _score_colors(class_accuracy)
scatter = ax2.scatter(recall, precision, c=colors_pr, s=100, alpha=0.6, edgecolors='black', linewidth=0.5)
# The diagonal marks precision == recall; it is a balance reference, not a
# "perfect" line (perfect performance is the single point (1, 1)).
ax2.plot([0, 1], [0, 1], 'k--', linewidth=2, label='Precision = Recall')
ax2.set_xlabel('Recall', fontsize=12, fontweight='bold')
ax2.set_ylabel('Precision', fontsize=12, fontweight='bold')
ax2.set_title('Precision vs Recall (Per-Class)', fontsize=13, fontweight='bold')
ax2.set_xlim([-0.05, 1.05])
ax2.set_ylim([-0.05, 1.05])
ax2.legend(fontsize=10)
ax2.grid(alpha=0.3)

# Subplot 3: F1-Score by Class
ax3 = axes[1, 0]
colors_f1 = _score_colors(f1)
bars3 = ax3.bar(class_positions, f1, color=colors_f1, edgecolor='black', linewidth=0.5)
ax3.axhline(y=f1.mean(), color='blue', linestyle='--', linewidth=2, label=f'Mean: {f1.mean():.3f}')
ax3.set_xlabel('Class', fontsize=12, fontweight='bold')
ax3.set_ylabel('F1-Score', fontsize=12, fontweight='bold')
ax3.set_title('F1-Score Distribution by Class', fontsize=13, fontweight='bold')
_label_class_axis(ax3, n_plot_classes)
ax3.legend(fontsize=10)
ax3.grid(axis='y', alpha=0.3)

# Subplot 4: Support Distribution (Sample Count)
ax4 = axes[1, 1]
bars4 = ax4.bar(class_positions, support, color='steelblue', edgecolor='black', linewidth=0.5)
ax4.axhline(y=support.mean(), color='red', linestyle='--', linewidth=2, label=f'Mean: {support.mean():.0f}')
ax4.set_xlabel('Class', fontsize=12, fontweight='bold')
ax4.set_ylabel('Number of Samples', fontsize=12, fontweight='bold')
ax4.set_title('Class Distribution in Test Set (Support)', fontsize=13, fontweight='bold')
_label_class_axis(ax4, n_plot_classes)
ax4.legend(fontsize=10)
ax4.grid(axis='y', alpha=0.3)

plt.tight_layout()
# Save before show() so the written file contains the rendered figure
accuracy_path = f'class_wise_accuracy_{timestamp}.png'
plt.savefig(accuracy_path, dpi=300, bbox_inches='tight')
print(f"\n✓ Class-wise accuracy visualization saved to: {accuracy_path}")
plt.show()

print("\n" + "="*80)
print("EVALUATION COMPLETE - All results saved successfully")
print("="*80)