In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import os
from typing import Tuple, List, Dict
import time

In [None]:
class TrainingConfig:
    """Bundle of hyperparameters and runtime settings used by the training cells.

    Every value is a plain attribute set in ``__init__``; change an attribute
    on the instance before building loaders/models to override it.
    """

    def __init__(self):
        # Runtime: prefer the GPU whenever PyTorch reports one.
        use_gpu = torch.cuda.is_available()
        self.device = torch.device("cuda" if use_gpu else "cpu")

        # Data loading
        self.data_dir = "ham10000_processed"
        self.train_split = 0.8
        self.batch_size = 32
        self.num_workers = 4

        # Optimisation
        self.num_epochs = 15
        self.learning_rate = 1e-4
        self.weight_decay = 1e-4
        self.scheduler_patience = 3
        self.scheduler_factor = 0.5

        # Regularisation
        self.dropout_rate = 0.3
        self.label_smoothing = 0.1

    def print_config(self):
        """Print the headline settings (a subset of the attributes) to stdout."""
        reported = (
            ("Device", self.device),
            ("Data Directory", self.data_dir),
            ("Batch Size", self.batch_size),
            ("Epochs", self.num_epochs),
            ("Learning Rate", self.learning_rate),
            ("Weight Decay", self.weight_decay),
            ("Dropout Rate", self.dropout_rate),
        )
        print("Training Configuration:")
        for label, value in reported:
            print(f"  {label}: {value}")
        print("-" * 50)

# Build the notebook-wide configuration object. The cells below read the
# device, data directory and hyperparameters from `config`, so edit its
# attributes here (before running them) to change a setting.
config = TrainingConfig()
config.print_config()

In [None]:
def get_transforms():
    """Build the image pipelines for the two dataset splits.

    Returns:
        (train_transform, val_transform): the training pipeline resizes to
        224x224, applies random flips, rotation, colour jitter and translation,
        then converts to a tensor and normalises. The validation pipeline only
        resizes, converts and normalises, so evaluation is deterministic.
    """
    target_size = (224, 224)
    # Per-channel statistics; these match the commonly used ImageNet values.
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    def tensor_steps():
        # Fresh instances per pipeline, mirroring two independent Compose lists.
        return [
            transforms.ToTensor(),
            transforms.Normalize(channel_mean, channel_std),
        ]

    # Random augmentation is applied to training images only.
    augmentation_steps = [
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.2),
        transforms.RandomRotation(20),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    ]

    train_transform = transforms.Compose(
        [transforms.Resize(target_size), *augmentation_steps, *tensor_steps()]
    )
    val_transform = transforms.Compose(
        [transforms.Resize(target_size), *tensor_steps()]
    )

    return train_transform, val_transform

def create_data_loaders(data_dir: str, config: TrainingConfig, seed: int = 42):
    """Create train and validation data loaders from an ImageFolder directory.

    The folder is opened twice - once with the augmenting training transform
    and once with the plain validation transform - and a single random index
    split is applied to both views, so the two subsets never share an image.

    Args:
        data_dir: Root directory in the layout expected by
            ``torchvision.datasets.ImageFolder``.
        config: Supplies ``train_split``, ``batch_size``, ``num_workers`` and
            ``device``.
        seed: Seed for the train/validation split. A fixed seed keeps the
            split identical across kernel restarts, so a checkpoint saved in
            one session is not later evaluated on images it was trained on.

    Returns:
        ``(train_loader, val_loader, num_classes)``.
    """
    print("Loading dataset...")

    # Get transforms
    train_transform, val_transform = get_transforms()

    # Two views of the same folder, one per transform. Both list the samples
    # in the same order, so indices from one split apply to either view.
    train_view = datasets.ImageFolder(root=data_dir, transform=train_transform)
    val_view = datasets.ImageFolder(root=data_dir, transform=val_transform)

    num_classes = len(train_view.classes)
    print(f"Number of classes: {num_classes}")
    print(f"Classes: {train_view.classes}")

    # Train/val split over sample indices, driven by a dedicated generator so
    # it is reproducible and independent of the global RNG state.
    total_samples = len(train_view)
    train_size = int(config.train_split * total_samples)
    val_size = total_samples - train_size
    split_rng = torch.Generator().manual_seed(seed)
    train_indices, val_indices = torch.utils.data.random_split(
        range(total_samples), [train_size, val_size], generator=split_rng
    )

    train_dataset = torch.utils.data.Subset(train_view, train_indices.indices)
    val_dataset = torch.utils.data.Subset(val_view, val_indices.indices)

    # Pinned host memory only helps host-to-GPU copies; skip it on CPU.
    use_pinned_memory = torch.device(config.device).type == "cuda"

    # Data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        num_workers=config.num_workers,
        pin_memory=use_pinned_memory
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        num_workers=config.num_workers,
        pin_memory=use_pinned_memory
    )

    print(f"Train samples: {len(train_dataset)}")
    print(f"Validation samples: {len(val_dataset)}")

    return train_loader, val_loader, num_classes

# Build the loaders once at notebook level. `num_classes` is the number of
# classes ImageFolder finds under `config.data_dir`; the model cell below
# uses it to size the classification head.
train_loader, val_loader, num_classes = create_data_loaders(config.data_dir, config)

In [None]:
def create_model(num_classes: int, config: TrainingConfig):
    """Build a ResNet50 with a new classification head and move it to the device.

    Args:
        num_classes: Output size of the replacement final layer.
        config: Supplies ``dropout_rate`` and ``device``.

    Returns:
        The model, already on ``config.device``. Parameter counts are printed
        but not returned.
    """
    print("Creating ResNet50 model...")

    # Start from the default pre-trained weights shipped with torchvision.
    network = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)

    # Swap the stock classifier for dropout + a linear layer sized to our classes.
    feature_dim = network.fc.in_features
    classifier_head = nn.Sequential(
        nn.Dropout(config.dropout_rate),
        nn.Linear(feature_dim, num_classes),
    )
    network.fc = classifier_head

    network = network.to(config.device)

    # Tally parameters in a single pass.
    total_params = 0
    trainable_params = 0
    for param in network.parameters():
        count = param.numel()
        total_params += count
        if param.requires_grad:
            trainable_params += count
    print(f"Total parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,}")

    return network

def setup_training_components(model, config: TrainingConfig):
    """Create the loss function, optimizer and learning-rate scheduler.

    Args:
        model: Module whose parameters the optimizer will update.
        config: Supplies ``label_smoothing``, ``learning_rate``,
            ``weight_decay``, ``scheduler_factor`` and ``scheduler_patience``.

    Returns:
        ``(criterion, optimizer, scheduler)`` where the scheduler is a
        ``ReduceLROnPlateau`` in ``mode='max'``: call ``scheduler.step(metric)``
        with a metric that should increase (e.g. validation accuracy).
    """
    print("Setting up training components...")

    # Loss function with label smoothing
    criterion = nn.CrossEntropyLoss(label_smoothing=config.label_smoothing)

    # Optimizer
    optimizer = optim.AdamW(
        model.parameters(),
        lr=config.learning_rate,
        weight_decay=config.weight_decay
    )

    # Learning rate scheduler. The `verbose` flag is deliberately not passed:
    # it is deprecated in PyTorch and may be rejected by newer releases. Read
    # the current rate from `optimizer.param_groups[0]['lr']` instead.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='max',
        factor=config.scheduler_factor,
        patience=config.scheduler_patience
    )

    return criterion, optimizer, scheduler

# Instantiate the network and its loss/optimizer/scheduler as notebook-level
# objects; the training loop below uses and updates these same instances.
model = create_model(num_classes, config)
criterion, optimizer, scheduler = setup_training_components(model, config)

In [None]:
def train_epoch(model, train_loader, criterion, optimizer, config: TrainingConfig, epoch: int):
    """Train for one epoch and return the epoch's mean loss and accuracy.

    Args:
        model: Network to optimise; put into training mode here.
        train_loader: Iterable of ``(images, labels)`` batches supporting ``len()``.
        criterion: Loss callable; assumed to return the batch *mean* loss
            (the default reduction of ``nn.CrossEntropyLoss``).
        optimizer: Optimizer stepped once per batch.
        config: Supplies ``device`` and ``num_epochs`` (for the progress line).
        epoch: Zero-based epoch index, used only in progress output.

    Returns:
        ``(train_loss, train_acc)``. The loss is averaged per sample, so a
        smaller final batch does not get the same weight as a full one.
        Returns ``(0.0, 0.0)`` when the loader yields no samples instead of
        dividing by zero.
    """
    model.train()
    loss_sum = 0.0  # sum over samples of the per-sample loss
    correct = total = 0
    num_batches = len(train_loader)

    for batch_idx, (images, labels) in enumerate(train_loader):
        images = images.to(config.device, non_blocking=True)
        labels = labels.to(config.device, non_blocking=True)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        batch_loss = loss.item()
        # Undo the batch mean so every sample contributes equally to the epoch mean.
        loss_sum += batch_loss * batch_size
        total += batch_size
        correct += (outputs.argmax(dim=1) == labels).sum().item()

        # Print progress every 30 batches
        if batch_idx % 30 == 0:
            print(f'Epoch {epoch+1}/{config.num_epochs}, Batch {batch_idx}/{num_batches}, Loss: {batch_loss:.4f}')

    if total == 0:
        return 0.0, 0.0

    return loss_sum / total, correct / total

def validate_epoch(model, val_loader, criterion, config: TrainingConfig):
    """Evaluate the model on ``val_loader`` without updating weights.

    Args:
        model: Network to evaluate; put into eval mode here.
        val_loader: Iterable of ``(images, labels)`` batches.
        criterion: Loss callable; assumed to return the batch *mean* loss
            (the default reduction of ``nn.CrossEntropyLoss``).
        config: Supplies ``device``.

    Returns:
        ``(val_loss, val_acc)``. The loss is averaged per sample, so a smaller
        final batch is not over-weighted. Returns ``(0.0, 0.0)`` when the
        loader yields no samples instead of dividing by zero.
    """
    model.eval()
    loss_sum = 0.0  # sum over samples of the per-sample loss
    correct = total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(config.device, non_blocking=True)
            labels = labels.to(config.device, non_blocking=True)
            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            # Undo the batch mean so every sample contributes equally.
            loss_sum += loss.item() * batch_size
            total += batch_size
            correct += (outputs.argmax(dim=1) == labels).sum().item()

    if total == 0:
        return 0.0, 0.0

    return loss_sum / total, correct / total

def save_model(model, val_acc: float, best_val_acc: float, epoch: int):
    """Checkpoint the model when validation accuracy strictly improves.

    Args:
        model: Module whose ``state_dict`` is written.
        val_acc: Validation accuracy of the epoch just finished.
        best_val_acc: Best validation accuracy seen before this epoch.
        epoch: Accepted for call-site symmetry; not used.

    Returns:
        The best accuracy after this epoch: ``val_acc`` if a checkpoint was
        written, otherwise ``best_val_acc`` unchanged (ties do not save).
    """
    improved = val_acc > best_val_acc
    if not improved:
        return best_val_acc

    checkpoint_path = "best_resnet50_skin_cancer.pth"
    torch.save(model.state_dict(), checkpoint_path)
    print(f"New best model saved with val_acc: {val_acc:.4f}")
    return val_acc

# Optional smoke test: set RUN_SMOKE_TEST to True to run one training pass and
# one validation pass before the full run. Note that the training pass really
# updates `model`, so the main loop would then start from those weights.
RUN_SMOKE_TEST = False

if RUN_SMOKE_TEST:
    print("Testing training functions...")
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, config, 0)
    val_loss, val_acc = validate_epoch(model, val_loader, criterion, config)
    print(f"Test - Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
    print(f"Test - Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
else:
    print("Smoke test skipped (set RUN_SMOKE_TEST = True to enable).")

In [None]:
# Main training loop.
# Runs `config.num_epochs` epochs on the notebook-level `model`, `optimizer`
# and `scheduler`. Re-running this cell resets the history lists and
# `best_val_acc` but keeps training the same model instance.
print(f"Starting ResNet50 training for {config.num_epochs} epochs...")
print("-" * 60)

# Per-epoch history (read by the plotting cell) and the running best accuracy.
train_accs, val_accs, train_losses, val_losses = [], [], [], []
best_val_acc = 0.0
start_time = time.time()

for epoch in range(config.num_epochs):
    epoch_start = time.time()

    # Training phase: one optimisation pass over the training loader.
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, config, epoch)
    train_accs.append(train_acc)
    train_losses.append(train_loss)

    # Validation phase: no weight updates.
    val_loss, val_acc = validate_epoch(model, val_loader, criterion, config)
    val_accs.append(val_acc)
    val_losses.append(val_loss)

    # The scheduler was created with mode='max', so it is stepped on
    # validation accuracy (higher is better).
    scheduler.step(val_acc)

    # save_model writes a checkpoint only on strict improvement and returns
    # the (possibly updated) best accuracy.
    best_val_acc = save_model(model, val_acc, best_val_acc, epoch)

    # Read the LR after scheduler.step so any reduction just applied is shown.
    current_lr = optimizer.param_groups[0]['lr']
    epoch_time = time.time() - epoch_start
    print(f"Epoch [{epoch+1}/{config.num_epochs}] - Time: {epoch_time:.2f}s")
    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, LR: {current_lr:.6f}")
    print("-" * 60)

# Save the weights from the last epoch; these may differ from the best
# checkpoint written by save_model.
torch.save(model.state_dict(), "final_resnet50_skin_cancer.pth")
total_time = time.time() - start_time

print(f"Training completed!")
print(f"Total training time: {total_time/60:.2f} minutes")
print(f"Best validation accuracy: {best_val_acc:.4f}")

In [None]:
def plot_training_results(train_accs, val_accs, train_losses, val_losses, best_val_acc, config):
    """Draw accuracy and loss curves side by side, save them, and show them.

    Args:
        train_accs, val_accs: Per-epoch accuracies (same length).
        train_losses, val_losses: Per-epoch losses (same length).
        best_val_acc: Drawn as a dashed horizontal line on the accuracy panel.
        config: Accepted for call-site symmetry; not used.

    Returns:
        The matplotlib figure. It is also written to
        ``resnet50_training_plot.png`` in the working directory.
    """
    figure, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(15, 5))

    # Epochs are shown 1-based on the x axis.
    epoch_axis = range(1, len(train_accs) + 1)

    # Left panel: accuracy curves plus the best-validation reference line.
    acc_ax.plot(epoch_axis, train_accs, label='Train Accuracy', marker='o', linewidth=2)
    acc_ax.plot(epoch_axis, val_accs, label='Validation Accuracy', marker='s', linewidth=2)
    acc_ax.axhline(
        y=best_val_acc,
        color='r',
        linestyle='--',
        alpha=0.7,
        label=f'Best Val Acc: {best_val_acc:.4f}',
    )

    # Right panel: loss curves.
    loss_ax.plot(epoch_axis, train_losses, label='Train Loss', marker='o', linewidth=2)
    loss_ax.plot(epoch_axis, val_losses, label='Validation Loss', marker='s', linewidth=2)

    # Decoration shared by both panels.
    panels = (
        (acc_ax, "Accuracy", "ResNet50 - Training and Validation Accuracy"),
        (loss_ax, "Loss", "ResNet50 - Training and Validation Loss"),
    )
    for axis, y_label, heading in panels:
        axis.set_xlabel("Epoch")
        axis.set_ylabel(y_label)
        axis.set_title(heading)
        axis.legend()
        axis.grid(True, alpha=0.3)

    # Accuracy is a fraction, so pin its axis to the full [0, 1] range.
    acc_ax.set_ylim(0, 1)

    plt.tight_layout()
    plt.savefig("resnet50_training_plot.png", dpi=300, bbox_inches='tight')
    plt.show()

    return figure

def print_final_summary(best_val_acc, config, total_params, optimizer=None):
    """Print final training summary.

    Args:
        best_val_acc: Best validation accuracy as a fraction in [0, 1].
        config: Supplies ``num_epochs``, ``data_dir`` and ``device``.
        total_params: Parameter count to report.
        optimizer: Optimizer whose first parameter group's learning rate is
            reported. When omitted, the notebook-level ``optimizer`` is used
            if one exists (the previous implicit behaviour); if there is none,
            the line shows ``n/a`` instead of raising ``NameError``.
    """
    if optimizer is None:
        # Backward-compatible fallback for callers that rely on the global.
        optimizer = globals().get("optimizer")

    if optimizer is not None:
        final_lr_text = f"{optimizer.param_groups[0]['lr']:.6f}"
    else:
        final_lr_text = "n/a"

    print("\n" + "="*60)
    print("TRAINING SUMMARY")
    print("="*60)
    print(f"Model: ResNet50")
    print(f"Best Validation Accuracy: {best_val_acc:.4f} ({best_val_acc*100:.2f}%)")
    print(f"Total Parameters: {total_params:,}")
    print(f"Epochs Completed: {config.num_epochs}")
    print(f"Final Learning Rate: {final_lr_text}")
    print(f"Dataset: {config.data_dir}")
    print(f"Device: {config.device}")
    print("="*60)

# Plot the recorded history, then print the summary. The parameter count is
# recomputed here because create_model prints it but only returns the model.
plot_training_results(train_accs, val_accs, train_losses, val_losses, best_val_acc, config)
total_params = sum(p.numel() for p in model.parameters())
print_final_summary(best_val_acc, config, total_params)

# Model Evaluation and Testing

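With training complete, you can load the best checkpoint (`best_resnet50_skin_cancer.pth`) and evaluate it on test data or use it for inference. The example cell below spot-checks predictions on a few validation batches.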

In [None]:
def load_best_model(model_path: str, num_classes: int, config: TrainingConfig):
    """Load a saved state dict into a freshly built model for inference.

    Args:
        model_path: Path to a file written with ``torch.save(model.state_dict(), ...)``.
        num_classes: Must match the head size the checkpoint was trained with.
        config: Passed to ``create_model``; also supplies the target ``device``.

    Returns:
        The model in eval mode on ``config.device``.

    Raises:
        FileNotFoundError: If ``model_path`` does not exist.
    """
    model = create_model(num_classes, config)
    # The checkpoint is a plain state dict, so restrict unpickling to
    # tensors/containers rather than running the full pickle loader.
    # (`weights_only` requires PyTorch >= 1.13.)
    state_dict = torch.load(model_path, map_location=config.device, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()
    print(f"Best model loaded from {model_path}")
    return model

def predict_sample(model, val_loader, config, num_samples=5, class_names=None):
    """Print the prediction for the first image of the first few batches.

    Args:
        model: Network to run; put into eval mode here.
        val_loader: Iterable of ``(images, labels)`` batches.
        config: Supplies ``device``.
        num_samples: Number of *batches* to inspect; one image (the first) is
            reported per batch.
        class_names: Optional list mapping class index to display name. When
            omitted, the names are taken from ``val_loader.dataset`` (unwrapping
            ``Subset``-style ``.dataset`` wrappers until a ``.classes``
            attribute is found), falling back to the seven HAM10000 labels.
    """
    if class_names is None:
        # Prefer the names the dataset itself reports so the index -> name
        # mapping always matches the labels the model was trained on.
        source = getattr(val_loader, "dataset", None)
        while source is not None and not hasattr(source, "classes"):
            source = getattr(source, "dataset", None)
        discovered = getattr(source, "classes", None)
        if discovered:
            class_names = list(discovered)
        else:
            class_names = ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']

    def label_name(index):
        # Never crash on an index the name list does not cover.
        if 0 <= index < len(class_names):
            return class_names[index]
        return f"class_{index}"

    model.eval()
    with torch.no_grad():
        # zip against range() stops without pulling an extra batch from the loader.
        for i, (images, labels) in zip(range(num_samples), val_loader):
            if labels.numel() == 0:
                continue

            images = images.to(config.device)
            outputs = model(images)
            probabilities = torch.softmax(outputs, dim=1)
            predicted = outputs.argmax(dim=1)

            # Show results for first image in batch
            true_label = labels[0].item()
            pred_label = predicted[0].item()
            confidence = probabilities[0][pred_label].item()

            print(f"Sample {i+1}:")
            print(f"  True: {label_name(true_label)}")
            print(f"  Predicted: {label_name(pred_label)} (confidence: {confidence:.3f})")
            print(f"  Correct: {'✓' if true_label == pred_label else '✗'}")
            print()

# Example: reload the best checkpoint and spot-check a few validation batches.
# save_model only writes the checkpoint when validation accuracy improves, so
# the file may be absent; only that case (FileNotFoundError) is handled here.
try:
    best_model = load_best_model("best_resnet50_skin_cancer.pth", num_classes, config)
    predict_sample(best_model, val_loader, config, num_samples=3)
except FileNotFoundError:
    print("Best model not found. Run the training cells first!")