In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from data_preprocessing import get_dataloaders

# ==========================================
# 1. CONFIGURATION
# ==========================================
MODEL_SAVE_PATH = "inception_v1_stanford_cars.pth"
BATCH_SIZE = 32
NUM_CLASSES = 196
NUM_EPOCHS = 30
IMG_SIZE = 299  # Inception requires 299x299 input

# ==========================================
# 2. MODEL ARCHITECTURE
# ==========================================
def get_inception_model(num_classes=196):
    # Load Pre-trained Inception V1 (GoogLeNet) with auxiliary classifiers enabled
    model = models.googlenet(weights=models.GoogLeNet_Weights.IMAGENET1K_V1, aux_logits=True)

    # --- STRATEGY: FINE-TUNING ---
    # 1. Freeze the early layers (generic features like lines/edges)
    for param in model.parameters():
        param.requires_grad = False
        
    # 2. Unfreeze the last inception modules for better feature learning
    # GoogLeNet has inception4 and inception5 modules
    for param in model.inception4e.parameters():
        param.requires_grad = True
    for param in model.inception5a.parameters():
        param.requires_grad = True
    for param in model.inception5b.parameters():
        param.requires_grad = True
        
    # 3. Replace the Final Classifier Head
    in_features = model.fc.in_features  # GoogLeNet has 1024 features
    model.fc = nn.Sequential(
        nn.Linear(in_features, 1024),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(1024, num_classes)
    )
    
    # Replace auxiliary classifiers to match our number of classes
    model.aux1.fc2 = nn.Linear(model.aux1.fc2.in_features, num_classes)
    model.aux2.fc2 = nn.Linear(model.aux2.fc2.in_features, num_classes)

    return model

# ==========================================
# 3. TRAINING SKELETON
# ==========================================

if __name__ == "__main__":
    
    print("Initializing Hugging Face Data Pipeline...")
    
    try:
        # 1. Load Data using HF pipeline (handles downloading, splitting, and transforms)
        print("Loading data from Hugging Face Hub...")
        train_dl, val_dl, test_dl = get_dataloaders(
            batch_size=BATCH_SIZE, 
            img_size=IMG_SIZE,  # Inception requires 299x299
            num_workers=0
        )
        
        print("Initializing Inception V1 (GoogLeNet)...")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = get_inception_model(num_classes=NUM_CLASSES).to(device)
        
        # Hyperparameters
        criterion = nn.CrossEntropyLoss()
        # Add weight_decay=1e-4
        optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)
        
        # Learning Rate Scheduler
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)
        
        print(f"Starting Training on {device}...")
        
        for epoch in range(NUM_EPOCHS):
            # ==========================================
            # TRAINING PHASE
            # ==========================================
            model.train()
            train_loss = 0.0
            train_correct = 0 
            train_total = 0
            
            for images, labels in train_dl:
                images, labels = images.to(device), labels.to(device)
                
                optimizer.zero_grad()
                
                # GoogLeNet returns (outputs, aux2, aux1) during training when aux_logits=True
                outputs = model(images)
                
                if isinstance(outputs, tuple):
                    # Training mode: unpack main output and auxiliary outputs
                    main_output, aux2_output, aux1_output = outputs
                    loss1 = criterion(main_output, labels)
                    loss2 = criterion(aux2_output, labels)
                    loss3 = criterion(aux1_output, labels)
                    # Combined loss: main output weighted more heavily
                    loss = loss1 + 0.3 * loss2 + 0.3 * loss3
                    outputs = main_output  # Use main output for accuracy calculation
                else:
                    # Inference mode: single output
                    loss = criterion(outputs, labels)
                
                loss.backward()
                optimizer.step()
                
                train_loss += loss.item()
                
                # Track Training Accuracy
                _, predicted = torch.max(outputs, 1)
                train_total += labels.size(0)
                train_correct += (predicted == labels).sum().item()

            train_epoch_loss = train_loss / len(train_dl)
            train_epoch_acc = 100 * train_correct / train_total
            
            # ==========================================
            # VALIDATION PHASE
            # ==========================================
            model.eval()
            val_loss = 0.0
            val_correct = 0
            val_total = 0
            
            with torch.no_grad():
                for images, labels in val_dl:
                    images, labels = images.to(device), labels.to(device)
                    
                    outputs = model(images)
                    
                    # In eval mode, model returns single output
                    loss = criterion(outputs, labels)
                    
                    val_loss += loss.item()
                    
                    # Track Validation Accuracy
                    _, predicted = torch.max(outputs, 1)
                    val_total += labels.size(0)
                    val_correct += (predicted == labels).sum().item()
            
            val_epoch_loss = val_loss / len(val_dl)
            val_epoch_acc = 100 * val_correct / val_total
            
            # Print epoch results
            print(f"Epoch {epoch+1}/{NUM_EPOCHS} | "
                  f"Train Loss: {train_epoch_loss:.4f} | Train Acc: {train_epoch_acc:.2f}% | "
                  f"Val Loss: {val_epoch_loss:.4f} | Val Acc: {val_epoch_acc:.2f}%")
            
            # Update LR based on validation loss
            scheduler.step(val_epoch_loss)
            
        print("Training loop finished successfully!")
        print("Saving model...")
        torch.save(model.state_dict(), MODEL_SAVE_PATH)
        print(f"Model saved as '{MODEL_SAVE_PATH}'")

    except Exception as e:
        print("\n An error occurred during execution:")
        print(e)
        import traceback
        traceback.print_exc()

Initializing Hugging Face Data Pipeline...
Loading data from Hugging Face Hub...
 Loading 'tanganke/stanford_cars' from Hugging Face Hub...
âœ… Data Split: 6515 Train | 1629 Val | 8041 Test
Initializing Inception V1 (GoogLeNet)...




Starting Training on cuda...
Epoch 1/30 | Train Loss: 8.5357 | Train Acc: 1.24% | Val Loss: 5.1678 | Val Acc: 1.35%
Epoch 2/30 | Train Loss: 8.1163 | Train Acc: 3.94% | Val Loss: 4.5336 | Val Acc: 7.00%
Epoch 3/30 | Train Loss: 7.4344 | Train Acc: 8.58% | Val Loss: 3.7890 | Val Acc: 15.35%
Epoch 4/30 | Train Loss: 6.7890 | Train Acc: 14.89% | Val Loss: 3.1853 | Val Acc: 26.21%
Epoch 5/30 | Train Loss: 6.2688 | Train Acc: 22.46% | Val Loss: 2.7322 | Val Acc: 34.25%
Epoch 6/30 | Train Loss: 5.8048 | Train Acc: 30.44% | Val Loss: 2.3769 | Val Acc: 43.03%
Epoch 7/30 | Train Loss: 5.4418 | Train Acc: 38.08% | Val Loss: 2.1030 | Val Acc: 47.02%
Epoch 8/30 | Train Loss: 5.1428 | Train Acc: 44.51% | Val Loss: 1.8683 | Val Acc: 52.67%
Epoch 9/30 | Train Loss: 4.8604 | Train Acc: 50.44% | Val Loss: 1.7215 | Val Acc: 55.13%
Epoch 10/30 | Train Loss: 4.6291 | Train Acc: 55.66% | Val Loss: 1.5362 | Val Acc: 59.91%
Epoch 11/30 | Train Loss: 4.4227 | Train Acc: 60.83% | Val Loss: 1.4383 | Val Acc: 61