In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.cuda.amp import autocast, GradScaler
import copy
import time

# IMPORT YOUR SHARED PREPROCESSING MODULE
from model_utils import UniversalCarDataset, get_transforms

# ==========================================
# 1. CONFIGURATION
# ==========================================
# --- Dataset split locations -------------------------------------------
# Directory holding the split CSVs, and the two CSV files read by train().
DATA_DIR = "ready_data_splits"
TRAIN_CSV = os.path.join(DATA_DIR, "train_split.csv")
VAL_CSV = os.path.join(DATA_DIR, "val_split.csv")

# --- Model / optimisation hyper-parameters -----------------------------
MODEL_SAVE_PATH = "vgg19_hf_stanford_cars.pth"  # checkpoint file for the best weights
NUM_CLASSES = 196       # size of the classifier output layer
BATCH_SIZE = 32         # samples per DataLoader batch
NUM_EPOCHS = 30         # full passes over the training split
LEARNING_RATE = 0.01    # initial learning rate handed to the optimizer

# --- Compute device ----------------------------------------------------
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# ==========================================
# 2. ARCHITECTURE: VGG-19 (From Scratch)
# ==========================================
class VGG19_Scratch(nn.Module):
    """VGG-19 style CNN with batch normalisation, initialised from scratch.

    The feature extractor is five stages of 3x3 Conv -> BatchNorm -> ReLU
    units (2-2-4-4-4 layout), each stage closed by a 2x2 max-pool. The head
    is global average pooling followed by a two-layer MLP with dropout.

    Args:
        num_classes: Width of the final linear layer (number of logits).
    """

    # Output channels of every conv unit, grouped by stage. A max-pool is
    # appended after each stage.
    _STAGES = (
        (64, 64),
        (128, 128),
        (256, 256, 256, 256),
        (512, 512, 512, 512),
        (512, 512, 512, 512),
    )

    def __init__(self, num_classes=196):
        super().__init__()

        # Build the extractor in the same order as the layout table so the
        # Sequential indices (and therefore state_dict keys) stay stable.
        layers = []
        in_ch = 3
        for stage in self._STAGES:
            for out_ch in stage:
                layers.append(self._conv_bn_relu(in_ch, out_ch))
                in_ch = out_ch
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        self.features = nn.Sequential(*layers)

        # Global average pooling collapses each 512-channel map to one value,
        # so the head only needs a 512-wide input.
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(0.5),
            nn.Linear(512, 1024),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(1024, num_classes),
        )

        self._initialize_weights()

    @staticmethod
    def _conv_bn_relu(in_ch, out_ch):
        """Return one 3x3 Conv -> BatchNorm -> ReLU unit.

        The convolution has no bias because the following BatchNorm layer
        supplies its own learnable shift.
        """
        return nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Map an image batch to class logits: features -> pool -> classifier."""
        return self.classifier(self.global_pool(self.features(x)))

    def _initialize_weights(self):
        """Initialise every layer in place.

        Conv layers get Kaiming-normal weights (fan_out, ReLU gain),
        BatchNorm layers start as identity (weight 1, bias 0), and linear
        layers get N(0, 0.01) weights with zero bias.
        """
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, mean=0, std=0.01)
                nn.init.zeros_(module.bias)

# ==========================================
# 3. TRAINING ENGINE
# ==========================================
def _train_one_epoch(model, loader, criterion, optimizer, scaler, use_amp, epoch):
    """Run one optimisation pass over ``loader`` and report its statistics.

    Args:
        model: Network to optimise; switched to train mode here.
        loader: Iterable yielding ``(images, labels)`` tensor batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        optimizer: Optimizer stepped once per batch through ``scaler``.
        scaler: ``GradScaler`` used for loss scaling (a no-op when disabled).
        use_amp: Whether the forward pass runs under autocast.
        epoch: Zero-based epoch index, used only for the progress line.

    Returns:
        ``(mean_loss, accuracy_percent)``, both averaged per sample.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0
    num_batches = len(loader)

    for i, (images, labels) in enumerate(loader):
        # non_blocking only has an effect for pinned host memory on CUDA.
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        optimizer.zero_grad()

        with autocast(enabled=use_amp):
            outputs = model(images)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Weight the batch-mean loss by the batch size so that a short final
        # batch does not skew the epoch average.
        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        _, predicted = torch.max(outputs, 1)
        total += batch_size
        correct += (predicted == labels).sum().item()

        # Lightweight progress indicator for long epochs.
        if i % 50 == 0:
            print(f"\rEp {epoch+1}: Batch {i}/{num_batches}", end="")

    return loss_sum / total, 100 * correct / total


def _evaluate(model, loader, use_amp):
    """Return the top-1 accuracy (in percent) of ``model`` over ``loader``.

    The model is switched to eval mode and gradients are disabled.
    """
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)
            with autocast(enabled=use_amp):
                outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return 100 * correct / total


def train():
    """Train ``VGG19_Scratch`` on the prepared splits and checkpoint the best weights.

    Loads the train/validation CSV splits through ``UniversalCarDataset``,
    trains for ``NUM_EPOCHS`` epochs with SGD + momentum, lowers the learning
    rate when validation accuracy plateaus, and writes the state dict of the
    best-scoring epoch to ``MODEL_SAVE_PATH``.

    Raises:
        FileNotFoundError: If either split CSV is missing.
        ValueError: If either split contains no samples.
    """
    print(f"üöÄ Initializing VGG-19 Training on {device}...")

    # 1. Load Data (Using Shared Module)
    # Check both splits up front so a missing validation CSV fails here with
    # the same actionable message instead of inside the dataset loader.
    for split_csv in (TRAIN_CSV, VAL_CSV):
        if not os.path.exists(split_csv):
            raise FileNotFoundError(f"‚ùå Splits not found at {split_csv}. Run 'prepare_data.py' first.")

    tfms = get_transforms(img_size=(224, 224))

    print("   Loading datasets...")
    train_ds = UniversalCarDataset(TRAIN_CSV, transform=tfms['train'])
    val_ds = UniversalCarDataset(VAL_CSV, transform=tfms['val'])

    # An empty split would otherwise surface as a ZeroDivisionError when the
    # epoch statistics are computed.
    if len(train_ds) == 0 or len(val_ds) == 0:
        raise ValueError(
            f"Empty dataset split (train={len(train_ds)}, val={len(val_ds)}); nothing to train on."
        )

    # Pinned memory and mixed precision only help on CUDA; enabling them on a
    # CPU-only machine just produces warnings.
    use_cuda = device.type == "cuda"
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=0, pin_memory=use_cuda)
    val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=0, pin_memory=use_cuda)

    print(f"   Train Images: {len(train_ds)} | Val Images: {len(val_ds)}")

    # 2. Setup Model
    model = VGG19_Scratch(num_classes=NUM_CLASSES).to(device)

    optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9, weight_decay=5e-4)

    # Reduce the LR tenfold when validation accuracy stops improving. The
    # scheduler's `verbose` flag is deprecated in recent PyTorch releases, so
    # LR changes are logged explicitly in the loop below.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=5)

    criterion = nn.CrossEntropyLoss()
    scaler = GradScaler(enabled=use_cuda)

    # 3. Training Loop
    best_acc = 0.0
    start_time = time.time()

    for epoch in range(NUM_EPOCHS):
        epoch_loss, train_acc = _train_one_epoch(
            model, train_loader, criterion, optimizer, scaler, use_cuda, epoch
        )
        val_acc = _evaluate(model, val_loader, use_cuda)

        # Scheduler Step (remember the LR so a reduction can be reported)
        prev_lr = optimizer.param_groups[0]["lr"]
        scheduler.step(val_acc)
        new_lr = optimizer.param_groups[0]["lr"]

        # Logging
        print(f"\rEp {epoch+1}/{NUM_EPOCHS}: Train Loss {epoch_loss:.4f} (Acc {train_acc:.1f}%) | Val Acc {val_acc:.2f}%")
        if new_lr != prev_lr:
            print(f"   Learning rate reduced: {prev_lr:.2e} -> {new_lr:.2e}")

        # Save Best Model. The checkpoint on disk is the only copy needed,
        # so no in-memory duplicate of the weights is kept.
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), MODEL_SAVE_PATH)
            print(f"   üåü New Best Model Saved ({best_acc:.2f}%)")

    total_time = time.time() - start_time
    print(f" Best Validation Accuracy: {best_acc:.2f}%")
    print(f"\n‚úÖ Training Complete in {total_time // 60:.0f}m {total_time % 60:.0f}s")
    

 
if __name__ == "__main__":
    # Script entry point: run training and report failures on stdout/stderr
    # instead of letting them propagate (convenient inside a notebook cell).
    try:
        train()
    except KeyboardInterrupt:
        # Ctrl-C / kernel interrupt: exit quietly with a short notice.
        print("\nüõë Training interrupted by user.")
    except Exception as exc:
        import traceback

        # Show the short message first, then the full stack for debugging.
        print(f"‚ùå Error: {exc}")
        traceback.print_exc()

 Loading 'tanganke/stanford_cars' from Hugging Face Hub...
✅ Data Split: 6515 Train | 1629 Val | 8041 Test
🏗️ Initializing VGG-19 (Batch Norm)...
🚀 Training on device: cuda
Epoch 1/30
----------
Train Loss: 5.2108 Acc: 0.0137
Val Loss: 4.9062 Acc: 0.0307
🌟 New Best Model Saved! (Acc: 0.0307)
Epoch 2/30
----------
Train Loss: 4.7525 Acc: 0.0350
Val Loss: 4.2757 Acc: 0.0859
🌟 New Best Model Saved! (Acc: 0.0859)
Epoch 3/30
----------
Train Loss: 4.2123 Acc: 0.0783
Val Loss: 3.7260 Acc: 0.1369
🌟 New Best Model Saved! (Acc: 0.1369)
Epoch 4/30
----------
Train Loss: 3.8241 Acc: 0.1205
Val Loss: 3.4138 Acc: 0.1909
🌟 New Best Model Saved! (Acc: 0.1909)
Epoch 5/30
----------
Train Loss: 3.5132 Acc: 0.1610
Val Loss: 3.2801 Acc: 0.2032
🌟 New Best Model Saved! (Acc: 0.2032)
Epoch 6/30
----------
Train Loss: 3.2433 Acc: 0.1995
Val Loss: 3.2387 Acc: 0.2032
Epoch 7/30
----------
Train Loss: 3.0356 Acc: 0.2439
Val Loss: 2.9689 Acc: 0.2640
üåü New Best Model Saved! (Acc: 0.264