In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
import copy
import time
from data_preprocessing import get_dataloaders

# ==========================================
# 1. CONFIGURATION
# ==========================================
MODEL_SAVE_PATH = 'vgg19_scratch_stanford_cars_20classes.pth'
NUM_CLASSES = 20  # Updated to 20 classes
BATCH_SIZE = 32   
NUM_EPOCHS = 30 
LEARNING_RATE = 0.01
IMG_SIZE = 224

# Hardware
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ==========================================
# 2. ARCHITECTURE: VGG-19 (From Scratch)
# ==========================================
class VGG19_Scratch(nn.Module):
    def __init__(self, num_classes=20):
        super(VGG19_Scratch, self).__init__()
        
        # Helper: Conv -> BN -> ReLU
        def conv_block(in_ch, out_ch):
            return nn.Sequential(
                nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, bias=False),
                nn.BatchNorm2d(out_ch), # Critical for training deep VGG from scratch
                nn.ReLU(inplace=True)
            )

        # VGG-19 Configuration (2-2-4-4-4)
        self.features = nn.Sequential(
            # Block 1 (2 Convs)
            conv_block(3, 64), conv_block(64, 64),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # Block 2 (2 Convs)
            conv_block(64, 128), conv_block(128, 128),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # Block 3 (4 Convs)
            conv_block(128, 256), conv_block(256, 256), conv_block(256, 256), conv_block(256, 256),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # Block 4 (4 Convs)
            conv_block(256, 512), conv_block(512, 512), conv_block(512, 512), conv_block(512, 512),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # Block 5 (4 Convs)
            conv_block(512, 512), conv_block(512, 512), conv_block(512, 512), conv_block(512, 512),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # Classifier Head (Global Average Pooling for parameter efficiency)
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
        
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(0.5),
            nn.Linear(512, 1024),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(1024, num_classes)
        )
        
        self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = self.global_pool(x)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        # He/Kaiming Initialization is best for ReLU networks
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None: nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

# ==========================================
# 3. VALIDATION FUNCTION
# ==========================================
def validate_model(model, val_loader, criterion, device):
    """
    Evaluates the model on the validation set.
    
    Returns:
        val_loss: Average validation loss
        val_acc: Validation accuracy (%)
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            
            with autocast():
                outputs = model(images)
                loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            
            _, predicted = torch.max(outputs, 1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()
    
    val_loss = running_loss / len(val_loader)
    val_acc = 100 * correct_predictions / total_samples
    
    return val_loss, val_acc

# ==========================================
# 4. TRAINING ENGINE
# ==========================================
def train():
    print("=" * 60)
    print("üöÄ VGG-19 From Scratch Training on 20 Random Classes")
    print("=" * 60)
    
    try:
        # 1. Load Data using HF pipeline
        print("\nüì¶ Loading data from Hugging Face Hub...")
        train_loader, val_loader, test_loader, selected_classes, label_mapping = get_dataloaders(
            batch_size=BATCH_SIZE, 
            img_size=IMG_SIZE, 
            num_workers=0,
            num_classes=NUM_CLASSES,
            seed=42
        )
        
        print(f"\nüéØ Selected Classes: {selected_classes}")
        print(f"üìä Number of Classes: {len(selected_classes)}")
        print(f"üíª Using device: {device}")

        # 2. Setup Model
        print("\nü§ñ Initializing VGG-19 from scratch...")
        model = VGG19_Scratch(num_classes=NUM_CLASSES).to(device)
        
        # SGD with Momentum is the gold standard for training CNNs from scratch
        optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9, weight_decay=5e-4)
        
        # Scheduler: Reduces LR when validation accuracy plateaus
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.1, patience=5, verbose=True
        )
        
        criterion = nn.CrossEntropyLoss()
        scaler = GradScaler()  # Mixed Precision for speed

        # 3. Training Loop
        best_val_acc = 0.0
        best_model_wts = copy.deepcopy(model.state_dict())
        start_time = time.time()

        print(f"\nüèãÔ∏è Starting Training for {NUM_EPOCHS} epochs...")
        print("=" * 60)

        for epoch in range(NUM_EPOCHS):
            # ==========================================
            # TRAINING PHASE
            # ==========================================
            model.train()
            running_loss = 0.0
            correct = 0
            total = 0
            
            for i, (images, labels) in enumerate(train_loader):
                images, labels = images.to(device), labels.to(device)
                
                optimizer.zero_grad()
                
                # AutoCast for Mixed Precision (Faster)
                with autocast():
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()
                
                running_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            train_loss = running_loss / len(train_loader)
            train_acc = 100 * correct / total
            
            # ==========================================
            # VALIDATION PHASE
            # ==========================================
            val_loss, val_acc = validate_model(model, val_loader, criterion, device)
            
            # Update Scheduler
            scheduler.step(val_acc)
            
            # Print Epoch Results
            print(f"Epoch [{epoch+1:2d}/{NUM_EPOCHS}] | "
                  f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | "
                  f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")
            
            # Save Best Model
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save({
                    'epoch': epoch + 1,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'val_acc': val_acc,
                    'val_loss': val_loss,
                    'selected_classes': selected_classes,
                    'label_mapping': label_mapping
                }, MODEL_SAVE_PATH)
                print(f"  ‚úÖ Best model saved! (Val Acc: {val_acc:.2f}%)")

        total_time = time.time() - start_time
        
        print("\n" + "=" * 60)
        print("‚úÖ Training completed successfully!")
        print(f"üèÜ Best Validation Accuracy: {best_val_acc:.2f}%")
        print(f"‚è±Ô∏è  Total Training Time: {total_time // 60:.0f}m {total_time % 60:.0f}s")
        print(f"üíæ Model saved as '{MODEL_SAVE_PATH}'")
        print("=" * 60)

        # ==========================================
        # FINAL TEST EVALUATION
        # ==========================================
        print("\nüß™ Evaluating on Test Set...")
        checkpoint = torch.load(MODEL_SAVE_PATH)
        model.load_state_dict(checkpoint['model_state_dict'])
        test_loss, test_acc = validate_model(model, test_loader, criterion, device)
        print(f"üìà Test Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.2f}%")
        
    except Exception as e:
        print(f"\n‚ùå Error: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    try:
        train()
    except KeyboardInterrupt:
        print("\nüõë Training interrupted by user.")

  from .autonotebook import tqdm as notebook_tqdm


üöÄ VGG-19 From Scratch Training on 20 Random Classes

üì¶ Loading data from Hugging Face Hub...
üöÄ Loading 'tanganke/stanford_cars' from Hugging Face Hub...
üìä Total classes in dataset: 196
üéØ Selected 20 random classes: [6, 7, 8, 22, 23, 26, 28, 35, 55, 57, 59, 62, 70, 108, 139, 151, 163, 173, 188, 189]
‚úÖ Filtered dataset size: 829 samples
üìä Total classes in dataset: 196
üéØ Selected 20 random classes: [6, 7, 8, 22, 23, 26, 28, 35, 55, 57, 59, 62, 70, 108, 139, 151, 163, 173, 188, 189]
‚úÖ Filtered dataset size: 820 samples
‚úÖ Data Split: 663 Train | 166 Val | 820 Test
üìå Classes remapped to range: 0-19

üéØ Selected Classes: [6, 7, 8, 22, 23, 26, 28, 35, 55, 57, 59, 62, 70, 108, 139, 151, 163, 173, 188, 189]
üìä Number of Classes: 20
üíª Using device: cuda

ü§ñ Initializing VGG-19 from scratch...


  scaler = GradScaler()  # Mixed Precision for speed



üèãÔ∏è Starting Training for 30 epochs...


  with autocast():
  with autocast():


Epoch [ 1/30] | Train Loss: 3.0240 | Train Acc: 4.98% | Val Loss: nan | Val Acc: 3.61%
  ‚úÖ Best model saved! (Val Acc: 3.61%)
Epoch [ 2/30] | Train Loss: 3.0150 | Train Acc: 5.28% | Val Loss: nan | Val Acc: 4.22%
  ‚úÖ Best model saved! (Val Acc: 4.22%)
Epoch [ 3/30] | Train Loss: 2.9796 | Train Acc: 7.09% | Val Loss: nan | Val Acc: 6.63%
  ‚úÖ Best model saved! (Val Acc: 6.63%)
Epoch [ 4/30] | Train Loss: 2.9576 | Train Acc: 6.33% | Val Loss: nan | Val Acc: 6.02%
Epoch [ 5/30] | Train Loss: 2.9006 | Train Acc: 7.99% | Val Loss: nan | Val Acc: 12.05%
  ‚úÖ Best model saved! (Val Acc: 12.05%)
Epoch [ 6/30] | Train Loss: 2.8727 | Train Acc: 8.75% | Val Loss: nan | Val Acc: 9.64%
Epoch [ 7/30] | Train Loss: 2.8504 | Train Acc: 10.11% | Val Loss: nan | Val Acc: 12.05%
Epoch [ 8/30] | Train Loss: 2.8525 | Train Acc: 11.61% | Val Loss: nan | Val Acc: 12.05%
Epoch [ 9/30] | Train Loss: 2.8510 | Train Acc: 8.90% | Val Loss: nan | Val Acc: 13.86%
  ‚úÖ Best model saved! (Val Acc: 13.86%)
Epoc

  checkpoint = torch.load(MODEL_SAVE_PATH)


üìà Test Loss: 2.5539 | Test Accuracy: 23.66%
