<a href="https://colab.research.google.com/github/Jaaeeyyyy/COMSYS-Hackathon/blob/main/TASKA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Complete EfficientNet Gender Classification Code for Hackathon
# Install required packages
!pip install -q torch torchvision scikit-learn

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, WeightedRandomSampler
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
import numpy as np
import os
from collections import Counter

# Make the Google Drive contents visible inside the Colab runtime
from google.colab import drive

drive.mount('/content/drive')

# Locations of the training and validation image folders on the mounted drive
train_dir = '/content/drive/MyDrive/FACECOM/Task_A/train'
val_dir = '/content/drive/MyDrive/FACECOM/Task_A/val'

# Prefer CUDA when it is available, otherwise run on the CPU
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device}")

# Per-channel mean/std used to normalize inputs (the standard ImageNet statistics)
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Training pipeline: resize, random augmentation, then tensor conversion + normalization
train_steps = [
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
]
train_transform = transforms.Compose(train_steps)

# Validation pipeline: deterministic, no augmentation
val_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
]
val_transform = transforms.Compose(val_steps)

# Build the datasets; ImageFolder assigns one integer label per class sub-directory
train_dataset = ImageFolder(train_dir, transform=train_transform)
val_dataset = ImageFolder(val_dir, transform=val_transform)

print(f"Training samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")
print(f"Classes: {train_dataset.classes}")

# Count how many training images each class label has
class_counts = Counter(train_dataset.targets)
print(f"Class distribution: {dict(class_counts)}")

# Inverse-frequency weights: total / (num_classes * class_count),
# so rarer classes receive proportionally larger weights
total_samples = len(train_dataset)
num_classes = len(class_counts)
class_weights = {
    label: total_samples / (num_classes * n_images)
    for label, n_images in class_counts.items()
}

print(f"Class weights: {class_weights}")

# One weight per training image, looked up from its class label;
# the sampler draws as many samples per epoch as there are images
sample_weights = [class_weights[label] for label in train_dataset.targets]
sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights))

# Training batches come from the weighted sampler; validation is read in order
train_loader = DataLoader(train_dataset, batch_size=32, sampler=sampler, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)

# Create EfficientNet-B0 model with ImageNet-pretrained weights.
# The explicit `weights=` enum replaces the deprecated `pretrained=True` flag
# (torchvision >= 0.13) and selects the same ImageNet checkpoint.
model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)
print(f"Original classifier: {model.classifier}")

# Replace the final linear layer of the classifier head with a 2-way output
# for binary classification, keeping the dropout layer in front of it
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)
model = model.to(device)

print(f"Modified classifier: {model.classifier}")

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)

# Learning rate scheduler: halve the LR after 5 epochs without improvement in the
# monitored value. The deprecated `verbose` argument is intentionally omitted;
# the training loop already prints the current learning rate every epoch.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5, factor=0.5)

# Training function with early stopping
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler,
                num_epochs=50, patience=10, device=None,
                checkpoint_path='best_gender_model.pth'):
    """Train ``model`` with per-epoch validation, LR scheduling and early stopping.

    Each epoch runs one pass over ``train_loader``, then evaluates on
    ``val_loader``. Whenever validation accuracy improves, a checkpoint is
    written to ``checkpoint_path``. Training stops early after ``patience``
    consecutive epochs without improvement. At the end, the best checkpoint
    written *during this call* is loaded back into ``model``.

    Args:
        model: Network to train; expected to already live on the target device.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer updating ``model``'s parameters.
        scheduler: Scheduler whose ``step`` accepts the average validation
            loss (e.g. ``ReduceLROnPlateau``).
        num_epochs: Maximum number of epochs to run.
        patience: Number of non-improving epochs tolerated before stopping.
        device: Device batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters).
        checkpoint_path: File the best checkpoint is written to.

    Returns:
        Tuple ``(model, train_losses, val_losses, val_accuracies)`` where the
        three lists hold one value per completed epoch (accuracies in percent).
    """
    if device is None:
        # Follow the model instead of relying on a module-level global
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    best_val_acc = 0.0
    patience_counter = 0
    checkpoint_saved = False  # True only once THIS run has written a checkpoint
    train_losses = []
    val_losses = []
    val_accuracies = []

    print("Starting training...")
    print("=" * 60)

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0

        for batch_idx, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()

            # Print progress every 10 batches
            if batch_idx % 10 == 0:
                print(f'Epoch {epoch+1}/{num_epochs}, Batch {batch_idx}/{len(train_loader)}, Loss: {loss.item():.4f}')

        train_acc = 100 * train_correct / train_total
        avg_train_loss = train_loss / len(train_loader)

        # Validation phase: accuracy is counted on tensors, the same way as in
        # the training phase, instead of collecting per-sample host lists
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                val_loss += criterion(outputs, labels).item()

                preds = outputs.argmax(dim=1)
                val_total += labels.size(0)
                val_correct += (preds == labels).sum().item()

        # Calculate validation metrics (accuracy as a percentage)
        val_acc = val_correct / val_total * 100
        avg_val_loss = val_loss / len(val_loader)

        # Store metrics
        train_losses.append(avg_train_loss)
        val_losses.append(avg_val_loss)
        val_accuracies.append(val_acc)

        # Learning rate scheduling driven by the average validation loss
        scheduler.step(avg_val_loss)

        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {avg_train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.2f}%")
        print(f"Learning Rate: {optimizer.param_groups[0]['lr']:.6f}")
        print("-" * 60)

        # Early stopping check
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience_counter = 0
            # Save best model
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'best_val_acc': best_val_acc,
            }, checkpoint_path)
            checkpoint_saved = True
            print(f"💾 New best model saved! Validation Accuracy: {best_val_acc:.2f}%")
        else:
            patience_counter += 1
            print(f"⏳ Patience: {patience_counter}/{patience}")

            if patience_counter >= patience:
                print(f"🛑 Early stopping triggered at epoch {epoch+1}")
                print(f"Best validation accuracy: {best_val_acc:.2f}%")
                break

    # Load best model. Only restore a checkpoint written by this call, so a
    # missing file does not crash and a stale file from an earlier run is
    # never loaded by mistake.
    if checkpoint_saved:
        checkpoint = torch.load(checkpoint_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        print(f"✅ Best model loaded with validation accuracy: {checkpoint['best_val_acc']:.2f}%")
    else:
        print("⚠️ No checkpoint was saved during this run; returning the model as-is.")

    return model, train_losses, val_losses, val_accuracies

# Detailed evaluation function
def evaluate_model(model, val_loader, device, class_names=None):
    """Evaluate ``model`` on ``val_loader`` and print a metrics summary.

    Prints overall accuracy, weighted precision/recall/F1, a per-class
    classification report and per-class accuracy.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        val_loader: Iterable of ``(inputs, labels)`` batches.
        device: Device the batches are moved to before the forward pass.
        class_names: Display names indexed by integer class label.
            Defaults to ``['female', 'male']``.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``; accuracy is a fraction
        in [0, 1], the other three use weighted averaging over classes.
    """
    if class_names is None:
        class_names = ['female', 'male']
    class_ids = list(range(len(class_names)))

    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)

            # Predicted class = index of the largest logit
            preds = outputs.argmax(dim=1)
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    # Calculate metrics
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    print("🎯" + "="*50)
    print("📊 FINAL EVALUATION METRICS")
    print("="*51)
    print(f"Accuracy  : {accuracy:.4f}")
    print(f"Precision : {precision:.4f}")
    print(f"Recall    : {recall:.4f}")
    print(f"F1 Score  : {f1:.4f}")
    print("="*51)
    print("\n📋 Detailed Classification Report:")
    # Passing `labels` keeps the report aligned with `target_names` even when a
    # class is absent from both the labels and the predictions
    print(classification_report(all_labels, all_preds, labels=class_ids, target_names=class_names))

    # Class-wise accuracy
    class_correct = [0] * len(class_names)
    class_total = [0] * len(class_names)
    for label, pred in zip(all_labels, all_preds):
        class_total[label] += 1
        if pred == label:
            class_correct[label] += 1

    print("\n🎯 Class-wise Accuracy:")
    for i, class_name in enumerate(class_names):
        if class_total[i] > 0:
            acc = 100 * class_correct[i] / class_total[i]
            print(f"{class_name}: {acc:.2f}% ({class_correct[i]}/{class_total[i]})")

    return accuracy, precision, recall, f1

# Start training
print("🚀 Starting EfficientNet-B0 Training for Gender Classification")
print("📊 Dataset Info:")
print(f"   Training samples: {len(train_dataset)}")
print(f"   Validation samples: {len(val_dataset)}")
print(f"   Classes: {train_dataset.classes}")
print(f"   Device: {device}")
print("="*60)

# Train the model
trained_model, train_losses, val_losses, val_accuracies = train_model(
    model, train_loader, val_loader, criterion, optimizer, scheduler,
    num_epochs=50, patience=10
)

# Final evaluation
print("\n🏁 FINAL EVALUATION ON VALIDATION SET")
accuracy, precision, recall, f1 = evaluate_model(trained_model, val_loader, device)

# Save final model for submission.
# Metrics are cast to plain Python floats: depending on the scikit-learn version
# they may be NumPy scalars, which would be pickled into the file and can make it
# unreadable by `torch.load` when it only accepts weights and primitive types.
torch.save({
    'model_state_dict': trained_model.state_dict(),
    'model_architecture': 'efficientnet_b0',
    'num_classes': 2,
    'class_names': train_dataset.classes,
    'accuracy': float(accuracy),
    'precision': float(precision),
    'recall': float(recall),
    'f1_score': float(f1)
}, 'final_gender_classification_model.pth')

print("\n💾 Final model saved as 'final_gender_classification_model.pth'")
print("✅ Training completed successfully!")

# Report the measured result next to the earlier 90.28% figure instead of
# claiming an improvement unconditionally (`accuracy` is a fraction in [0, 1])
print(f"\n🎉 ALL DONE! Final validation accuracy: {accuracy * 100:.2f}% (previous baseline: 90.28%)")

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m43.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m25.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 83.1MB/s]


Original classifier: Sequential(
  (0): Dropout(p=0.2, inplace=True)
  (1): Linear(in_features=1280, out_features=1000, bias=True)
)
Modified classifier: Sequential(
  (0): Dropout(p=0.2, inplace=True)
  (1): Linear(in_features=1280, out_features=2, bias=True)
)
🚀 Starting EfficientNet-B0 Training for Gender Classification
📊 Dataset Info:
   Training samples: 1926
   Validation samples: 422
   Classes: ['female', 'male']
   Device: cpu
Starting training...




Epoch 1/50, Batch 0/61, Loss: 0.6624
Epoch 1/50, Batch 10/61, Loss: 0.3813
Epoch 1/50, Batch 20/61, Loss: 0.2674
Epoch 1/50, Batch 30/61, Loss: 0.1510
Epoch 1/50, Batch 40/61, Loss: 0.1683
Epoch 1/50, Batch 50/61, Loss: 0.1588
Epoch 1/50, Batch 60/61, Loss: 0.7105
Epoch 1/50
Train Loss: 0.2737, Train Acc: 88.53%
Val Loss: 0.2156, Val Acc: 91.00%
Learning Rate: 0.001000
------------------------------------------------------------
💾 New best model saved! Validation Accuracy: 91.00%
Epoch 2/50, Batch 0/61, Loss: 0.1712
Epoch 2/50, Batch 10/61, Loss: 0.3508
Epoch 2/50, Batch 20/61, Loss: 0.1575
Epoch 2/50, Batch 30/61, Loss: 0.2121
Epoch 2/50, Batch 40/61, Loss: 0.0850
Epoch 2/50, Batch 50/61, Loss: 0.0402
Epoch 2/50, Batch 60/61, Loss: 0.0657
Epoch 2/50
Train Loss: 0.1755, Train Acc: 93.15%
Val Loss: 0.2302, Val Acc: 90.28%
Learning Rate: 0.001000
------------------------------------------------------------
⏳ Patience: 1/10
Epoch 3/50, Batch 0/61, Loss: 0.0412
Epoch 3/50, Batch 10/61, Los