# Gender Classification using EfficientNet


## Step 1: Import Required Libraries

In [None]:
import os
import random
import shutil
import zipfile
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from PIL import Image

# PyTorch Libraries
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, WeightedRandomSampler
from torchvision import datasets, transforms

# Evaluation Metrics
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Albumentations for Augmentation
import albumentations as A
from albumentations.pytorch import ToTensorV2

## Step 2: Load & Preprocess Dataset

In [None]:
# One-time Colab setup (kept commented out): mount Google Drive so the
# dataset archive and extracted folders are reachable under /content/drive.
# from google.colab import drive

# drive.mount('/content/drive')

# One-time extraction of the dataset archive into Drive (kept commented out).
# zip_path = "/content/drive/MyDrive/Colab Notebooks/Comys_Hackathon5.zip"
# extract_path = "/content/drive/MyDrive/Comys_Hackathon5"

# with zipfile.ZipFile(zip_path, 'r') as zip_ref:
#     zip_ref.extractall(extract_path)


# Root folders handed to datasets.ImageFolder below.
# NOTE: test_dir points at the "val" split; it is the data used both for the
# per-epoch validation metrics and for the final classification report.
train_dir = "/content/drive/MyDrive/Comys_Hackathon5/Comys_Hackathon5/Task_A/train"
test_dir = "/content/drive/MyDrive/Comys_Hackathon5/Comys_Hackathon5/Task_A/val"

class AlbumentationsTransform:
    """Adapter exposing an Albumentations-style pipeline as a single-argument transform.

    The wrapped pipeline is called with the keyword argument ``image`` and is
    expected to return a mapping; only its ``"image"`` entry is handed back.
    This lets the pipeline be passed as the ``transform`` of a dataset that
    calls ``transform(sample)`` with one positional argument.
    """

    def __init__(self, transform):
        # Pipeline invoked as transform(image=<ndarray>) -> {"image": ...}.
        self.transform = transform

    def __call__(self, image):
        """Convert *image* to a NumPy array, run the pipeline, return its "image" output."""
        pixels = np.array(image)
        augmented = self.transform(image=pixels)
        return augmented["image"]

# Per-channel normalisation statistics (the standard ImageNet values).
mean = [0.485, 0.456, 0.406]
std  = [0.229, 0.224, 0.225]

# Training pipeline: resize, random photometric/geometric augmentation, then
# normalise and convert to a CHW tensor. Order matters: the augmentations run
# on the resized image before normalisation.
train_steps = [
    A.Resize(224, 224),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.CLAHE(p=0.3),
    A.RandomShadow(p=0.3),
    A.Normalize(mean=mean, std=std),
    ToTensorV2(),
]
albumentations_train = A.Compose(train_steps)

# Evaluation pipeline: deterministic resize + normalise only.
test_steps = [
    A.Resize(224, 224),
    A.Normalize(mean=mean, std=std),
    ToTensorV2(),
]
albumentations_test = A.Compose(test_steps)

# Wrap the pipelines so ImageFolder can call them with a single image.
train_transform = AlbumentationsTransform(albumentations_train)
test_transform  = AlbumentationsTransform(albumentations_test)

# One sub-folder per class; labels are assigned by ImageFolder.
train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transform)
test_dataset  = datasets.ImageFolder(root=test_dir, transform=test_transform)

print("Classes:", train_dataset.classes)
print("Class-to-Index Mapping:", train_dataset.class_to_idx)

## Step 3: Define Data Loaders

In [None]:
# Class-balanced sampling: each training image is drawn with probability
# inversely proportional to the size of its class.
targets = train_dataset.targets
target_ids = np.asarray(targets, dtype=np.int64)

# bincount indexes the counts by class id, so weights[class_id] is always the
# weight of that class (counting via np.unique only lines up with the ids
# when they are contiguous), and the targets are converted to an array once
# instead of once per class.
class_sample_count = np.bincount(target_ids, minlength=len(train_dataset.classes))
weights = 1. / class_sample_count
sample_weights = torch.from_numpy(weights[target_ids]).double()

# Sampling with replacement, one epoch = as many draws as there are images.
sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

# `sampler` and `shuffle` are mutually exclusive, so the train loader relies
# on the sampler for randomisation.
train_loader = DataLoader(train_dataset, batch_size=32, sampler=sampler)
test_loader  = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=2)

# Sanity check: one transformed sample and its label.
img, label = train_dataset[0]
print("Sample Image Shape:", img.shape)
print("Sample Label:", label)

## Step 4: Model Setup (EfficientNet)

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)

!pip install -q efficientnet_pytorch
from efficientnet_pytorch import EfficientNet

model = EfficientNet.from_pretrained('efficientnet-b0')
model._fc = nn.Sequential(
    nn.Dropout(p=0.4),
    nn.Linear(model._fc.in_features, 2)
)
model.to(device)

## Step 5: Loss Function & Optimizer

In [None]:
# Per-class image counts taken from the dataset itself, indexed by class id.
# Counting directory entries instead would include any non-image files and
# would silently depend on "female"/"male" being class ids 0/1.
class_counts = np.bincount(train_dataset.targets, minlength=len(train_dataset.classes))
female_count = int(class_counts[train_dataset.class_to_idx["female"]])
male_count   = int(class_counts[train_dataset.class_to_idx["male"]])
total = int(class_counts.sum())

# Inverse-frequency class weights; position i is the weight of class id i,
# which is the layout CrossEntropyLoss(weight=...) expects.
weights = torch.tensor(total / class_counts, dtype=torch.float32).to(device)

# NOTE(review): train_loader already rebalances classes through its
# WeightedRandomSampler, so these loss weights up-weight the minority class a
# second time. Confirm that the double correction is intended.
criterion = nn.CrossEntropyLoss(weight=weights)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
# Halve the learning rate every 5 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

## Step 6: Training & Evaluation Functions

In [None]:
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimisation pass over *dataloader* and report epoch metrics.

    Args:
        model: Network to train; switched to training mode here.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable applied to ``(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_batch_loss, accuracy, precision, recall, f1)``. The loss
        is the average of the per-batch loss values; precision, recall and F1
        use the scikit-learn defaults (binary averaging, positive label 1).
    """
    model.train()
    running_loss = 0
    predicted, expected = [], []

    for batch_images, batch_labels in dataloader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        # Predicted class = index of the largest logit per sample.
        predicted.extend(logits.argmax(dim=1).cpu().numpy())
        expected.extend(batch_labels.cpu().numpy())

    mean_loss = running_loss / len(dataloader)
    accuracy = accuracy_score(expected, predicted)
    precision = precision_score(expected, predicted)
    recall = recall_score(expected, predicted)
    f1 = f1_score(expected, predicted)
    return mean_loss, accuracy, precision, recall, f1

def evaluate(model, dataloader, criterion, device):
    """Score *model* on *dataloader* without updating its weights.

    Args:
        model: Network to evaluate; switched to eval mode here.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_batch_loss, accuracy, precision, recall, f1)``. The loss
        is the average of the per-batch loss values; precision, recall and F1
        use the scikit-learn defaults (binary averaging, positive label 1).
    """
    model.eval()
    running_loss = 0
    predicted, expected = [], []

    # Gradients are not needed for scoring.
    with torch.no_grad():
        for batch_images, batch_labels in dataloader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            running_loss += criterion(logits, batch_labels).item()

            # Predicted class = index of the largest logit per sample.
            predicted.extend(logits.argmax(dim=1).cpu().numpy())
            expected.extend(batch_labels.cpu().numpy())

    mean_loss = running_loss / len(dataloader)
    accuracy = accuracy_score(expected, predicted)
    precision = precision_score(expected, predicted)
    recall = recall_score(expected, predicted)
    f1 = f1_score(expected, predicted)
    return mean_loss, accuracy, precision, recall, f1

## Step 7: Train the Model

In [None]:
num_epochs = 5
# Start below any attainable F1 so the first epoch always writes a checkpoint.
# Starting at 0.0 with a strict ">" would save nothing if validation F1 stayed
# at 0, and the later load of "best_model.pth" would then fail or pick up a
# stale file from an earlier run.
best_f1 = float("-inf")

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")

    train_loss, train_acc, train_prec, train_rec, train_f1 = train_one_epoch(
        model, train_loader, optimizer, criterion, device)

    # Validation runs on test_loader (the "val" split).
    val_loss, val_acc, val_prec, val_rec, val_f1 = evaluate(
        model, test_loader, criterion, device)

    print(f"Train Loss: {train_loss:.4f} | Acc: {train_acc:.4f} | Prec: {train_prec:.4f} | Rec: {train_rec:.4f} | F1: {train_f1:.4f}")
    print(f"Val   Loss: {val_loss:.4f} | Acc: {val_acc:.4f} | Prec: {val_prec:.4f} | Rec: {val_rec:.4f} | F1: {val_f1:.4f}")

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

    # Keep only the weights with the best validation F1 seen so far.
    if val_f1 > best_f1:
        best_f1 = val_f1
        torch.save(model.state_dict(), "best_model.pth")
        print("Best model saved!")

## Step 8: Final Evaluation and Confusion Matrix

In [None]:
# Restore the best checkpoint written during training. map_location remaps the
# saved tensors onto the current device, so the load also works when the
# checkpoint was written on a GPU and this runtime only has a CPU.
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model.eval()

all_preds, all_labels = [], []

# Collect predictions and ground-truth labels over the whole validation split.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        outputs = model(images)
        preds = torch.argmax(outputs, dim=1)

        all_preds.extend(preds.cpu().numpy())
        # Labels never left the CPU, so no device transfer is needed.
        all_labels.extend(labels.numpy())

# Per-class precision / recall / F1, labelled with the folder-derived class names.
target_names = train_dataset.classes
print(classification_report(all_labels, all_preds, target_names=target_names))

# Confusion matrix heatmap: rows are actual classes, columns are predictions.
import seaborn as sns
cm = confusion_matrix(all_labels, all_preds)
plt.figure(figsize=(6,4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=target_names, yticklabels=target_names)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

## Step 9: Save and Reload Model for Inference

In [None]:
# "best_model.pth" is written by the training loop whenever validation F1
# improves. It is deliberately NOT re-saved from the live `model` here: if
# this cell runs while `model` holds anything other than the best weights
# (e.g. the last-epoch weights), that would silently replace the best
# checkpoint with worse ones.

# Full-module export. NOTE: this pickles the whole model object, so loading it
# later requires the efficientnet_pytorch package to be importable.
torch.save(model, "gender_classification_model.pt")

# Rebuild the architecture without downloading pretrained weights, attach the
# same 2-way head, then load the fine-tuned weights from the checkpoint.
model = EfficientNet.from_name('efficientnet-b0')
model._fc = nn.Sequential(
    nn.Dropout(0.4),
    nn.Linear(model._fc.in_features, 2)
)
# map_location lets a checkpoint saved on GPU load on a CPU-only runtime.
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model.eval()
model.to(device)

## Step 10: Inference on a New Image

In [None]:
# Deterministic (no augmentation) preprocessing, same as used for validation.
val_transform = AlbumentationsTransform(albumentations_test)

def predict_image(image_path, model, transform, class_names):
    """Classify a single image file and return its class name.

    Args:
        image_path: Path of the image to classify.
        model: Trained classifier; its training/eval mode is restored on exit.
        transform: Callable invoked as ``transform(image=<ndarray>)`` that
            returns a tensor without a batch dimension (for example an
            ``AlbumentationsTransform``).
        class_names: Sequence mapping a predicted class index to its name.

    Returns:
        The entry of *class_names* at the predicted index.
    """
    # Close the file handle as soon as the pixels have been read.
    with Image.open(image_path) as img:
        image = np.array(img.convert("RGB"))

    # Feed the input on whichever device the model's weights live on, rather
    # than relying on a module-level `device` variable.
    model_device = next(model.parameters()).device
    batch = transform(image=image).unsqueeze(0).to(model_device)

    # Inference must run in eval mode (dropout disabled); put the model back
    # into its previous mode afterwards so the caller sees no side effect.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            output = model(batch)
            pred = torch.argmax(output, dim=1).item()
    finally:
        model.train(was_training)

    return class_names[pred]

# NOTE: this sample comes from the training split, so it only demonstrates the
# inference path; it is not an unseen-data check.
test_path = "/content/drive/MyDrive/Comys_Hackathon5/Comys_Hackathon5/Task_A/train/male/Abraham_Foxman_0001.jpg"
prediction = predict_image(test_path, model, val_transform, train_dataset.classes)
print(f"Predicted Gender: {prediction}")