# 🟦 Baseline ResNet-18 Animal Classifier

This notebook implements the initial baseline ResNet-18 model for Phase 1 and Phase 2 submission.

**Submission Format:**
- phase1_predictions.csv (labeled data only)
- phase2_predictions.csv (labeled + unlabeled data)

**CSV Format:**
```
path,predicted_label
test_img001.jpg,class_2
test_img002.jpg,class_5
test_img003.jpg,class_1
...
```

---

In [1]:
# Environment Detection
try:
    import google.colab
    IN_COLAB = True
    print("Google Colab detected")
except ImportError:
    IN_COLAB = False
    print("Local Jupyter detected")
BASE_PATH = '/content' if IN_COLAB else '.'

Local Jupyter detected


In [3]:
# Install required packages
import sys, subprocess
def install_packages():
    pkgs = ['torch', 'torchvision', 'pandas', 'numpy', 'pillow', 'scikit-learn', 'tqdm', 'requests']
    if IN_COLAB:
        pkgs.append('gdown')
    for pkg in pkgs:
        try:
            subprocess.run([sys.executable, '-m', 'pip', 'install', pkg], check=True, capture_output=True)
        except Exception as e:
            print(f'Could not install {pkg}: {e}')
install_packages()

In [4]:
# Data Download
if IN_COLAB:
    import gdown, os
    gdown.download('https://drive.google.com/uc?id=18MA0qKg1rqP92HApr_Fjck7Zo4Bwdqdu', f'{BASE_PATH}/HV-AI-2025.zip', quiet=False)
    os.system(f'cd {BASE_PATH} && unzip -q HV-AI-2025.zip')
    os.system(f'rm -rf {BASE_PATH}/__MACOSX')
    os.system(f'mv {BASE_PATH}/HV-AI-2025/* {BASE_PATH}/')
    os.system(f'rm -rf {BASE_PATH}/HV-AI-2025 {BASE_PATH}/HV-AI-2025.zip')
    gdown.download('https://drive.google.com/uc?id=1aszVlQFQOwJTy9tt79s7x87VJyYw-Sxy', f'{BASE_PATH}/HV-AI-2025-Test.zip', quiet=False)
    os.system(f'cd {BASE_PATH} && unzip -q HV-AI-2025-Test.zip')
    os.system(f'rm -rf {BASE_PATH}/__MACOSX')
    os.system(f'mv {BASE_PATH}/HV-AI-2025-Test/* {BASE_PATH}/')
    os.system(f'rm -rf {BASE_PATH}/HV-AI-2025-Test {BASE_PATH}/HV-AI-2025-Test.zip')
else:
    print("Assuming data is present in HV-AI-2025/ and test_data/ folders locally.")

Assuming data is present in HV-AI-2025/ and test_data/ folders locally.


In [5]:
# Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import torchvision.models as models
import pandas as pd
import numpy as np
from PIL import Image
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [6]:
# Device
if torch.cuda.is_available():
    device = torch.device('cuda')
    print('Using CUDA')
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
    device = torch.device('mps')
    print('Using Metal (MPS)')
else:
    device = torch.device('cpu')
    print('Using CPU')

Using Metal (MPS)


In [7]:
# Data
df = pd.read_csv(f'{BASE_PATH}/labeled_data/labeled_data.csv')
label_encoder = LabelEncoder()
df['encoded_label'] = label_encoder.fit_transform(df['label'])
num_classes = len(label_encoder.classes_)

In [8]:
# Dataset
class AnimalDataset(Dataset):
    def __init__(self, dataframe, images_dir, transform=None):
        self.dataframe = dataframe
        self.images_dir = images_dir
        self.transform = transform
    def __len__(self):
        return len(self.dataframe)
    def __getitem__(self, idx):
        img_name = self.dataframe.iloc[idx]['img_name']
        img_path = os.path.join(self.images_dir, img_name)
        image = Image.open(img_path).convert('RGB')
        label = self.dataframe.iloc[idx]['encoded_label']
        if self.transform:
            image = self.transform(image)
        return image, label

In [9]:
# Transforms
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [10]:
# Split
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['label'])
train_dataset = AnimalDataset(train_df.reset_index(drop=True), f'{BASE_PATH}/labeled_data/images', train_transform)
val_dataset = AnimalDataset(val_df.reset_index(drop=True), f'{BASE_PATH}/labeled_data/images', val_transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=0)

In [11]:
# Model
model = models.resnet18(weights='IMAGENET1K_V1')
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

In [12]:
# Loss, Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [13]:
# Training Loop
def train_model(model, train_loader, val_loader, criterion, optimizer, device, epochs=10):
    best_acc = 0
    for epoch in range(epochs):
        model.train()
        running_loss, correct, total = 0, 0, 0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
        train_acc = 100 * correct / total
        model.eval()
        val_loss, val_correct, val_total = 0, 0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, preds = torch.max(outputs, 1)
                val_correct += (preds == labels).sum().item()
                val_total += labels.size(0)
        val_acc = 100 * val_correct / val_total
        print(f'Epoch {epoch+1}: Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%')
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), f'{BASE_PATH}/best_resnet18.pth')
    print(f'Best Val Acc: {best_acc:.2f}%')
    return model
model = train_model(model, train_loader, val_loader, criterion, optimizer, device, epochs=10)

100%|██████████| 20/20 [00:03<00:00,  6.11it/s]



Epoch 1: Train Acc: 56.98%, Val Acc: 30.77%


100%|██████████| 20/20 [00:02<00:00,  7.40it/s]
100%|██████████| 20/20 [00:02<00:00,  7.40it/s]


Epoch 2: Train Acc: 73.52%, Val Acc: 46.15%


100%|██████████| 20/20 [00:02<00:00,  7.58it/s]



Epoch 3: Train Acc: 81.86%, Val Acc: 62.82%


100%|██████████| 20/20 [00:02<00:00,  7.40it/s]



Epoch 4: Train Acc: 89.25%, Val Acc: 64.10%


100%|██████████| 20/20 [00:02<00:00,  7.46it/s]



Epoch 5: Train Acc: 91.17%, Val Acc: 48.72%


100%|██████████| 20/20 [00:02<00:00,  7.47it/s]



Epoch 6: Train Acc: 93.42%, Val Acc: 64.74%


100%|██████████| 20/20 [00:02<00:00,  7.39it/s]



Epoch 7: Train Acc: 95.67%, Val Acc: 64.10%


100%|██████████| 20/20 [00:02<00:00,  7.53it/s]
100%|██████████| 20/20 [00:02<00:00,  7.53it/s]


Epoch 8: Train Acc: 94.22%, Val Acc: 57.69%


100%|██████████| 20/20 [00:02<00:00,  7.62it/s]
100%|██████████| 20/20 [00:02<00:00,  7.62it/s]


Epoch 9: Train Acc: 93.90%, Val Acc: 66.03%


100%|██████████| 20/20 [00:02<00:00,  7.62it/s]



Epoch 10: Train Acc: 93.74%, Val Acc: 53.21%
Best Val Acc: 66.03%


In [14]:
# Inference for Submission
def predict_and_save(model, test_dir, label_encoder, output_csv):
    model.eval()
    results = []
    for fname in sorted(os.listdir(test_dir)):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png')):
            img = Image.open(os.path.join(test_dir, fname)).convert('RGB')
            img_tensor = val_transform(img).unsqueeze(0).to(device)
            with torch.no_grad():
                output = model(img_tensor)
                pred = output.argmax(1).item()
                pred_label = label_encoder.inverse_transform([pred])[0]
            results.append({'path': fname, 'predicted_label': pred_label})
    pd.DataFrame(results).to_csv(output_csv, index=False)
    print(f'Saved predictions to {output_csv}')

In [15]:
# Phase 1 Submission
test_dir = f'{BASE_PATH}/test_images'  # Updated path for test images
model.load_state_dict(torch.load(f'{BASE_PATH}/best_resnet18.pth', map_location=device))
predict_and_save(model, test_dir, label_encoder, 'phase1_predictions.csv')

Saved predictions to phase1_predictions.csv


In [16]:
# Test Model Performance
def test_model(model, val_loader, device):
    model.eval()
    correct = 0
    total = 0
    class_correct = list(0. for i in range(num_classes))
    class_total = list(0. for i in range(num_classes))
    
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            # Per-class accuracy
            c = (predicted == labels).squeeze()
            for i in range(labels.size(0)):
                label = labels[i]
                class_correct[label] += c[i].item()
                class_total[label] += 1
    
    # Overall accuracy
    print(f'Overall Test Accuracy: {100 * correct / total:.2f}%')
    
    # Per-class accuracy
    print('\nPer-class Accuracy:')
    for i in range(num_classes):
        class_name = label_encoder.inverse_transform([i])[0]
        if class_total[i] > 0:
            acc = 100 * class_correct[i] / class_total[i]
            print(f'{class_name}: {acc:.2f}% ({int(class_correct[i])}/{int(class_total[i])})')

# Load best model and test
model.load_state_dict(torch.load(f'{BASE_PATH}/best_resnet18.pth', map_location=device))
test_model(model, val_loader, device)

Overall Test Accuracy: 66.03%

Per-class Accuracy:
cane: 93.10% (27/29)
cavallo: 50.00% (8/16)
elefante: 62.50% (5/8)
farfalla: 69.23% (9/13)
gallina: 55.56% (10/18)
gatto: 30.00% (3/10)
mucca: 54.55% (6/11)
pecora: 27.27% (3/11)
ragno: 89.66% (26/29)
scoiattolo: 54.55% (6/11)


In [17]:
# Phase 2: Pseudo-Labeling Dataset
class UnlabeledDataset(Dataset):
    def __init__(self, images_dir, transform=None):
        self.images_dir = images_dir
        self.transform = transform
        self.image_files = [f for f in os.listdir(images_dir) 
                           if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
    
    def __len__(self):
        return len(self.image_files)
    
    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        img_path = os.path.join(self.images_dir, img_name)
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, img_name

# Generate pseudo labels for unlabeled data
def generate_pseudo_labels(model, unlabeled_loader, confidence_threshold=0.9):
    model.eval()
    pseudo_labels = []
    
    with torch.no_grad():
        for images, img_names in tqdm(unlabeled_loader, desc="Generating pseudo labels"):
            images = images.to(device)
            outputs = model(images)
            probs = torch.softmax(outputs, dim=1)
            max_probs, predicted = torch.max(probs, 1)
            
            for i, (prob, pred, img_name) in enumerate(zip(max_probs, predicted, img_names)):
                if prob.item() >= confidence_threshold:
                    pred_label = label_encoder.inverse_transform([pred.item()])[0]
                    pseudo_labels.append({
                        'img_name': img_name,
                        'label': pred_label,
                        'encoded_label': pred.item(),
                        'confidence': prob.item()
                    })
    
    return pd.DataFrame(pseudo_labels)

In [18]:
# Phase 2 Training with Pseudo Labels
def train_phase2(model, labeled_df, pseudo_df, epochs=5, confidence_threshold=0.9):
    # Load unlabeled data and generate pseudo labels
    unlabeled_dir = f'{BASE_PATH}/unlabeled_data/images'
    unlabeled_dataset = UnlabeledDataset(unlabeled_dir, val_transform)
    unlabeled_loader = DataLoader(unlabeled_dataset, batch_size=32, shuffle=False, num_workers=0)
    
    print("Generating pseudo labels...")
    pseudo_df = generate_pseudo_labels(model, unlabeled_loader, confidence_threshold)
    print(f"Generated {len(pseudo_df)} pseudo labels with confidence >= {confidence_threshold}")
    
    # Combine labeled and pseudo-labeled data
    combined_df = pd.concat([labeled_df, pseudo_df], ignore_index=True)
    print(f"Combined dataset size: {len(combined_df)} (labeled: {len(labeled_df)}, pseudo: {len(pseudo_df)})")
    
    # Create combined datasets
    combined_train_df, combined_val_df = train_test_split(
        combined_df, test_size=0.15, random_state=42, stratify=combined_df['label']
    )
    
    # Update image directories for datasets
    def get_image_dir(img_name, labeled_dir, unlabeled_dir):
        if os.path.exists(os.path.join(labeled_dir, img_name)):
            return labeled_dir
        else:
            return unlabeled_dir
    
    class CombinedDataset(Dataset):
        def __init__(self, dataframe, labeled_dir, unlabeled_dir, transform=None):
            self.dataframe = dataframe
            self.labeled_dir = labeled_dir
            self.unlabeled_dir = unlabeled_dir
            self.transform = transform
        
        def __len__(self):
            return len(self.dataframe)
        
        def __getitem__(self, idx):
            row = self.dataframe.iloc[idx]
            img_name = row['img_name']
            
            # Check if image is in labeled or unlabeled directory
            labeled_path = os.path.join(self.labeled_dir, img_name)
            if os.path.exists(labeled_path):
                img_path = labeled_path
            else:
                img_path = os.path.join(self.unlabeled_dir, img_name)
            
            image = Image.open(img_path).convert('RGB')
            label = row['encoded_label']
            
            if self.transform:
                image = self.transform(image)
            
            return image, label
    
    # Create combined dataloaders
    combined_train_dataset = CombinedDataset(
        combined_train_df.reset_index(drop=True),
        f'{BASE_PATH}/labeled_data/images',
        f'{BASE_PATH}/unlabeled_data/images',
        train_transform
    )
    combined_val_dataset = CombinedDataset(
        combined_val_df.reset_index(drop=True),
        f'{BASE_PATH}/labeled_data/images',
        f'{BASE_PATH}/unlabeled_data/images',
        val_transform
    )
    
    combined_train_loader = DataLoader(combined_train_dataset, batch_size=32, shuffle=True, num_workers=0)
    combined_val_loader = DataLoader(combined_val_dataset, batch_size=32, shuffle=False, num_workers=0)
    
    # Fine-tune with lower learning rate
    optimizer_phase2 = optim.Adam(model.parameters(), lr=0.0001)
    
    print("\\nStarting Phase 2 training...")
    best_acc = 0
    for epoch in range(epochs):
        model.train()
        running_loss, correct, total = 0, 0, 0
        
        for images, labels in tqdm(combined_train_loader, desc=f"Epoch {epoch+1}"):
            images, labels = images.to(device), labels.to(device)
            optimizer_phase2.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer_phase2.step()
            
            running_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
        
        train_acc = 100 * correct / total
        
        # Validation
        model.eval()
        val_correct, val_total = 0, 0
        with torch.no_grad():
            for images, labels in combined_val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, preds = torch.max(outputs, 1)
                val_correct += (preds == labels).sum().item()
                val_total += labels.size(0)
        
        val_acc = 100 * val_correct / val_total
        print(f'Phase 2 Epoch {epoch+1}: Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%')
        
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), f'{BASE_PATH}/best_resnet18_phase2.pth')
    
    print(f'Phase 2 Best Val Acc: {best_acc:.2f}%')
    return model

# Run Phase 2 training
print("Starting Phase 2 training with pseudo-labeling...")
model = train_phase2(model, df, None, epochs=5, confidence_threshold=0.85)

Starting Phase 2 training with pseudo-labeling...
Generating pseudo labels...


Generating pseudo labels: 100%|██████████| 463/463 [00:38<00:00, 11.96it/s]
Generating pseudo labels: 100%|██████████| 463/463 [00:38<00:00, 11.96it/s]


Generated 5986 pseudo labels with confidence >= 0.85
Combined dataset size: 6765 (labeled: 779, pseudo: 5986)
\nStarting Phase 2 training...


Epoch 1: 100%|██████████| 180/180 [00:26<00:00,  6.67it/s]



Phase 2 Epoch 1: Train Acc: 89.74%, Val Acc: 96.06%


Epoch 2: 100%|██████████| 180/180 [00:25<00:00,  6.92it/s]



Phase 2 Epoch 2: Train Acc: 95.36%, Val Acc: 96.65%


Epoch 3: 100%|██████████| 180/180 [00:25<00:00,  6.98it/s]



Phase 2 Epoch 3: Train Acc: 96.59%, Val Acc: 95.57%


Epoch 4: 100%|██████████| 180/180 [00:26<00:00,  6.84it/s]



Phase 2 Epoch 4: Train Acc: 96.82%, Val Acc: 95.96%


Epoch 5: 100%|██████████| 180/180 [00:26<00:00,  6.89it/s]



Phase 2 Epoch 5: Train Acc: 97.81%, Val Acc: 94.78%
Phase 2 Best Val Acc: 96.65%


In [19]:
# Test Phase 2 Model
print("Testing Phase 2 model performance...")
model.load_state_dict(torch.load(f'{BASE_PATH}/best_resnet18_phase2.pth', map_location=device))
test_model(model, val_loader, device)

Testing Phase 2 model performance...
Overall Test Accuracy: 85.90%

Per-class Accuracy:
cane: 96.55% (28/29)
cavallo: 81.25% (13/16)
elefante: 100.00% (8/8)
farfalla: 84.62% (11/13)
gallina: 83.33% (15/18)
gatto: 90.00% (9/10)
mucca: 63.64% (7/11)
pecora: 72.73% (8/11)
ragno: 93.10% (27/29)
scoiattolo: 72.73% (8/11)
Overall Test Accuracy: 85.90%

Per-class Accuracy:
cane: 96.55% (28/29)
cavallo: 81.25% (13/16)
elefante: 100.00% (8/8)
farfalla: 84.62% (11/13)
gallina: 83.33% (15/18)
gatto: 90.00% (9/10)
mucca: 63.64% (7/11)
pecora: 72.73% (8/11)
ragno: 93.10% (27/29)
scoiattolo: 72.73% (8/11)


In [20]:
# Phase 2 Submission (Labeled + Unlabeled)
print("Generating Phase 2 predictions...")
test_dir = f'{BASE_PATH}/test_images'  # Updated path for test images
model.load_state_dict(torch.load(f'{BASE_PATH}/best_resnet18_phase2.pth', map_location=device))
predict_and_save(model, test_dir, label_encoder, 'phase2_predictions.csv')

Generating Phase 2 predictions...
Saved predictions to phase2_predictions.csv
Saved predictions to phase2_predictions.csv


In [39]:
import requests

def send_results_for_evaluation(name, csv_file, email):
    url = "http://43.205.49.236:5050/inference"
    files = {'file': open(csv_file, 'rb')}
    data = {'email': email, 'name':name}
    response = requests.post(url, files=files, data=data)
    response.raise_for_status()
    return response.json()



In [42]:

print('Accuracy: ')
print(send_results_for_evaluation('Hariharan Mudaliar', '/Users/hariharan/Hiring/HyperVerge/phase1_predictions.csv', 'hm4144@srmist.edu.in'))




Accuracy: 
{'accuracy': 36.37}
