In [9]:
import os
import random
from PIL import Image
from collections import defaultdict

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import pickle

In [None]:
print(torch.cuda.is_available())  # True = GPU exists

False


In [None]:
# ========== CONFIG ==========
DATA_DIR = 'tammathon-task-1\\train\\train\\' #change here to your folder
BATCH_SIZE = 32
NUM_EPOCHS = 10
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
N = 999999 # how many folders to use (for testing purposes you can set N = 100)

In [None]:
%%time
# ========== STEP 1: LOAD & SPLIT DATA ==========
def load_image_paths(data_dir):
    dirs = os.listdir(data_dir)
    label_names = dirs[:N]
    print(len(label_names))
    
    label_to_idx = {label: idx for idx, label in enumerate(label_names)}
    idx_to_label = {idx: label for label, idx in label_to_idx.items()}

    train_data = []
    val_data = []

    for i, label_name in enumerate(label_names):
        label_path = os.path.join(data_dir, label_name)
        image_paths = [os.path.join(label_path, f) for f in os.listdir(label_path) if f.endswith('.png')]
        
        if len(image_paths) >= 3:
            train_data.extend((img, label_to_idx[label_name]) for img in image_paths[1:])
            val_data.append((image_paths[0], label_to_idx[label_name]))
        else:
            train_data.extend((img, label_to_idx[label_name]) for img in image_paths[:])
        
        if not i % 10000:
            print(i)

    return train_data, val_data, label_to_idx, idx_to_label


train_data, val_data, label_to_idx, idx_to_label = load_image_paths(DATA_DIR)
num_classes = len(label_to_idx)

10
0
CPU times: total: 62.5 ms
Wall time: 71.8 ms


In [15]:
# ========== STEP 2: DEFINE DATASET ==========

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

class CatDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path, label = self.data[idx]
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label

train_dataset = CatDataset(train_data, transform=transform)
val_dataset = CatDataset(val_data, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)


In [16]:
# ========== STEP 3: MODEL SETUP ==========

model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)



In [17]:
# ========== STEP 4: TRAINING LOOP ==========

for epoch in range(NUM_EPOCHS):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(DEVICE), labels.to(DEVICE)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        
    result_vars = {'model': model,
                   'running_loss': running_loss,
                   'optimizer': optimizer,
                   'outputs': outputs,
                   'loss': loss}
        
    with open(f'result_vars_epoch{epoch+1}.pkl', 'wb') as f:
        pickle.dump(result_vars, f)

    print(f"Epoch {epoch+1}/{NUM_EPOCHS}, Loss: {running_loss / len(train_loader):.4f}")

Epoch 1/10, Loss: 2.3615
Epoch 2/10, Loss: 1.1363
Epoch 3/10, Loss: 1.1422
Epoch 4/10, Loss: 0.5658
Epoch 5/10, Loss: 0.5271
Epoch 6/10, Loss: 0.4517
Epoch 7/10, Loss: 0.2322
Epoch 8/10, Loss: 0.1679
Epoch 9/10, Loss: 0.3401
Epoch 10/10, Loss: 0.3134


In [20]:

# ========== STEP 5: VALIDATION & PREDICTION ==========

model.eval()
top3_predictions = []

with torch.no_grad():
    for image, true_label in val_loader:
        image = image.to(DEVICE)
        outputs = model(image)
        probs = F.softmax(outputs, dim=1)
        top3 = torch.topk(probs, 3)
        top3_idxs = top3.indices.cpu().numpy()[0]
        top3_labels = [idx_to_label[i] for i in top3_idxs]
        top3_predictions.append(top3_labels)


In [22]:
# ========== STEP 6: OUTPUT RESULTS ==========

for i, (img_path, true_label) in enumerate(val_data[:5]):
    true_label_str = idx_to_label[true_label]
    print(f"Image: {img_path}")
    print(f"True Label: {true_label_str}")
    print(f"Top 3 Predicted Labels: {top3_predictions[i]}")
    print("-" * 40)

Image: tammathon-task-1\train\train\000000\00.png
True Label: 000000
Top 3 Predicted Labels: ['000000', '000008', '000009']
----------------------------------------
Image: tammathon-task-1\train\train\000001\00.png
True Label: 000001
Top 3 Predicted Labels: ['000002', '000001', '000009']
----------------------------------------
Image: tammathon-task-1\train\train\000002\00.png
True Label: 000002
Top 3 Predicted Labels: ['000002', '000006', '000005']
----------------------------------------
Image: tammathon-task-1\train\train\000003\00.png
True Label: 000003
Top 3 Predicted Labels: ['000003', '000005', '000002']
----------------------------------------
Image: tammathon-task-1\train\train\000004\00.png
True Label: 000004
Top 3 Predicted Labels: ['000004', '000000', '000002']
----------------------------------------


In [23]:
# Calculate top-3 accuracy
correct = 0
total = len(val_data)

for i, (_, true_label) in enumerate(val_data):
    predicted_labels = top3_predictions[i]
    true_label_str = idx_to_label[true_label]
    if true_label_str in predicted_labels:
        correct += 1

top3_accuracy = correct / total
print(f"Top-3 Accuracy: {top3_accuracy:.4f}")

Top-3 Accuracy: 1.0000
