In [1]:
from Resnet import ResNet, Block, create_pairs, create_tensors
import random
import pandas as pd
import numpy as np
import os
import torch
from torch.nn import TripletMarginLoss
from torchvision.io import decode_image, ImageReadMode
from torch.utils.data import Dataset, Sampler, DataLoader
from torchvision.transforms import v2
from torch.optim import Adam
from collections import defaultdict
import mlflow

In [2]:
"""
creates batches with a size of 15, ensuring we are using all samples provided at random.
"""

def create_batches(label_mapping, n_classes, n_samples):
    all_classes = list(label_mapping.keys())
    batches = []
    label_mapping_copy = label_mapping.copy()
    while True:
        available_classes = [c for c in all_classes if len(label_mapping_copy[c]) > 0] # Keep only classes if they are still available.
        # Exit if we have less
        if len(available_classes) < n_classes: 
            break

        # Ensures that we will at least have 1 example of every label pair
        selected_classes = random.sample(available_classes, n_classes)
        batch = []

        for cls in selected_classes:
            indices = label_mapping_copy[cls]
            # If there are not enough examples to sample from, we re-select a sample from the shortened list. Empty the list after
            if len(indices) >= n_samples:
                chosen = indices[:n_samples]
                label_mapping_copy[cls] = indices[n_samples:]
            else:
                chosen = indices + random.choices(indices, k=(n_samples - len(indices)))
                label_mapping_copy[cls] = []

            batch.extend(chosen)

        batches.append(batch)

    return batches


class BalancedBatchSampler(Sampler):
    def __init__(self, labels, n_classes, n_samples):
        self.labels = labels
        self.n_classes = n_classes 
        self.n_samples = n_samples  

        self.label_mapping = defaultdict(list)
        for idx, label in enumerate(labels):
            self.label_mapping[int(label)].append(idx)

    def __iter__(self):
        for cls in self.label_mapping:
            random.shuffle(self.label_mapping[cls])

        batches = create_batches(self.label_mapping, self.n_classes, self.n_samples)
        for batch in batches:
            yield batch

    def __len__(self):
        batches = create_batches(self.label_mapping, self.n_classes, self.n_samples)
        return len(batches)

In [3]:
class ImageDataset(Dataset):
    def __init__(self, annotation_file, img_dir, transform=None):
        self.img_label = pd.read_csv(annotation_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.img_label)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_label.iloc[idx, 0])
        image = decode_image(img_path, mode=ImageReadMode.RGB)
        label = self.img_label.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)

        return image, label

In [4]:
transform = v2.Compose([
    v2.Resize(256),
    v2.CenterCrop(224),
    v2.ToDtype(torch.float32, scale=True)])
train_data = ImageDataset('Data/train_data.csv', 'Data/train/', transform)
valid_data = ImageDataset('Data/valid_data.csv', 'Data/train/', transform)
test_data = ImageDataset('Data/test_data.csv', 'Data/train/', transform)

In [5]:
df = pd.read_csv('Data/train_data.csv')
label_tensor = torch.tensor(df['encoded_ground_truth'].values)

sampler = BalancedBatchSampler(label_tensor, 5, 3)

dataloader = DataLoader(train_data, batch_sampler=sampler)

In [6]:
mlflow.set_experiment("Resnet Scratch Test")
mlflow.set_tracking_uri("http://127.0.0.1:5000/")

2026/02/05 17:14:13 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.schemas
2026/02/05 17:14:13 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.tables
2026/02/05 17:14:13 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.types
2026/02/05 17:14:13 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.constraints
2026/02/05 17:14:13 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.defaults
2026/02/05 17:14:13 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.comments
2026/02/05 17:14:13 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/02/05 17:14:13 INFO alembic.runtime.migration: Will assume non-transactional DDL.
2026/02/05 17:14:13 INFO mlflow.tracking.fluent: Experiment with name 'Resnet Scratch Test' does not exist. Creating a new experiment.


In [7]:
mlflow.pytorch.autolog()
mlflow.enable_system_metrics_logging()
with mlflow.start_run():
    params = {'learning_rate': 1e-3, 'epochs': 50, 'output_size': 256, 'batch_size': 15}
    mlflow.log_params(params)
    
    resnet_model = ResNet(Block, [3, 4, 6, 3], image_channels=3)
    loss_fn = TripletMarginLoss()
    optimizer = Adam(resnet_model.parameters(), lr=params['learning_rate'])
    
    for i in range(params['epochs']):
        avg_loss = []
        for batch, (X, y) in enumerate(dataloader):
            pred = resnet_model(X)
            pairs = create_pairs(y, pred)
            anchor_tensor, positive_tensor, negative_tensor = create_tensors(pairs)
            loss = loss_fn(anchor_tensor, positive_tensor, negative_tensor)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            avg_loss.append(loss.item())
        with torch.no_grad():
            val_output = []
            val_label = []
            for j in range(valid_data.__len__()):
                X, y = valid_data[j]
                reshaped_X = torch.reshape(X, (1, 3, 224, 224))
                val_pred = resnet_model(reshaped_X)
                val_output.append(torch.flatten(val_pred))
                val_label.append(y)
            pairs = create_pairs(val_label, val_output)
            anchor_tensor, positive_tensor, negative_tensor = create_tensors(pairs)
            val_loss = loss_fn(anchor_tensor, positive_tensor, negative_tensor)
        print(f"Epoch {i+1}: Avg training loss - {np.mean(avg_loss)}, Avg validation loss - {val_loss}")
        mlflow.log_metric("train_loss", np.mean(avg_loss), step=i+1)
        mlflow.log_metric("valid_loss", val_loss, step=i+1)

2026/02/05 17:14:32 INFO mlflow.system_metrics.system_metrics_monitor: Skip logging GPU metrics. Set logger level to DEBUG for more details.
2026/02/05 17:14:32 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.


Epoch 1: Avg training loss - 1.3523036351516133, Avg validation loss - 0.9702402353286743
Epoch 2: Avg training loss - 0.7976870521322473, Avg validation loss - 0.9725034832954407
Epoch 3: Avg training loss - 0.7592264455418254, Avg validation loss - 0.9696353077888489
Epoch 4: Avg training loss - 0.7121802882616779, Avg validation loss - 0.964360773563385
Epoch 5: Avg training loss - 0.7150943351998216, Avg validation loss - 0.955269992351532
Epoch 6: Avg training loss - 0.6818292151863982, Avg validation loss - 0.9698925614356995
Epoch 7: Avg training loss - 0.6705553194990864, Avg validation loss - 0.9670665860176086
Epoch 8: Avg training loss - 0.6729383985569448, Avg validation loss - 0.9643304347991943
Epoch 9: Avg training loss - 0.639830176661044, Avg validation loss - 0.9508029818534851
Epoch 10: Avg training loss - 0.6741301938891411, Avg validation loss - 0.951831042766571
Epoch 11: Avg training loss - 0.6189326073652432, Avg validation loss - 0.961796760559082
Epoch 12: Avg

2026/02/05 21:03:33 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2026/02/05 21:03:33 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


Epoch 50: Avg training loss - 0.3359103193860265, Avg validation loss - 0.8361746072769165
üèÉ View run grandiose-kite-447 at: http://127.0.0.1:5000/#/experiments/2/runs/aa2e288c3e2945f0a13bc8f712ed3963
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/2


In [8]:
from numpy import dot 
from numpy.linalg import norm 

def cosine_similarity(v1, v2): 
    return dot(v1, v2) / (norm(v1) * norm(v2))

In [14]:
X1, y1 = test_data[3]
X2, y2 = test_data[0]
X3, y3 = test_data[1]
# 0, 2 same

In [19]:
reshaped_X = torch.reshape(X2, (1, 3, 224, 224))
val_pred = resnet_model(reshaped_X)
val_pred_same1 = torch.flatten(val_pred)

reshaped_X = torch.reshape(X3, (1, 3, 224, 224))
val_pred = resnet_model(reshaped_X)
val_pred_same2 = torch.flatten(val_pred)

In [20]:
cosine_similarity(val_pred_same1.detach(), val_pred_same2.detach())

  return dot(v1, v2) / (norm(v1) * norm(v2))


np.float32(0.9995917)