In [None]:
# METRIC LEARNING FINE-TUNING

#Importing libraries and setup
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import pytorch_lightning as pl
from PIL import Image
import pandas as pd
from pathlib import Path
import torch.nn.functional as F
import random


# Dataset with error handling: Triplet Sampler (Anchor, Positive, Negative)
class TripletDataset(Dataset):
    def __init__(self, df, transform):
        self.df = df.reset_index(drop=True)
        self.groups = df.groupby("item_id")
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        a_row = self.df.iloc[idx]

        try:
            a_img = self.transform(Image.open(a_row.image_path).convert("RGB"))
        except Exception as e:
            print(f"‚ö†Ô∏è Skipped bad image: {a_row.image_path} ({e})")
            return self.__getitem__((idx + 1) % len(self.df))

        positives = self.groups.get_group(a_row.item_id)
        p_row = positives.sample().iloc[0]
        p_img = self.transform(Image.open(p_row.image_path).convert("RGB"))

        neg_row = self.df[self.df.item_id != a_row.item_id].sample().iloc[0]
        n_img = self.transform(Image.open(neg_row.image_path).convert("RGB"))

        return a_img, p_img, n_img


    
# Triplet Loss function
def triplet_loss(a, p, n, margin=0.2):
    d_ap = F.pairwise_distance(a, p)
    d_an = F.pairwise_distance(a, n)
    return F.relu(d_ap - d_an + margin).mean()



# Lightning Model with metric learning
class TripletModel(pl.LightningModule):
    def __init__(self, lr=1e-4):
        super().__init__()
        base = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        base.fc = torch.nn.Linear(base.fc.in_features, 512)
        self.model = base
        self.lr = lr

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        a, p, n = batch
        a_e, p_e, n_e = self(a), self(p), self(n)
        loss = triplet_loss(a_e, p_e, n_e)
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.lr)


# Load dataset safely
index_path = Path(r"C:\Users\dilku\deepfashion-recsys\data\deepfashion_index.csv")
df = pd.read_csv(index_path)

# Match case-insensitive 'train'
train_df = df[df.split.str.contains("train", case=False)]
print(f"‚úÖ Found {len(train_df)} training images")

if len(train_df) == 0:
    raise ValueError("‚ùå No training samples found. Check your split column values!")

    
# Image transforms
tfs = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.ColorJitter(0.2,0.2,0.2,0.1),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])

ds = TripletDataset(train_df, tfs)
print(f"‚úÖ Dataset length: {len(ds)}")

dl = DataLoader(ds, batch_size=16, shuffle=True, num_workers=0)  # üëà critical fix


# Trainer setup (progress bar + safe config)
model = TripletModel(lr=1e-4)
trainer = pl.Trainer(
    max_epochs=3,                # starting small test first!
    accelerator="auto",
    log_every_n_steps=10,
    enable_progress_bar=True
)
trainer.fit(model, dl)


In [None]:
# Metric Learning Fine-tuning
# With Auto Checkpoint Save & Resume

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from PIL import Image
import pandas as pd
import random
from pathlib import Path
import torch.nn.functional as F
import os


# Setup Paths & Check Device
root = Path(r"C:\Users\dilku\deepfashion-recsys")
data_path = root / "data" / "deepfashion_index.csv"
ckpt_dir = root / "checkpoints"
ckpt_dir.mkdir(parents=True, exist_ok=True)

device = "cpu"
print(f"Using device: {device.upper()}")


# Triplet Dataset (Anchor, Positive, Negative)
class TripletDataset(Dataset):
    def __init__(self, df, transform):
        self.df = df.reset_index(drop=True)
        self.groups = df.groupby("item_id")
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        a_row = self.df.iloc[idx]
        a_img = self.transform(Image.open(a_row.image_path).convert("RGB"))

        # Positive (same item_id)
        positives = self.groups.get_group(a_row.item_id)
        p_row = positives.sample().iloc[0]
        p_img = self.transform(Image.open(p_row.image_path).convert("RGB"))

        # Negative (different item_id)
        neg_row = self.df[self.df.item_id != a_row.item_id].sample().iloc[0]
        n_img = self.transform(Image.open(neg_row.image_path).convert("RGB"))

        return a_img, p_img, n_img

    
# Triplet Loss Function
def triplet_loss(a, p, n, margin=0.2):
    d_ap = F.pairwise_distance(a, p)
    d_an = F.pairwise_distance(a, n)
    return F.relu(d_ap - d_an + margin).mean()


# Lightning Model
class TripletModel(pl.LightningModule):
    def __init__(self, lr=1e-4):
        super().__init__()
        base = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        base.fc = torch.nn.Linear(base.fc.in_features, 512)
        self.model = base
        self.lr = lr

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        a, p, n = batch
        a_e, p_e, n_e = self(a), self(p), self(n)
        loss = triplet_loss(a_e, p_e, n_e)
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.lr)

    
# Prepare Data
df = pd.read_csv(data_path)
train_df = df[df.split == "train"]

print(f"Found {len(train_df)} training images")

tfs = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.ColorJitter(0.2,0.2,0.2,0.1),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])

ds = TripletDataset(train_df, tfs)
dl = DataLoader(ds, batch_size=32, shuffle=True, num_workers=4)
print(f"Dataset length: {len(ds)}")


# Setup Trainer with Checkpointing
ckpt_callback = ModelCheckpoint(
    dirpath=str(ckpt_dir),
    filename="triplet-{epoch:02d}-{train_loss:.3f}",
    save_top_k=1,
    monitor="train_loss",
    mode="min"
)

trainer = pl.Trainer(
    max_epochs=10,
    accelerator="auto",
    log_every_n_steps=10,
    enable_progress_bar=True,
    callbacks=[ckpt_callback]
)


# Auto Resume 
# Find latest checkpoint 
ckpts = sorted(ckpt_dir.glob("*.ckpt"), key=lambda p: p.stat().st_mtime, reverse=True)
if ckpts:
    last_ckpt = ckpts[0]
    print(f"Resuming from checkpoint: {last_ckpt.name}")
    model = TripletModel.load_from_checkpoint(str(last_ckpt))
    trainer.fit(model, dl, ckpt_path=str(last_ckpt))
else:
    print("Starting new training run...")
    model = TripletModel(lr=1e-4)
    trainer.fit(model, dl)

    
# Save the Best Model
best_ckpt = ckpt_callback.best_model_path
if best_ckpt:
    print(f"Training complete. Best checkpoint saved at:\n{best_ckpt}")
else:
    print("No checkpoint was saved yet (training interrupted early).")


In [None]:
from pathlib import Path
ckpt_dir = Path(r"C:\Users\dilku\deepfashion-recsys\checkpoints")
print("Checkpoints found:", list(ckpt_dir.glob("*.ckpt")))


In [None]:
#  Quick Debug Version ‚Äî trains on 500 images to verify progress fast

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from PIL import Image
import pandas as pd
from pathlib import Path
import torch.nn.functional as F

# --- Setup ---
root = Path(r"C:\Users\dilku\deepfashion-recsys")
data_path = root / "data" / "deepfashion_index.csv"
ckpt_dir = root / "checkpoints"
ckpt_dir.mkdir(parents=True, exist_ok=True)
print("‚úÖ Setup complete")

# --- Dataset ---
class TripletDataset(Dataset):
    def __init__(self, df, transform):
        self.df = df.reset_index(drop=True)
        self.groups = df.groupby("item_id")
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        a_row = self.df.iloc[idx]
        try:
            a_img = self.transform(Image.open(a_row.image_path).convert("RGB"))
        except:
            return self.__getitem__((idx + 1) % len(self.df))
        positives = self.groups.get_group(a_row.item_id)
        p_row = positives.sample().iloc[0]
        p_img = self.transform(Image.open(p_row.image_path).convert("RGB"))
        neg_row = self.df[self.df.item_id != a_row.item_id].sample().iloc[0]
        n_img = self.transform(Image.open(neg_row.image_path).convert("RGB"))
        return a_img, p_img, n_img

# --- Loss ---
def triplet_loss(a, p, n, margin=0.2):
    d_ap = F.pairwise_distance(a, p)
    d_an = F.pairwise_distance(a, n)
    return F.relu(d_ap - d_an + margin).mean()

# --- Model ---
class TripletModel(pl.LightningModule):
    def __init__(self, lr=1e-4):
        super().__init__()
        base = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        base.fc = torch.nn.Linear(base.fc.in_features, 512)
        self.model = base
        self.lr = lr

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        a, p, n = batch
        a_e, p_e, n_e = self(a), self(p), self(n)
        loss = triplet_loss(a_e, p_e, n_e)
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.lr)

# --- Data ---
df = pd.read_csv(data_path)
train_df = df[df.split.str.contains("train", case=False)]
print(f"‚úÖ Sampled {len(train_df)} training images")

tfs = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])

ds = TripletDataset(train_df, tfs)
dl = DataLoader(ds, batch_size=16, shuffle=True, num_workers=2, persistent_workers=True)
print("‚úÖ DataLoader ready")

# --- Trainer ---
ckpt_callback = ModelCheckpoint(
    dirpath=str(ckpt_dir),
    filename="debug-{epoch:02d}-{train_loss:.3f}",
    save_top_k=1,
    monitor="train_loss",
    mode="min"
)

trainer = pl.Trainer(
    max_epochs=10, 
    accelerator="auto",
    log_every_n_steps=5,
    enable_progress_bar=True,
    callbacks=[ckpt_callback]
)

model = TripletModel(lr=1e-4)
trainer.fit(model, dl)

print(f"‚úÖ Training complete! Checkpoint saved at: {ckpt_callback.best_model_path}")


In [1]:
# Metric Learning Fine-tuning

#Importing the Libraries

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from PIL import Image
import pandas as pd
from pathlib import Path
import torch.nn.functional as F


# Setup 
root = Path(r"C:\Users\dilku\deepfashion-recsys")
data_path = root / "data" / "deepfashion_index.csv"
ckpt_dir = root / "checkpoints"
ckpt_dir.mkdir(parents=True, exist_ok=True)
print("Setup complete")


# Triplet Dataset (Anchor, Positive, Negative)
class TripletDataset(Dataset):
    def __init__(self, df, transform):
        self.df = df.reset_index(drop=True)
        self.groups = df.groupby("item_id")
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        a_row = self.df.iloc[idx]
        try:
            a_img = self.transform(Image.open(a_row.image_path).convert("RGB"))
        except:
            return self.__getitem__((idx + 1) % len(self.df))
        positives = self.groups.get_group(a_row.item_id)
        p_row = positives.sample().iloc[0]
        p_img = self.transform(Image.open(p_row.image_path).convert("RGB"))
        neg_row = self.df[self.df.item_id != a_row.item_id].sample().iloc[0]
        n_img = self.transform(Image.open(neg_row.image_path).convert("RGB"))
        return a_img, p_img, n_img

    
# Triplet Loss Function
def triplet_loss(a, p, n, margin=0.2):
    d_ap = F.pairwise_distance(a, p)
    d_an = F.pairwise_distance(a, n)
    return F.relu(d_ap - d_an + margin).mean()


# Lightning Model
class TripletModel(pl.LightningModule):
    def __init__(self, lr=1e-4):
        super().__init__()
        base = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        base.fc = torch.nn.Linear(base.fc.in_features, 512)
        self.model = base
        self.lr = lr

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        a, p, n = batch
        a_e, p_e, n_e = self(a), self(p), self(n)
        loss = triplet_loss(a_e, p_e, n_e)
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.lr)

    
# Loading Data + transforms
df = pd.read_csv(data_path)
train_df = df[df.split.str.contains("train", case=False)]
print(f"Sampled {len(train_df)} training images")

tfs = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])

ds = TripletDataset(train_df, tfs)
dl = DataLoader(ds, batch_size=16, shuffle=True, num_workers=0)
print("DataLoader ready")


# Trainer
ckpt_callback = ModelCheckpoint(
    dirpath=str(ckpt_dir),
    filename="debug-{epoch:02d}-{train_loss:.3f}",
    save_top_k=1,
    monitor="train_loss",
    mode="min"
)

trainer = pl.Trainer(
    max_epochs=10, 
    accelerator="auto",
    log_every_n_steps=5,
    enable_progress_bar=True,
    callbacks=[ckpt_callback]
)

model = TripletModel(lr=1e-4)
trainer.fit(model, dl)

print(f"Training completed! Checkpoint saved at: {ckpt_callback.best_model_path}")


  from .autonotebook import tqdm as notebook_tqdm


Setup complete
Sampled 25882 training images
DataLoader ready


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
C:\Users\dilku\anaconda3\envs\dfashion\lib\site-packages\pytorch_lightning\trainer\connectors\logger_connector\logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
C:\Users\dilku\anaconda3\envs\dfashion\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:751: Checkpoint directory C:\Users\dilku\deepfashion-recsys\checkpoints exists and is not empty.

  | Name  | Type   | Params | Mode 
-----------------------------------------
0 | model | ResNet | 24.6 M | train
-------------------------------

Epoch 9: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1618/1618 [3:30:21<00:00,  0.13it/s, v_num=7, train_loss=0.000]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1618/1618 [3:30:21<00:00,  0.13it/s, v_num=7, train_loss=0.000]
Training completed! Checkpoint saved at: C:\Users\dilku\deepfashion-recsys\checkpoints\debug-epoch=00-train_loss=0.000-v1.ckpt
