# Setup
## Constants

In [1]:
!pip install transformers -q
!pip install torch -q
!pip install torchvision -q

In [1]:
# Set to False if you have already created and saved a .pth file to PTH_SAVE_PATH
CREATE_NEW_DATASET = True

# Fractions for the train, test and val sets. Must sum to 1
SET_SIZES = {
    "train": 0.8,
    "test": 0.1,
    "val": 0.1,
}
# Fail fast on a mistyped fraction instead of silently building wrong splits
# (tolerance because the fractions are floats).
assert abs(sum(SET_SIZES.values()) - 1.0) < 1e-9, "SET_SIZES must sum to 1"

# samples per class in uniform dataset
N_SAMPLES = 400

# path to dataset (do not change)
HM_DATA_PATH = "../dataset/"

# path to pth saves (do not change)
PTH_SAVE_PATH = "../pth/"

## Imports

In [2]:
import os, sys, random, importlib, transformers, itertools, copy
import numpy as np, torch.nn as nn, torch, seaborn as sns, matplotlib.pyplot as plt, pandas as pd
from tqdm import tqdm
from sklearn.metrics import classification_report, confusion_matrix
from torch.utils.data import DataLoader
# Show the working directory: HM_DATA_PATH and PTH_SAVE_PATH are relative to it
print(os.getcwd())
# Our own files
# sys.path.append('./src/')
import model_functions, utils, training, datasets
def set_seed(seed):
    """Seed every random number generator used here so runs are reproducible.

    Seeds Python's ``random``, NumPy and PyTorch (plus the CUDA generators when
    CUDA is available) and puts cuDNN into deterministic, non-benchmark mode.

    Args:
        seed: integer seed passed to every generator.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    # Deterministic kernels, no auto-tuner
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


set_seed(0)

  from .autonotebook import tqdm as notebook_tqdm


/Users/daghjelm/Documents/kth/ds-proj/DD2430_Project/src


In [3]:
def update():
    """Reload our own modules so that edits to their source files take effect.

    A plain ``import`` does not re-read a module that is already loaded, so each
    project module is passed through ``importlib.reload`` explicitly.
    """
    import model_functions, utils, training, datasets
    project_modules = (model_functions, utils, training, datasets)
    for module in project_modules:
        importlib.reload(module)


update()

In [4]:
# Pick the compute backend: CUDA first, then MPS (Apple silicon), otherwise CPU.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'
print("Using device:", device)

Using device: mps


In [13]:
# Load the pretrained CLIP model onto the selected device, plus its matching processor.
model = transformers.CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
processor = transformers.CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
# do_rescale controls the expected image value range: False => [0-1], True => [0,255].
# Set through `image_processor`: `feature_extractor` is only a deprecated alias for it
# in transformers, and the installed version is not pinned.
processor.image_processor.do_rescale = False



# Dataset

### Full dataset, run once

In [5]:
# Article metadata table
df = pd.read_csv(f"{HM_DATA_PATH}articles_filtered.csv")
# Precomputed embeddings for the full dataset (all 100k)
embs = torch.load(f"{HM_DATA_PATH}embedds.pth", weights_only=True)
# Matching labels (100k), converted from tensor to a plain Python list.
# NOTE(review): these look like article ids (see the lookup below) — confirm.
labs = torch.load(f"{HM_DATA_PATH}labels.pth", weights_only=True)
labs = labs.tolist()

hmd = datasets.HMDatasetDuplicates(embs, np.array(labs), df)

# Sanity check: look up the garment group of one known article id
print(hmd.article_id2suclass(694805002, 'garment_group_name'))
# To list the garment group of every label instead:
# print(hmd.list_article_id2suclass(labs, 'garment_group_name'))
print(len(labs))

BALANCED = False

Knitwear
105099


In [6]:
update()
batch_size = 64
# Take the train/val fractions from SET_SIZES (constants cell) instead of repeating
# the literals, so the split is configured in one place. Only train and val are
# passed, as before; the recorded output shows a test split as well, so test is
# presumably the remainder — confirm in datasets.datasets.
set_sizes = {split: SET_SIZES[split] for split in ("train", "val")}
# NOTE(review): the meaning of the trailing positional `True` is not visible here — confirm.
data = datasets.datasets(embs, np.array(labs), df, set_sizes, True)# takes 3 min

47071 Train size: 37656, Val size: 4707, Test size: 4708
105099
This should be empty set() set()
The resulting sizes 84126 4707 4708


### Subsets

In [None]:
update()
# Garment groups that are left out of the subsets
exclude_classes = [
    'Special Offers',
    'Woven/Jersey/Knitted mix Baby',
    'Unknown',
]
# Arguments, in order: class column, the splits built above, 5000, excluded classes, 32.
# NOTE(review): the recorded train class counts top out at 5000, so 5000 looks like a
# per-class cap; 32 is presumably the batch size (batch_size = 64 above is not used
# here) — confirm both against datasets.get_dataloaders.
dataloaders_imbalanced = datasets.get_dataloaders(
    'garment_group_name',
    data,
    5000,
    exclude_classes,
    32,
)  # look at Resource Utilization to see if capping

100%|██████████| 84126/84126 [01:09<00:00, 1215.29it/s]
100%|██████████| 4707/4707 [00:03<00:00, 1245.49it/s]
100%|██████████| 4708/4708 [00:03<00:00, 1237.44it/s]
100%|██████████| 84126/84126 [00:35<00:00, 2375.26it/s]


Final class count for train: {'Knitwear': 5000, 'Shoes': 4088, 'Shorts': 1248, 'Trousers Denim': 2455, 'Under-, Nightwear': 5000, 'Socks and Tights': 1858, 'Dressed': 725, 'Accessories': 5000, 'Trousers': 5000, 'Skirts': 979, 'Shirts': 1705, 'Swimwear': 2213, 'Blouses': 4636, 'Jersey Basic': 5000, 'Outdoor': 3613, 'Dresses Ladies': 3880, 'Jersey Fancy': 5000, 'Dresses/Skirts girls': 1234}


100%|██████████| 4707/4707 [00:01<00:00, 2357.97it/s]


Final class count for val: {'Knitwear': 305, 'Shoes': 270, 'Shorts': 59, 'Trousers Denim': 139, 'Under-, Nightwear': 246, 'Socks and Tights': 62, 'Dressed': 49, 'Accessories': 651, 'Trousers': 303, 'Skirts': 68, 'Shirts': 91, 'Swimwear': 120, 'Blouses': 314, 'Jersey Basic': 177, 'Outdoor': 241, 'Dresses Ladies': 251, 'Jersey Fancy': 863, 'Dresses/Skirts girls': 84}


100%|██████████| 4708/4708 [00:01<00:00, 2362.31it/s]


Final class count for test: {'Knitwear': 312, 'Shoes': 243, 'Shorts': 56, 'Trousers Denim': 146, 'Under-, Nightwear': 238, 'Socks and Tights': 69, 'Dressed': 57, 'Accessories': 631, 'Trousers': 266, 'Skirts': 67, 'Shirts': 71, 'Swimwear': 139, 'Blouses': 316, 'Jersey Basic': 180, 'Outdoor': 246, 'Dresses Ladies': 263, 'Jersey Fancy': 866, 'Dresses/Skirts girls': 77}


100%|██████████| 58634/58634 [00:47<00:00, 1222.67it/s]
100%|██████████| 4243/4243 [00:03<00:00, 1202.31it/s]
100%|██████████| 4293/4293 [00:03<00:00, 1250.03it/s]


In [None]:
# Everything below runs on the imbalanced loaders; BALANCED records that choice
# and is stored in the training config further down.
dataloaders = dataloaders_imbalanced
BALANCED = False

# Evaluate

## Baseline

The performance of the untuned CLIP

## LoRA

**LoRA training with specific parameters**

---


In [None]:
# Saved file handed to ft.initialize(..., load=True, file_name=file_name) in the training cell
file_name = "lora-cap-5000_lora_model_120.pth"

In [None]:
update()
# One LoRA rank per entry (12 entries), passed together with the text encoder's layers below.
# NOTE(review): 0 presumably means "no LoRA on this layer" — confirm in model_functions.
ranks = [0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 256]  # Only apply LoRA with rank 256 to the last layer

# Alternative: rank 256 on all 12 layers
#ranks = [256, 256, 256, 256, 256, 256, 256, 256,256, 256, 256, 256]
lr = 1e-03
wd = 0.001
epochs_num = 40
lora_layers = []

# Work on a deep copy so the pretrained `model` stays untouched and need not be reloaded
clip = {'m': copy.deepcopy(model), 'p': processor} # do not load each time
# LoRA is applied to the text encoder's layers only
lora_layers = model_functions.apply_lora_to_transformer(clip['m'].text_model.encoder.layers , lora_layers, ranks)
lora_params_attention = model_functions.get_lora_params(clip['m'], print_layer = True)


ft = training.FinetuneCLIP(dataloaders, clip, epochs = epochs_num )
ft.conf = {'epochs': epochs_num, 'balanced':BALANCED}
ft.model_prefix = "lora-cap-5000-2-120-start"

# Initialize LoRA training with current hyperparameters
ft.tt['soft'], ft.tt['LoRA'], ft.tt['image_fc'] = 0, 1 ,0 # Enable LoRA
# load=True with file_name (set in the previous cell) presumably restores saved
# weights before training continues — confirm in training.FinetuneCLIP.initialize
ft.initialize({'LoRA': lora_params_attention, 'lr': lr, 'weight_decay': wd, 'num_soft':0, 'add':''},
              load=True, file_name=file_name)

ft.count_parameters()
# Evaluation before training; these results are overwritten by the evaluation after training.
# NOTE(review): the meaning of the positional False is not visible here — confirm.
all_predictions, all_labels, acc = ft.eval(False)

# Train the model
ft.es['pat']=10  # presumably the early-stopping patience — confirm in training.FinetuneCLIP
ft.train()

# Evaluate the model
all_predictions, all_labels, acc = ft.eval(False)
# Confusion matrix labelled with the test dataset's class names
utils.confussion_matrix(all_labels, all_predictions, list(dataloaders['test'].dataset.class_to_id.keys()), F1=False)
ft.plot_loss_key('train')
ft.plot_loss_key('val')

print(f"Accuracy for rank configuration {ranks} with lr={lr}, wd={wd} is {acc:.2f} %")