# Constants
Change these to fit your needs

In [1]:
# Runtime environment switch: True on Google Colab, False for a local checkout
COLAB = True

# Directory the Kaggle download is placed in
if COLAB:
    HM_DATA_PATH = "/content/drive/MyDrive/dd2430/data/"
else:
    HM_DATA_PATH = "./data/"

# Directory used by torch.save and torch.load for .pth files
if COLAB:
    PTH_SAVE_PATH = "/content/drive/MyDrive/dd2430/pth/"
else:
    PTH_SAVE_PATH = "./pth/"

# When enabled, both paths chosen above are overridden:
# the data path becomes the empty string and the .pth path becomes /content/
load_direct = True
if load_direct:
    HM_DATA_PATH = ''
    PTH_SAVE_PATH = '/content/'

# Set to False once the Kaggle archive has been downloaded
DOWNLOAD_FROM_KAGGLE = False

# Set to False once a .pth file has been created and saved to PTH_SAVE_PATH
CREATE_NEW_DATASET = False

# Fractions for the train / test / val sets; they should sum to 1
SET_SIZES = dict(train=0.8, test=0.1, val=0.1)

# Number of samples per class in the uniform dataset
N_SAMPLES = 500

In [None]:
# Mount Google Drive at /content/drive.
# Guarded by COLAB because the google.colab package is only available inside
# Colab; with COLAB = False the unguarded import would stop the notebook here.
if COLAB:
    from google.colab import drive
    drive.mount('/content/drive')

# Imports

In [None]:
import os
import random
import numpy as np
from tqdm import tqdm
import importlib
import gdown
import numpy as np
from sklearn.metrics import classification_report
import seaborn as sns
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

import transformers
# Colab-only setup: import the Colab helpers, clone the project repository and
# move its contents next to the notebook.
if COLAB:
    from google.colab import files, drive
    #!pip install git
    #!git clone https://[ADD PERSONAL ACCESS TOKEN]@github.com/Samin765/DD2430_Project.git
    # Clones into ./DD2430_Project relative to the current working directory
    !git clone https://github.com/Samin765/DD2430_Project.git

    #to pull down all files
    # The glob * does not match hidden entries, so .git stays in /content/DD2430_Project
    !mv -v /content/DD2430_Project/* /content/


In [4]:
# Pick the compute device: Apple-silicon MPS wins if present, then CUDA, else CPU
if torch.backends.mps.is_available():
    device = 'mps'
elif torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print("Using device: ", device)

Using device:  cuda


# Download data

In [None]:
if DOWNLOAD_FROM_KAGGLE:
    !pip install kaggle
    files.upload()

    os.makedirs('/root/.kaggle', exist_ok=True)
    !cp kaggle.json /root/.kaggle/
    !chmod 600 /root/.kaggle/kaggle.json

    %cd HM_DATA_PATH

    !kaggle competitions download -c h-and-m-personalized-fashion-recommendations


In [None]:
# Extract the competition archive. unzip is given a bare file name, so the zip
# has to be in the current working directory when this cell runs.
if DOWNLOAD_FROM_KAGGLE:
    !unzip -q h-and-m-personalized-fashion-recommendations.zip

# Dataset

## Create new dataset
This will create a new dataset and save it as a .pth file to Google Drive. If you get an error, it is most likely because you cannot make a dataset that large.

In [None]:
# Build a class-balanced dataset from the H&M articles and images and save it
# as a .pth file under PTH_SAVE_PATH. Only runs when CREATE_NEW_DATASET is True,
# so `data_to_save` is not defined otherwise.
import datasets
# Reload so edits to datasets.py are picked up without restarting the kernel
importlib.reload(datasets)

if CREATE_NEW_DATASET:
    dataset = datasets.HMDataset2(
        articles_csv = HM_DATA_PATH + 'articles.csv',
        image_dir = HM_DATA_PATH + 'images',
        main_class = 'garment_group_name',
        model = transformers.CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device),
        processor = transformers.CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
    )
    # data per class
    n_samples =  N_SAMPLES

    # Requires the smallest class to have strictly more than n_samples rows
    # NOTE(review): the check runs over all classes, including the ones handled in the loop below — confirm that is intended
    assert dataset.articles[dataset.main_class].value_counts().min()>n_samples, 'Can not make balanced set'

    # you can also set all to n_samples then set the ones you want to 0
    # NOTE(review): presumably a count equal to n_samples marks a class as already full so
    # get_n_of_each skips it — confirm in datasets.HMDataset2
    for exclude_subclass in ['Unknown', 'Special Offers', 'some other']:
        dataset.counts[exclude_subclass]=n_samples

    # Create uniform dataset
    image_emb, labels, images = dataset.get_n_of_each(n_samples)

    # Keys written here are the ones the loading cells read back
    data_to_save = {
        'image_embedding': image_emb,
        'class_text': labels,
        'images': images,
    }
    os.makedirs(PTH_SAVE_PATH, exist_ok=True)
    torch.save(data_to_save, f'{PTH_SAVE_PATH}HM_data_{n_samples}.pth')

In [None]:
def save_to_drive(data, n_samples):
    """Save the dataset you created to Google Drive.

    Mounts Drive at /content/drive and writes `data` with torch.save to
    /content/drive/My Drive/Skola/HM_data_{n_samples}.pth.

    Args:
        data: object to serialize; the dataset-creation cell passes a dict with
            the keys 'image_embedding', 'class_text' and 'images'.
        n_samples: number inserted into the output file name.
    """
    from google.colab import drive
    drive.mount('/content/drive')
    torch.save(data, f'/content/drive/My Drive/Skola/HM_data_{n_samples}.pth')


# `data_to_save` only exists when the dataset-creation cell actually ran, and
# google.colab is only importable on Colab. Skip the save otherwise instead of
# failing with NameError / ImportError.
if CREATE_NEW_DATASET and COLAB:
    save_to_drive(data_to_save, N_SAMPLES)

## Load dataset
If you already have the HM data and .pth saved in google drive, this is where the actual code/program begins.

Remember to change the constants at the top so you don't download and/or create a new dataset next time.

In [None]:
def load_from_private_drive(n_samples=None, directory='/content/drive/MyDrive/Skola/', map_location=None):
    """Load a saved HM_data_{n_samples}.pth file from a directory you own.

    Called without arguments it reads the same file as before
    (/content/drive/MyDrive/Skola/HM_data_{N_SAMPLES}.pth). Drive is not
    mounted here; it has to be mounted already.

    Args:
        n_samples: number used in the file name; defaults to N_SAMPLES.
        directory: directory containing the .pth file.
        map_location: forwarded to torch.load; defaults to the notebook's
            `device`, matching load_from_project_drive, so tensors saved on
            one device can be loaded on another.

    Returns:
        Tuple (image_emb, images, labels) read from the keys
        'image_embedding', 'images' and 'class_text'.
    """
    if n_samples is None:
        n_samples = N_SAMPLES
    if map_location is None:
        map_location = device

    file_to_load = os.path.join(directory, f"HM_data_{n_samples}.pth")
    loaded_data = torch.load(file_to_load, map_location=map_location)

    image_emb = loaded_data['image_embedding']
    labels = loaded_data['class_text']
    images = loaded_data['images']

    # Note the return order: images come before labels
    return image_emb, images, labels

def load_from_project_drive(link):
    """Download a shared .pth file from Google Drive and unpack it.

    Args:
        link: Google Drive file id, inserted into the download URL.

    Returns:
        Tuple (image_emb, images, labels) read from the keys
        'image_embedding', 'images' and 'class_text'.
    """
    import gdown
    local_file = 'Dataset_loaded.pth'
    url = f"https://drive.google.com/uc?id={link}"
    gdown.download(url, local_file, quiet=False)

    # Tensors are mapped onto the notebook's selected device while loading
    payload = torch.load(local_file, map_location=device)
    embeddings = payload['image_embedding']
    class_names = payload['class_text']
    # Raw images are kept so scaling and transforms can be inspected
    pictures = payload['images']
    return embeddings, pictures, class_names

# Source switch: True downloads the shared project file (new set), False reads
# from your own Drive. Do not download too often: a 24h waiting period can apply.
project_drive = False
data = (
    load_from_project_drive(link='16we5aQ_Mbk1791_8WT522HmR0lwlwwqQ')
    if project_drive
    else load_from_private_drive()
)
# Both loaders return (image embeddings, images, labels) in this order
image_emb0, images0, labels0 = data

# Evaluate

**Pulls latest changes from GitHub if needed**

In [None]:
%cd DD2430_Project
!git pull https://github.com/Samin765/DD2430_Project.git
!mv -v /content/DD2430_Project/* /content/

In [14]:
# Project modules:
#   model_functions - CLIP text/image embeddings, forward pass etc.
#   utils           - helpers that fit neither model_functions nor datasets,
#                     for example displaying images
# Run this cell again whenever one of the modules has changed.
import model_functions
import utils
import training
import datasets

# Colab does not pick up edited modules on its own, so reload each one
for _stale_module in (utils, training, model_functions, datasets):
    importlib.reload(_stale_module)
import model_functions, utils, training

**Split datasets**

In [None]:
# Batch size shared by all three loaders
batch_size = 200

# datasets.split returns the full dataset plus the train / test / val parts
dataset, dataset_train, dataset_test, dataset_val = datasets.split(
    labels0, image_emb0, images0, N_SAMPLES, SET_SIZES
)

# Only the training loader shuffles; val and test keep a fixed order
dataloader_train = DataLoader(dataset=dataset_train, batch_size=batch_size, shuffle=True)
dataloader_val = DataLoader(dataset=dataset_val, batch_size=batch_size, shuffle=False)
dataloader_test = DataLoader(dataset=dataset_test, batch_size=batch_size, shuffle=False)

**Hyperparam search for LoRA**

---



In [None]:
# Grid search over learning rate x weight decay; for every pair, two LoRA rank
# configurations are trained and evaluated, and the accuracies are collected in `results`.
import itertools


# Hyperparameter space
learning_rates = [1e-5, 5e-5, 1e-4]
# NOTE(review): presumably one rank per text-encoder layer, 0 meaning "no LoRA" —
# confirm in model_functions.apply_lora_to_transformer
initial_ranks = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 256, 256]
weight_decays = [0.0, 0.01, 0.1 , 0.0001]

# One dict per trained configuration: ranks, accuracy, lr, wd
results = []

for lr, wd in itertools.product(learning_rates, weight_decays):
    # Work on a copy so every (lr, wd) pair starts from initial_ranks again
    ranks = initial_ranks.copy()
    start = len(ranks) - 3

    # With the 12-entry initial_ranks above, i takes the values 9 and 10:
    #   i = 9  -> entry already 0, so the last two entries stay at 256
    #   i = 10 -> second-to-last entry zeroed, only the last entry stays at 256
    for i in range(start, len(ranks) - 1):
        ranks[i] = 0

        # The last entry is never zeroed by this loop, so with the initial_ranks
        # above this guard does not trigger; the break only leaves the inner loop
        if all(rank == 0 for rank in ranks):
            print("All ranks are set to 0. Exiting the loop.")
            break

        # A new model/processor is loaded for every configuration; force_download=True
        # re-fetches the files each time instead of using the local cache
        model = transformers.CLIPModel.from_pretrained("openai/clip-vit-base-patch32", force_download=True).to(device)
        processor = transformers.CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", force_download=True)
        processor.feature_extractor.do_rescale = False  # Ensure image values are between [0-1]

        # LoRA is applied to the text encoder layers only
        lora_layers = []
        lora_layers = model_functions.apply_lora_to_transformer(model.text_model.encoder.layers, lora_layers, ranks)
        lora_params_attention = model_functions.get_lora_params(model, print_layer=False)

        dataloaders = {'train': dataloader_train, 'val': dataloader_val, 'test': dataloader_test}
        clip = {'m': model, 'p': processor}
        ft = training.FinetuneCLIP(dataloaders, clip, epochs = 50)

        ft.tt['soft'], ft.tt['LoRA'], ft.tt['image_fc'] = 0, 1, 1  # Enable LoRA and Image FC
        ft.initialize({'LoRA': lora_params_attention, 'lr': lr, 'weight_decay': wd})
        ft.count_parameters()

        ft.train()

        # NOTE(review): which split ft.eval(False) scores is not visible here — confirm in training.FinetuneCLIP
        all_predictions, all_labels, acc = ft.eval(False)
        utils.confussion_matrix(all_labels, all_predictions, list(dataset_test.class_to_id.keys()), F1=False)
        print(f"Accuracy for rank configuration {ranks} with lr={lr}, wd={wd} is {acc:.2f} %")

        # ranks is mutated on the next iteration, so store a copy
        results.append({'ranks': ranks.copy(), 'accuracy': acc, 'lr': lr, 'wd': wd})

# Summary of every configuration that was trained
for result in results:
    print(f"Rank configuration: {result['ranks']}, Learning Rate: {result['lr']}, Weight Decay: {result['wd']}, Accuracy: {result['accuracy']:.2f} %")

In [None]:
# Fresh CLIP model and processor (forced re-download) for the training cells below
clip_checkpoint = "openai/clip-vit-base-patch32"
model = transformers.CLIPModel.from_pretrained(clip_checkpoint, force_download=True)
model = model.to(device)
processor = transformers.CLIPProcessor.from_pretrained(clip_checkpoint, force_download=True)
# do_rescale controls the expected image value range: False => [0-1], True => [0,255]
processor.feature_extractor.do_rescale = False

**LoRA Train with specific Parameters**

---



In [None]:
# Train LoRA on the text encoder with one fixed hyperparameter set.
# Only the last entry of `ranks` is non-zero (rank 256); all other entries are 0.
ranks = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 256]
lr = 1e-05
wd = 0.0001
epochs_num = 200
lora_layers = []

lora_layers = model_functions.apply_lora_to_transformer(model.text_model.encoder.layers, lora_layers, ranks)
lora_params_attention = model_functions.get_lora_params(model, print_layer=True);

# Set up dataloaders and fine-tune process
dataloaders = {'train': dataloader_train, 'val': dataloader_val, 'test': dataloader_test}
clip = {'m': model, 'p': processor}
ft = training.FinetuneCLIP(dataloaders, clip, epochs=epochs_num)

# Initialize LoRA training with current hyperparameters
ft.tt['soft'], ft.tt['LoRA'], ft.tt['image_fc'] = 0, 1, 0  # Enable LoRA only
ft.initialize({'LoRA': lora_params_attention, 'lr': lr, 'weight_decay': wd})
ft.count_parameters()

# Set the optimizer parameters for learning rate and weight decay
#ft.optimizer_params = {'lr': lr, 'weight_decay': wd}  # Ensure your FinetuneCLIP handles this

# Train the model
ft.train()

# Evaluate the model
all_predictions, all_labels, acc = ft.eval(False)
#utils.confussion_matrix(all_labels, all_predictions, list(dataset_test.class_to_id.keys()), F1=False)
ft.plot_loss()

# `scale` is not defined anywhere in this notebook, so it is left out of the
# summary; referencing it raised NameError after the whole training run.
print(f"Accuracy for rank configuration {ranks} with lr={lr}, wd={wd} is {acc:.2f} %")

**Train SoftPrompt with Tuned LoRA/CLIP Model**

---



In [None]:
# Soft-prompt stage: reuses the current `model` / `processor` objects
num_soft = 5
dataloaders = dict(train=dataloader_train, val=dataloader_val, test=dataloader_test)
clip = dict(m=model, p=processor)

ft = training.FinetuneCLIP(dataloaders, clip)
# Training switches: soft prompt on, LoRA off, image_fc on
ft.tt['soft'] = 1
ft.tt['LoRA'] = 0
ft.tt['image_fc'] = 1
ft.initialize({'num_soft': num_soft, 'add': ''})
ft.train()

# Evaluate the model and show the loss curve
all_predictions, all_labels, acc = ft.eval(False)
#utils.confussion_matrix(all_labels, all_predictions, list(dataset_test.class_to_id.keys()), F1=False)
ft.plot_loss()

print(f"Accuracy  {acc:.2f} %")
