In [9]:
!pip install peft

  pid, fd = os.forkpty()




In [10]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from tqdm.notebook import tqdm
import os
from pathlib import Path
from PIL import Image
import requests
import torch
from torch.utils.data import Dataset, DataLoader
from datasets import load_dataset, Dataset
import itertools
from sklearn.model_selection import StratifiedShuffleSplit,train_test_split
from peft import LoraConfig, get_peft_model
from transformers import (
    DataCollator,
    CLIPProcessor, 
    CLIPModel, 
    TrainingArguments, 
    Trainer
)

In [11]:
def hf_clip_predict(model, processor, text_labels, images):
    """Score images against a set of text labels with a CLIP-style model.

    Every label is wrapped in the prompt ``"A photo of a {label}"``; prompts
    and images go through ``processor`` and the model's ``logits_per_image``
    are soft-maxed along dimension 1.

    Args:
        model: Object providing ``to(device)``, ``eval()`` and a call that
            returns something with a ``logits_per_image`` attribute. As a side
            effect it is moved to the selected device and put in eval mode.
        processor: Callable taking ``text``, ``images``, ``return_tensors`` and
            ``padding``; its result must support ``.to(device)`` and ``**``
            unpacking.
        text_labels: Iterable of labels used to build the prompts.
        images: Passed to ``processor`` unchanged.

    Returns:
        ``logits_per_image.softmax(dim=1)``.
    """
    target = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(target)

    prompts = []
    for label in text_labels:
        prompts.append(f"A photo of a {label}")

    batch = processor(text=prompts, images=images, return_tensors="pt", padding=True)
    batch = batch.to(target)

    # Inference only: no dropout, no autograd bookkeeping.
    model.eval()
    with torch.no_grad():
        logits = model(**batch).logits_per_image

    return logits.softmax(dim=1)

def get_image_paths_and_labels_from_df(df, data_dir):
    """Collect image paths and label rows for the articles in ``df`` that have an image.

    For each value of ``df["article_id"]`` (in frame order) the path
    ``{data_dir}/images/0{first two characters of the id}/0{id}.jpg`` is
    checked. Articles whose file is missing are reported with ``print`` and
    skipped.

    Args:
        df: DataFrame with an ``article_id`` column.
        data_dir: Directory that contains the ``images`` folder.

    Returns:
        ``(image_paths, labels)``: two lists of equal length. ``labels[i]`` is
        the slice of ``df`` holding every row whose ``article_id`` matches the
        article behind ``image_paths[i]`` (a one-row DataFrame when ids are
        unique).
    """
    # Row positions per article_id, computed in a single pass. Masking the
    # whole frame once per article (df[df["article_id"] == id]) is quadratic
    # in len(df).
    rows_by_id = df.groupby("article_id", sort=False).indices
    no_rows = []

    image_paths = []
    labels = []
    for article_id in df["article_id"].values:
        image_path = f"{data_dir}/images/0{str(article_id)[:2]}/0{article_id}.jpg"
        if not os.path.exists(image_path):
            print(f"Image not found for article_id: {article_id}")
            continue
        image_paths.append(image_path)
        # Same rows, in the same order, as the boolean-mask selection.
        labels.append(df.iloc[rows_by_id.get(article_id, no_rows)])

    return image_paths, labels

def get_image_paths_and_labels_ordered(df, data_dir):
    """Collect image paths and label rows in the order the articles appear in ``df``.

    For each value of ``df["article_id"]`` the path
    ``{data_dir}/images/0{first two characters of the id}/0{id}.jpg`` is
    checked; articles without an image file are skipped silently.

    Args:
        df: DataFrame with an ``article_id`` column.
        data_dir: Directory that contains the ``images`` folder.

    Returns:
        ``(image_paths, labels)``: two lists of equal length. ``labels[i]`` is
        the slice of ``df`` holding every row whose ``article_id`` matches the
        article behind ``image_paths[i]`` (a one-row DataFrame when ids are
        unique).
    """
    # Row positions per article_id, computed in a single pass. Masking the
    # whole frame once per article (df[df["article_id"] == id]) is quadratic
    # in len(df).
    rows_by_id = df.groupby("article_id", sort=False).indices
    no_rows = []

    image_paths = []
    labels = []
    for article_id in df["article_id"].values:
        image_path = f"{data_dir}/images/0{str(article_id)[:2]}/0{article_id}.jpg"
        if os.path.exists(image_path):
            image_paths.append(image_path)
            # Same rows, in the same order, as the boolean-mask selection.
            labels.append(df.iloc[rows_by_id.get(article_id, no_rows)])

    return image_paths, labels

def get_image_paths_and_labels(df, data_dir):
    """Walk ``data_dir`` and pair every ``.jpg`` file with its rows in ``df``.

    The article id is taken from the file name: the text before the first
    ``"."`` converted with ``int`` (so a ``.jpg`` whose stem is not an integer
    raises ``ValueError``).

    Args:
        df: DataFrame with an ``article_id`` column.
        data_dir: Directory tree to search with ``os.walk``.

    Returns:
        ``(image_paths, labels)``: two lists of equal length. ``labels[i]`` is
        the slice of ``df`` whose ``article_id`` equals the id parsed from
        ``image_paths[i]``; it is an empty slice when the id is not in ``df``.
    """
    # Row positions per article_id, computed in a single pass. Masking the
    # whole frame once per file (df[df["article_id"] == id]) is quadratic.
    rows_by_id = df.groupby("article_id", sort=False).indices
    no_rows = []

    image_paths = []
    labels = []
    for root, _dirs, files in os.walk(data_dir):
        for file in files:
            if not file.endswith(".jpg"):
                continue
            image_paths.append(os.path.join(root, file))
            article_id = int(file.split(".")[0])
            labels.append(df.iloc[rows_by_id.get(article_id, no_rows)])

    return image_paths, labels

class ImageDataset(torch.utils.data.Dataset):
    """Dataset of image files whose file names encode an integer id.

    Each item is ``(image, image_id)``. ``image_id`` is the integer parsed
    from the file name (the text before the first ``"."`` of the last
    ``"/"``-separated path component).
    """

    def __init__(self, image_paths, processor=None):
        """Check that every path exists and parse its id.

        Args:
            image_paths: Sequence of image file paths (strings).
            processor: Optional callable applied to each opened image in
                ``__getitem__``; it must return a mapping with a
                ``"pixel_values"`` entry.

        Raises:
            FileNotFoundError: On the first path that does not exist.
        """
        self.image_paths = image_paths
        self.processor = processor
        self.image_ids = []

        for path in self.image_paths:
            if not os.path.exists(path):
                raise FileNotFoundError(f"Image {path} not found.")
            file_name = path.rsplit("/", 1)[-1]
            stem = file_name.split(".", 1)[0]
            self.image_ids.append(int(stem))

    def __len__(self):
        """Return the number of image paths."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, image_id)`` for position ``idx``.

        Without a processor the opened PIL image is returned as-is; with one,
        the first element of the processor's ``"pixel_values"`` is returned.
        """
        image = Image.open(self.image_paths[idx])
        if self.processor is None:
            return image, self.image_ids[idx]

        processed = self.processor(images=image, return_tensors="pt", padding=True)
        return processed["pixel_values"][0], self.image_ids[idx]

In [12]:
# Run on the GPU when one is visible, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Pre-processing pipeline and weights are loaded from the same checkpoint.
clip_checkpoint = "openai/clip-vit-base-patch32"
processor = CLIPProcessor.from_pretrained(clip_checkpoint)
model = CLIPModel.from_pretrained(clip_checkpoint).to(device)



In [13]:
# Article metadata table (one row per article) from the Kaggle input dataset.
text_path = '/kaggle/input/h-and-m-personalized-fashion-recommendations/articles.csv'
articles = pd.read_csv(text_path)
print(articles.shape) # (rows, columns) - roughly 105k articles
# Peek at a single row to see the available columns.
articles.head(1)

(105542, 25)


Unnamed: 0,article_id,product_code,prod_name,product_type_no,product_type_name,product_group_name,graphical_appearance_no,graphical_appearance_name,colour_group_code,colour_group_name,...,department_name,index_code,index_name,index_group_no,index_group_name,section_no,section_name,garment_group_no,garment_group_name,detail_desc
0,108775015,108775,Strap top,253,Vest top,Garment Upper body,1010016,Solid,9,Black,...,Jersey Basic,A,Ladieswear,1,Ladieswear,16,Womens Everyday Basics,1002,Jersey Basic,Jersey top with narrow shoulder straps.


In [14]:
# Map each article_id to its *index label* in `articles`. The lookups in the
# training / validation code go through the label-based `articles.loc[...]`, so
# storing enumerate() positions is only correct while the index is an untouched
# 0..n-1 range. With the default index both give the same numbers.
article_id_to_idx = dict(zip(articles["article_id"], articles.index))

# For every column: the distinct values it takes (`label_names`) and a
# value -> integer class id map (`label_names_to_idx`). Class ids follow the
# order of first appearance returned by `unique()`.
class_names = articles.columns.tolist()
label_names = dict()
label_names_to_idx = dict()
for class_name in class_names:
    label_names[class_name] = articles[class_name].unique()
    label_names_to_idx[class_name] = {
        label_name: idx for idx, label_name in enumerate(label_names[class_name])
    }

article_ids = label_names["article_id"]

# Categorical columns considered as classification targets.
selected_class_names = [
    "product_group_name",
    "product_type_name",
    "graphical_appearance_name",
    "colour_group_name",
    "perceived_colour_value_name",
    "perceived_colour_master_name",
    "department_name",
    "index_name",
    "index_group_name",
    "section_name",
    "garment_group_name",
]

In [15]:
# Number of articles in each product group.
product_group_name_cnts = articles["product_group_name"].value_counts()

# Product groups with fewer than 10 articles.
removed_label_names = product_group_name_cnts.loc[product_group_name_cnts.lt(10)]

# Drop every article that belongs to one of those groups.
in_removed_group = articles["product_group_name"].isin(removed_label_names.index)
removed_label_idxs = articles.loc[in_removed_group].index
articles = articles.drop(removed_label_idxs)

In [16]:
# Root of the Kaggle input dataset; image files are looked up under `<data_dir>/images/`.
data_dir = "/kaggle/input/h-and-m-personalized-fashion-recommendations"
# Keep only the articles whose image file exists; each missing one is printed on its own line.
image_paths, labels = get_image_paths_and_labels_from_df(articles, data_dir)
print(f"Number of images: {len(image_paths)}")

Image not found for article_id: 174057028
Image not found for article_id: 179208001
Image not found for article_id: 212042043
Image not found for article_id: 212042066
Image not found for article_id: 212629004
Image not found for article_id: 215324023
Image not found for article_id: 216961011
Image not found for article_id: 218354021
Image not found for article_id: 241602023
Image not found for article_id: 268305006
Image not found for article_id: 272591001
Image not found for article_id: 324963030
Image not found for article_id: 348657006
Image not found for article_id: 357792006
Image not found for article_id: 364409037
Image not found for article_id: 369423002
Image not found for article_id: 369423004
Image not found for article_id: 388916001
Image not found for article_id: 395864048
Image not found for article_id: 396135047
Image not found for article_id: 396135048
Image not found for article_id: 396690010
Image not found for article_id: 397376010
Image not found for article_id: 39

In [17]:
# Stratify on product_group_name.
# Every entry of `labels` is a DataFrame slice, so `label["product_group_name"]`
# is a Series; take its first value so the splitter (and the resulting
# *_labels lists) work with plain class names instead of Series objects.
product_group_name = [label["product_group_name"].iloc[0] for label in labels]

# Split 0.6 / 0.2 / 0.2 (train / val / test): hold out 40 % first, then cut
# that hold-out in half, stratifying both times.
train_image_paths, test_image_paths, train_labels, test_labels = train_test_split(
    image_paths, product_group_name, stratify=product_group_name, test_size=0.4, random_state=42)
val_image_paths, test_image_paths, val_labels, test_labels = train_test_split(
    test_image_paths, test_labels, stratify=test_labels, test_size=0.5, random_state=42)

In [18]:
# Define LoRA configuration
lora_config = LoraConfig(
    r=8,                  # Low-rank dimension (adjustable)
    lora_alpha=32,          # Scaling factor (adjustable)
    target_modules=["q_proj", "v_proj", "k_proj"],  # Modules that receive LoRA adapters
    lora_dropout=0.05,       # Dropout rate (optional)
    bias="none",            # Whether to include biases ("none", "all", "lora_only")
    # task_type is intentionally left unset. "classification" is not one of
    # PEFT's TaskType values (e.g. SEQ_CLS, CAUSAL_LM, FEATURE_EXTRACTION), and
    # none of the task-specific wrappers is written for CLIP's text+image
    # forward. Without a task type the generic PeftModel wrapper is used.
    # NOTE(review): recent PEFT releases appear to reject unknown task_type
    # strings at config construction - confirm against the installed version.
)

# Apply LoRA to the CLIP model.
# NOTE: this rebinds `model`; re-running the cell wraps the already-wrapped model again.
model = get_peft_model(model, lora_config)

In [23]:
# One dataset per split; with a processor each item is (pixel_values, article_id).
train_dataset = ImageDataset(train_image_paths, processor)
val_dataset = ImageDataset(val_image_paths, processor)
test_dataset = ImageDataset(test_image_paths, processor)

# Only the training loader shuffles; validation and test keep a fixed order.
batch_size = 256
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Loss and optimizer.
criteria = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

model.to(device)

# Column of `articles` used as the classification target.
class_name = "product_group_name"

# Debug helper: list the trainable parameters.
#for name, param in model.named_parameters():
#    if param.requires_grad:
#        print(name)

In [24]:
def validate(model, dataloader, criteria, device, text_inputs, class_name):
    """Evaluate ``model`` on ``dataloader`` and return ``(avg_loss, accuracy)``.

    Besides its arguments, the function reads three notebook-level globals:
    ``article_id_to_idx`` (article id -> row label), ``articles`` (metadata
    frame) and ``label_names_to_idx`` (column -> {value: class id}).

    Args:
        model: Called as ``model(**text_inputs, pixel_values=images)``; its
            ``logits_per_image`` are used as class scores. Put in eval mode.
        dataloader: Yields ``(images, image_ids)`` batches.
        criteria: Loss called as ``criteria(logits, targets)``.
        device: Device the images and targets are moved to.
        text_inputs: Keyword arguments forwarded to the model with every batch.
        class_name: Column of ``articles`` that holds the target label.

    Returns:
        Tuple ``(avg_loss, accuracy)``, both averaged over all samples seen.
        An empty ``dataloader`` leads to a division by zero.
    """
    model.eval()

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for images, image_ids in dataloader:
            images = images.to(device)
            n_batch = images.size(0)

            # article id -> row in `articles` -> label value -> class id
            targets = []
            for image_id in image_ids:
                row = article_id_to_idx[image_id.item()]
                label_value = articles.loc[row, class_name]
                targets.append(label_names_to_idx[class_name][label_value])
            targets = torch.tensor(targets).to(device)

            logits = model(**text_inputs, pixel_values=images).logits_per_image

            # The loss is weighted by batch size so the final mean is per sample.
            batch_loss = criteria(logits, targets)
            loss_sum += batch_loss.item() * n_batch

            predicted = logits.max(dim=1).indices
            n_correct += (predicted == targets).sum().item()
            n_seen += n_batch

    return loss_sum / n_seen, n_correct / n_seen

In [25]:
# Tokenize the class prompts once; the same inputs are reused for every batch.
class_prompts = [f"A photo of a {label}" for label in label_names[class_name]]
text_inputs = processor(text=class_prompts, return_tensors="pt", padding=True)
text_inputs = text_inputs.to(device)

In [None]:
# Fine-tune on the training split, reporting train and validation metrics after each epoch.
num_epochs = 2  # Adjust as needed
# Loss and optimizer for this run; these rebind the names created alongside the data loaders.
criteria = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

for epoch in range(num_epochs):
    # validate() leaves the model in eval mode, so switch back at the start of every epoch.
    model.train()
    total_loss = 0.0
    total_correct = 0
    total_samples = 0

    for images, image_ids in tqdm(train_dataloader):
        images = images.to(device)

        # Get true labels from image_ids:
        # article id -> row label in `articles` -> label value -> class id
        true_labels = [label_names_to_idx[class_name][articles.loc[article_id_to_idx[image_id.item()], class_name]] 
                       for image_id in image_ids]
        true_labels = torch.tensor(true_labels).to(device)

        # Forward pass (the pre-tokenized class prompts are reused for every batch)
        outputs = model(**text_inputs, pixel_values=images)
        logits_per_image = outputs.logits_per_image  # Shape: [batch_size, num_classes]
        # NOTE(review): no softmax should be needed here - per the PyTorch docs,
        # nn.CrossEntropyLoss expects unnormalized logits.

        # Compute loss (weighted by batch size so the epoch average is per sample)
        loss = criteria(logits_per_image, true_labels)
        total_loss += loss.item() * images.size(0)

        # Predictions and accuracy
        _, preds = torch.max(logits_per_image, dim=1)
        total_correct += (preds == true_labels).sum().item()
        total_samples += images.size(0)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Running averages over the whole epoch (computed while the weights were changing).
    avg_loss = total_loss / total_samples
    accuracy = total_correct / total_samples
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}")

    # Validate after each epoch
    val_loss, val_accuracy = validate(model, val_dataloader, criteria, device, text_inputs, class_name)
    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")


  0%|          | 0/247 [00:00<?, ?it/s]

Epoch [1/2], Loss: 0.5275, Accuracy: 0.8406


In [None]:
# Persist the current weights of `model` (its full state_dict) to a file in the working directory.
model_save_path = 'model.pth'
torch.save(model.state_dict(), model_save_path)
print(f"Model saved to {model_save_path}")


In [None]:
# Final evaluation on the held-out test split, using the same routine as the per-epoch validation.
test_loss, test_accuracy = validate(model, test_dataloader, criteria, device, text_inputs, class_name)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

In [None]:
"""
losses = []

for epoch in range(2):  # Change number of epochs as needed
    model.train()
    for images, image_ids in tqdm(train_dataloader):
        # Get inputs and move to the appropriate device
        images = images.to(device)
        text_inputs = processor(text=[f"A photo of a {label}" for label in label_names[class_name]], 
                                return_tensors="pt", padding=True)
        text_inputs = text_inputs.to(device)

        # Forward pass
        outputs = model(**text_inputs, pixel_values=images)

        logits_per_image = outputs.logits_per_image
        probs = logits_per_image.softmax(dim=1)

        # Get the indices of the labels
        true_labels = [label_names_to_idx[class_name][articles.loc[article_id_to_idx[image_id.item()], class_name]] 
                       for image_id in image_ids]
        true_labels = torch.tensor(true_labels)
        
        # Into one-hot encoding
        y = torch.nn.functional.one_hot(true_labels, num_classes=len(label_names[class_name]))
        
        # Convert into fp32
        y = y.float().to(device)

        loss = criteria(probs, y)
        losses.append(loss.item())

        # Optimizer step
        optimizer.zero_grad()
        loss.backward()

    print(f"Epoch {epoch + 1} completed. Loss: {loss.item()}")
    """