In [1]:
import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import torch
from PIL import Image
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from transformers import CLIPProcessor, CLIPModel

from src import util

In [2]:
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The checkpoint name and loading options are shared by the model and its
# processor. `local_files_only=True` means nothing is downloaded: the files
# must already be present under the "model" cache directory.
checkpoint = "openai/clip-vit-base-patch32"
load_kwargs = {"cache_dir": "model", "local_files_only": True}

processor = CLIPProcessor.from_pretrained(checkpoint, **load_kwargs)
model = CLIPModel.from_pretrained(checkpoint, **load_kwargs).to(device)



In [3]:
# Report the size of the loaded CLIP checkpoint (total parameter count).
n_params = model.num_parameters()
print(f"Number of parameters: {n_params}")

Number of parameters: 151277313


In [4]:
# Root folder of the dataset; also passed to the image-path helper later on.
data_dir = "data"

# Article metadata table: one row per article, one column per attribute.
articles_csv = f"{data_dir}/articles.csv"
articles = pd.read_csv(articles_csv)

# The other tables are not needed for the zero-shot evaluation in this notebook.
# customers = pd.read_csv(f"{data_dir}/customers.csv")
# transactions = pd.read_csv(f"{data_dir}/transactions_train.csv")

In [5]:
# Lookup from article_id to the row number of that article in `articles`
# (row numbers follow the order of the "article_id" column).
article_id_to_idx = dict(zip(articles["article_id"], range(len(articles))))

# For every column of the table, collect its distinct values and report how
# many there are. The distinct values of a column act as its candidate labels.
class_names = list(articles.columns)
label_names = {}
for class_name in class_names:
    distinct_values = articles[class_name].unique()
    label_names[class_name] = distinct_values
    print(f"{class_name}: {len(distinct_values)}")
article_ids = label_names["article_id"]

# Attributes (human-readable "*_name" columns) evaluated in the cells below.
selected_class_names = [
    "product_group_name",
    "product_type_name",
    "graphical_appearance_name",
    "colour_group_name",
    "perceived_colour_value_name",
    "perceived_colour_master_name",
    "department_name",
    "index_name",
    "index_group_name",
    "section_name",
    "garment_group_name",
]

article_id: 105542
product_code: 47224
prod_name: 45875
product_type_no: 132
product_type_name: 131
product_group_name: 19
graphical_appearance_no: 30
graphical_appearance_name: 30
colour_group_code: 50
colour_group_name: 50
perceived_colour_value_id: 8
perceived_colour_value_name: 8
perceived_colour_master_id: 20
perceived_colour_master_name: 20
department_no: 299
department_name: 250
index_code: 10
index_name: 10
index_group_no: 5
index_group_name: 5
section_no: 57
section_name: 56
garment_group_no: 21
garment_group_name: 21
detail_desc: 43405


In [6]:
# Collect the image files and their labels through the project helper.
# NOTE(review): the exact return format of this helper is defined in
# src/util.py — presumably parallel sequences of paths and article ids; confirm.
image_paths, labels = util.get_image_paths_and_labels_ordered(articles, data_dir)
num_images = len(image_paths)
print(f"Number of images: {num_images}")

# Unused train/val/test split (0.6/0.2/0.2), kept for reference:
# image_paths_train, image_paths_val, labels_train, labels_val = train_test_split(
#     image_paths, labels, test_size=0.2, random_state=42)
# image_paths_train, image_paths_test, labels_train, labels_test = train_test_split(
#     image_paths_train, labels_train, test_size=0.25, random_state=42)

# The whole image set is evaluated, in a fixed order (no shuffling).
dataset = util.ImageDataset(
    image_paths=image_paths,
    labels=labels,
    processor=processor,
)
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=128,
    shuffle=False,
)

Number of images: 105100


In [7]:
# Zero-shot sanity check on the first batch: for every selected attribute,
# score each image against one text prompt per candidate label and print the
# five most likely labels for the first image next to its true label.
images, image_ids = next(iter(dataloader))

# The image batch is the same for every attribute, so move it to the device
# once instead of once per loop iteration.
images = images.to(device)

# Row of the first image's article in `articles`; also attribute-independent.
first_row = article_id_to_idx[image_ids[0].item()]

for class_name in selected_class_names:
    label_name = label_names[class_name]
    prompts = [f"A photo of a {label}" for label in label_name]
    text_inputs = processor(text=prompts, return_tensors="pt", padding=True)
    text_inputs = text_inputs.to(device)

    with torch.no_grad():
        outputs = model(**text_inputs, pixel_values=images)

    # logits_per_image has one row per image and one column per prompt;
    # softmax over the prompts turns each row into label probabilities.
    logits_per_image = outputs.logits_per_image
    probs = logits_per_image.softmax(dim=1).to("cpu")

    # Never ask for more candidates than the attribute has labels
    # (torch.topk raises when k exceeds the dimension size).
    values, indices = torch.topk(probs, k=min(5, len(label_name)), dim=1)

    # Only print the prediction for the first image
    true_label = articles.loc[first_row, class_name]
    print(f"Class name: {class_name}\tTrue label: {true_label}")
    for label_idx, prob in zip(indices[0].tolist(), values[0].tolist()):
        print(f"{label_name[label_idx]}:\t{prob:.4f}")
    print()

Class name: product_group_name	True label: Garment Upper body
Garment Upper body:	0.4556
Nightwear:	0.1890
Garment Lower body:	0.1040
Garment Full body:	0.0965
Underwear/nightwear:	0.0912

Class name: product_type_name	True label: Vest top
Vest top:	0.8381
Swimwear top:	0.0419
Jumpsuit/Playsuit:	0.0313
Bodysuit:	0.0281
Pyjama jumpsuit/playsuit:	0.0146

Class name: graphical_appearance_name	True label: Solid
Chambray:	0.1865
Other pattern:	0.1409
Slub:	0.1329
Melange:	0.1112
Colour blocking:	0.0713

Class name: colour_group_name	True label: Black
Dark Green:	0.1954
Black:	0.1702
Dark Blue:	0.0994
Dark Grey:	0.0950
Dark Beige:	0.0634

Class name: perceived_colour_value_name	True label: Dark
Dark:	0.4158
Medium:	0.3423
Medium Dusty:	0.1090
Bright:	0.0814
Undefined:	0.0384

Class name: perceived_colour_master_name	True label: Black
Black:	0.6113
Khaki green:	0.0914
Grey:	0.0558
White:	0.0548
Brown:	0.0445

Class name: department_name	True label: Jersey Basic
Light Basic Jersey:	0.1492
Jers

In [10]:
# Zero-shot evaluation over the whole dataset.
#
# For every selected attribute, each image is scored against one text prompt
# per candidate label. A prediction counts as top-1 correct when the best
# scoring label equals the article's true label, and as top-5 correct when the
# true label is among the five best. Progress is written to log/<timestamp>.log
# and the accuracies are printed at the end.
bs = 128
shuffle = False
top_k = 5
dataloader = torch.utils.data.DataLoader(dataset, batch_size=bs, shuffle=shuffle)

top1_correct = {class_name: 0 for class_name in selected_class_names}
top5_correct = {class_name: 0 for class_name in selected_class_names}

# Timestamp for the log file name, taken in-process rather than by shelling
# out to `date` (which is platform dependent and left a pipe unclosed).
date = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
# Make sure the log directory exists so that open() below cannot fail on it.
os.makedirs("log", exist_ok=True)

# Everything that depends only on the attribute (not on the image batch) is
# prepared once up front: candidate labels, tokenized prompts already on the
# device, and the column of true labels as an array for positional lookup.
candidate_labels = {}
prompt_inputs = {}
true_label_columns = {}
for class_name in selected_class_names:
    label_name = label_names[class_name]
    candidate_labels[class_name] = np.asarray(label_name, dtype=object)
    prompt_inputs[class_name] = processor(
        text=[f"A photo of a {label}" for label in label_name],
        return_tensors="pt",
        padding=True,
    ).to(device)
    true_label_columns[class_name] = articles[class_name].to_numpy(dtype=object)

with open(f"log/{date}.log", "w") as f:
    f.write(f"{bs=} {shuffle=} {len(dataset)=}\n")

    for batch_idx, (images, image_ids) in enumerate(tqdm(dataloader)):
        f.write(f"{batch_idx=}\n")

        # Per-batch work shared by all attributes: device transfer and the
        # row number of each image's article in `articles`.
        images = images.to(device)
        rows = [article_id_to_idx[image_id.item()] for image_id in image_ids]

        for class_name in selected_class_names:
            f.write(f"{class_name=}\n")
            label_name = candidate_labels[class_name]

            with torch.no_grad():
                outputs = model(**prompt_inputs[class_name], pixel_values=images)

            # One row per image, one column per candidate label.
            logits_per_image = outputs.logits_per_image
            probs = logits_per_image.softmax(dim=1).to("cpu")

            # Clamp k so attributes with fewer than top_k labels still work.
            values, indices = torch.topk(probs, k=min(top_k, len(label_name)), dim=1)

            # predicted[i, j] is the j-th best label for image i; compare it
            # against the true label of image i for the whole batch at once.
            predicted = label_name[indices.numpy()]
            true_labels = true_label_columns[class_name][rows]
            hits = predicted == true_labels[:, None]

            # int(...) keeps the counters plain Python ints, so the logged
            # dictionaries look the same as before.
            top1_correct[class_name] += int(hits[:, 0].sum())
            top5_correct[class_name] += int(hits.any(axis=1).sum())

    f.write(f"{top1_correct=}\n")
    f.write(f"{top5_correct=}\n")

for class_name in selected_class_names:
    print(f"{class_name=}")
    print(f"Top-1 accuracy: {top1_correct[class_name] / len(dataset)}")
    print(f"Top-5 accuracy: {top5_correct[class_name] / len(dataset)}")
    print()

100%|██████████| 822/822 [1:21:18<00:00,  5.94s/it]

class_name='product_group_name'
Top-1 accuracy: 0.33465271170313987
Top-5 accuracy: 0.9118173168411037

class_name='product_type_name'
Top-1 accuracy: 0.3812369172216936
Top-5 accuracy: 0.7733396764985728

class_name='graphical_appearance_name'
Top-1 accuracy: 0.07270218839200761
Top-5 accuracy: 0.368116079923882

class_name='colour_group_name'
Top-1 accuracy: 0.2691341579448145
Top-5 accuracy: 0.7252045670789724

class_name='perceived_colour_value_name'
Top-1 accuracy: 0.14960989533777355
Top-5 accuracy: 0.6665271170313987

class_name='perceived_colour_master_name'
Top-1 accuracy: 0.5999048525214081
Top-5 accuracy: 0.8928924833491912

class_name='department_name'
Top-1 accuracy: 0.08846812559467174
Top-5 accuracy: 0.3135775451950523

class_name='index_name'
Top-1 accuracy: 0.3254900095147479
Top-5 accuracy: 0.7511132254995243

class_name='index_group_name'
Top-1 accuracy: 0.41586108468125593
Top-5 accuracy: 1.0

class_name='section_name'
Top-1 accuracy: 0.14439581351094197
Top-5 accur


