In [1]:
from PIL import Image
import requests
from transformers import CLIPProcessor, CLIPModel
import torch
import pandas as pd
from src import util

In [2]:
# Pick the GPU when one is visible to torch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Model and processor are loaded from the same pretrained checkpoint.
checkpoint = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(checkpoint)
model = model.to(device)
processor = CLIPProcessor.from_pretrained(checkpoint)



In [3]:
# Report how many parameters the loaded model has.
n_params = model.num_parameters()
print(f"Number of parameters: {n_params}")

Number of parameters: 151277313


In [4]:
# Root folder of the dataset; other cells build their paths from it.
data_dir = "data"

# Article metadata table (one row per article).
articles_csv = f"{data_dir}/articles.csv"
articles = pd.read_csv(articles_csv)

# The remaining tables are not loaded in this notebook:
# customers = pd.read_csv(f"{data_dir}/customers.csv")
# transactions = pd.read_csv(f"{data_dir}/transactions_train.csv")

In [5]:
# Peek at the first row to see which columns the articles table provides.
articles.head(n=1)

Unnamed: 0,article_id,product_code,prod_name,product_type_no,product_type_name,product_group_name,graphical_appearance_no,graphical_appearance_name,colour_group_code,colour_group_name,...,department_name,index_code,index_name,index_group_no,index_group_name,section_no,section_name,garment_group_no,garment_group_name,detail_desc
0,108775015,108775,Strap top,253,Vest top,Garment Upper body,1010016,Solid,9,Black,...,Jersey Basic,A,Ladieswear,1,Ladieswear,16,Womens Everyday Basics,1002,Jersey Basic,Jersey top with narrow shoulder straps.


In [6]:
# Every distinct product_group_name becomes one candidate text label.
print("product group names\n")
product_group_name = articles['product_group_name'].unique()
print(product_group_name)

# Single sample image used as a sanity check before running on batches.
image_path = f"{data_dir}/images/010/0108775015.jpg"
image = Image.open(image_path)

# Score the image against all candidate labels and move the result to the CPU.
probs = util.hf_clip_predict(model, processor, product_group_name, image).to("cpu")

# Keep the 5 highest-scoring labels; squeeze drops the size-1 dimensions so the
# results can be iterated entry by entry.
values, indices = (t.squeeze().to("cpu") for t in torch.topk(probs, 5))

# Print each selected label next to its score.
print("\nTop predictions of product_group_name:\n")
for value, index in zip(values, indices):
    label = product_group_name[index]
    print(f"{label}: {value:.4f}")

product group names

['Garment Upper body' 'Underwear' 'Socks & Tights' 'Garment Lower body'
 'Accessories' 'Items' 'Nightwear' 'Unknown' 'Underwear/nightwear' 'Shoes'
 'Swimwear' 'Garment Full body' 'Cosmetic' 'Interior textile' 'Bags'
 'Furniture' 'Garment and Shoe care' 'Fun' 'Stationery']

Top predictions of product_group_name:

Garment Upper body: 0.4556
Nightwear: 0.1890
Garment Lower body: 0.1040
Garment Full body: 0.0965
Underwear/nightwear: 0.0912


In [7]:
# Treat every column of the articles table as a "class" and collect the
# distinct values ("labels") found in it.
class_names = list(articles.columns)
label_names = {class_name: articles[class_name].unique() for class_name in class_names}

# Show how many distinct labels each class has.
for class_name, uniques in label_names.items():
    print(f"{class_name}: {len(uniques)}")

# The distinct article ids are used to look up the matching images.
article_ids = label_names["article_id"]

article_id: 105542
product_code: 47224
prod_name: 45875
product_type_no: 132
product_type_name: 131
product_group_name: 19
graphical_appearance_no: 30
graphical_appearance_name: 30
colour_group_code: 50
colour_group_name: 50
perceived_colour_value_id: 8
perceived_colour_value_name: 8
perceived_colour_master_id: 20
perceived_colour_master_name: 20
department_no: 299
department_name: 250
index_code: 10
index_name: 10
index_group_no: 5
index_group_name: 5
section_no: 57
section_name: 56
garment_group_no: 21
garment_group_name: 21
detail_desc: 43405


In [8]:
# Load the images that belong to the known article ids.
# The dataset root comes from the notebook-wide `data_dir` rather than a repeated
# "data" literal, so the images are always read from the same root as articles.csv.
# NOTE(review): the recorded run loaded fewer images (105100) than there are
# distinct article ids (105542) — presumably ids without an image file are
# skipped; confirm in util.get_images_from_ids.
# Alternative loader that does not take the article ids:
# images, image_ids = util.get_images_recursive(data_dir=data_dir)
images, image_ids = util.get_images_from_ids(data_dir=data_dir, article_ids=article_ids)
print(f"{len(images)=}")

len(images)=105100


In [26]:
# Largest number of images sent through the model in one call.
max_bs = 128

# Score the first `max_bs` images against every product group label and move
# the result to the CPU.
batch = images[:max_bs]
probs = util.hf_clip_predict(model, processor, product_group_name, batch)
probs = probs.to("cpu")

# Top 5 scores and their label indices along dim 1
# (presumably one row per image — confirm in util.hf_clip_predict).
values, indices = probs.topk(k=5, dim=1)

In [27]:
# Print the top-5 labels and scores for the first entry of the batch.
first_probs = probs[0]
for i in indices[0]:
    print(f"{product_group_name[i]}: {first_probs[i]:.4f}")

Garment Upper body: 0.4556
Nightwear: 0.1890
Garment Lower body: 0.1040
Garment Full body: 0.0965
Underwear/nightwear: 0.0912
