In [1]:
import torch
from transformers import AutoImageProcessor, AutoModel
from glob import glob
from PIL import Image
from tqdm.notebook import tqdm
import numpy as np
import os
import pandas as pd
import pickle

In [2]:
import platform

# Select the data root for the current machine based on the operating system
# name reported by platform.system().
system_name = platform.system()
if system_name == 'Darwin':
    DATA_PATH = "/Users/maltegenschow/Documents/Uni/Thesis/Data.nosync"
elif system_name == 'Linux':
    DATA_PATH = "/pfs/work7/workspace/scratch/tu_zxmav84-thesis/Data.nosync"
else:
    # Without this branch DATA_PATH would simply stay undefined and the first
    # cell that uses it would fail with an unrelated-looking NameError.
    raise RuntimeError(f"No DATA_PATH configured for platform {system_name!r}")

In [3]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Setup Model

In [4]:
# Checkpoint name shared by the image processor and the model.
model_name = "facebook/dinov2-base"
# Preprocessing pipeline that turns PIL images into model inputs.
processor = AutoImageProcessor.from_pretrained(model_name)
# Load the model and move it onto the selected device in one step.
model = AutoModel.from_pretrained(model_name).to(device)

### Load Metadata and Labels

In [5]:
# Load the dress metadata. After transposing, the former index is exposed as
# the `sku` column; only `sku` and `garment_type` are kept.
df = pd.read_json(f"{DATA_PATH}/Zalando_Germany_Dataset/dresses/metadata/dresses_metadata.json").T.reset_index().rename(columns={'index': 'sku'})[['sku', 'garment_type']]
# Create id2label and label2id mappings. The label order is taken once from
# value_counts().index so that both mappings are built from the same sequence.
garment_types = df.garment_type.value_counts().index
id2label = dict(enumerate(garment_types))
label2id = {label: idx for idx, label in id2label.items()}
# Map labels to ids
df['label'] = df.garment_type.map(label2id)
# Save to disk. The context manager closes the file handle as soon as the
# dump finishes instead of leaving it open until garbage collection.
with open("garment_type_id2label.pkl", "wb") as label_file:
    pickle.dump(id2label, label_file)

In [6]:
# Compute one DINOv2 embedding per row of `df` and cache the result on disk;
# later runs load the cached tensor instead of recomputing it.
save_path = f"{DATA_PATH}/Models/Assessor/DinoV2/Embeddings/dinov2_embeddings.pt"
if not os.path.exists(save_path):
    print('Calculating embeddings from DINOV2 model')
    # Create the output folder before the loop so the final torch.save cannot
    # fail on a missing directory after every image has been processed.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    # One row per dataframe row; the width is read from the model config
    # (768 for the base checkpoint) rather than hard-coded.
    embeddings = torch.zeros(df.shape[0], model.config.hidden_size)
    root_path = f"{DATA_PATH}/Zalando_Germany_Dataset/dresses/images/square_images/"

    # Enumerate positions explicitly so the target row does not depend on the
    # dataframe's index labels.
    for position, sku in enumerate(tqdm(df['sku'], total=df.shape[0])):
        # Load Image and preprocess
        img_path = f"{root_path}{sku}.jpg"
        with Image.open(img_path) as raw_img:
            img = raw_img.convert('RGB')
        # `inputs` rather than `input`, which would shadow the builtin.
        inputs = processor(img, return_tensors="pt").to(device)
        # Perform forward pass without tracking gradients
        with torch.no_grad():
            output = model(**inputs)
        # Store the pooled output as this image's embedding
        embeddings[position, :] = output['pooler_output']

    # Save embeddings to disk
    torch.save(embeddings, save_path)
else:
    print('Loading embeddings from disk')
    embeddings = torch.load(save_path)

Loading embeddings from disk


## Get nearest neighbors

In [8]:
# Draw a single random row from the metadata table and display it.
sample = df.sample(n=1)
sample

Unnamed: 0,sku,garment_type,label
7969,VE121C1SF-K17,casual_dresses,0
