In [1]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
import pickle


# Root of the dataset archive, relative to the notebook's working directory.
dirpath = '../datathon-fme-mango/archive/'
# Directory holding the image files; keeps its trailing slash because later
# code builds file paths by plain string concatenation.
imagepath = f'{dirpath}images/images/'

## ResNet50 embeddings

In [None]:
import torch
from torchvision import models, transforms

# Size of the flattened feature vector produced by `model` below: ResNet-50
# with its final fully connected layer removed emits 2048 features per image
# (the previous value of 1024 did not match the model).
EMBEDDING_DIM = 2048

# Build the feature extractor: an ImageNet-pretrained ResNet-50 with the last
# child module (the classification layer) dropped, so the forward pass stops
# at the pooled features.
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in
# favour of the `weights=` argument — switch once the minimum installed
# version is confirmed.
model = models.resnet50(pretrained=True)
model = torch.nn.Sequential(*list(model.children())[:-1])
# Inference mode: freezes batch-norm statistics and disables dropout.
model.eval()

# Preprocessing pipeline applied to every image before it reaches the model:
# resize, centre-crop to 224x224, convert to a tensor, then normalise with the
# per-channel mean/std that torchvision documents for its ImageNet models.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# load the image
def load_image(imagepath):
    """Load an image file and turn it into a model-ready batch of one.

    Args:
        imagepath: Path of the image file to open.

    Returns:
        The tensor produced by the module-level ``transform`` pipeline, with a
        leading batch axis of size 1 added.

    Raises:
        Whatever ``PIL.Image.open`` / ``convert`` raise for a missing,
        unreadable or corrupt file.
    """
    # Use a context manager so the underlying file handle is closed right
    # away instead of lingering until garbage collection.
    with Image.open(imagepath) as raw_image:
        # Force three channels: grayscale, palette and RGBA files would
        # otherwise not match the 3-channel mean/std used by Normalize.
        image = raw_image.convert('RGB')
    image = transform(image)
    return image.unsqueeze(0)

# generate the embedding
def generate_embedding(imagepath):
    """Run one image file through the feature extractor.

    Args:
        imagepath: Path of the image file to embed.

    Returns:
        The raw output tensor of the module-level ``model`` for the single
        image batch built by ``load_image``.
    """
    batch = load_image(imagepath)
    # No gradients are needed for pure inference.
    with torch.no_grad():
        return model(batch)

# generate the embeddings for all images
# `imagepaths` lists the files that were embedded successfully, in processing
# order; `embeddings` maps each of those filenames to its feature vector.
imagepaths = []
embeddings = {}
for image in tqdm(os.listdir(imagepath)):
    try:
        embedding = generate_embedding(imagepath + image)
    except Exception as exc:
        # Best-effort: report the file and the reason, then keep going.
        # `Exception` (not a bare except) lets Ctrl-C still stop the loop.
        print(f'Error with {image}: {exc}')
        continue
    # Record the file only once its embedding exists, so `imagepaths` and
    # `embeddings` always describe the same set of images.
    imagepaths.append(image)
    embeddings[image] = embedding

# save the embeddings
# Flatten every output tensor to a 1-D numpy vector for storage.
embeddings = {k: v.numpy().flatten() for k, v in embeddings.items()}
# Make sure the output directory exists so the run is not lost to a
# FileNotFoundError at write time.
os.makedirs('embeddings', exist_ok=True)
# save as pickle
with open('embeddings/embeddings_resnet50.pkl', 'wb') as f:
    pickle.dump(embeddings, f)

100%|██████████| 1000/1000 [01:46<00:00,  9.39it/s]


In [None]:
# %pip install fashion-clip 

## FashionCLIP embeddings

In [None]:
from fashion_clip.fashion_clip import FashionCLIP

# Number of images handed to the encoder per batch.
BATCH_SIZE = 32

# Instantiate the FashionCLIP wrapper for the 'fashion-clip' model.
fclip = FashionCLIP('fashion-clip')

def has_error(imagepath):
    """Report whether PIL fails to open the file at ``imagepath``.

    ``Image.open`` is lazy: it identifies the file from its header without
    decoding pixel data, so this detects missing or unrecognised files rather
    than every form of corruption.

    Args:
        imagepath: Path of the file to probe.

    Returns:
        ``True`` if opening the file raised an exception, ``False`` otherwise.
    """
    try:
        # The context manager closes the file handle immediately; probing a
        # whole directory would otherwise leave one handle open per file.
        with Image.open(imagepath):
            pass
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        return True
    return False

# Keep only the files PIL can open, then build the matching full paths.
imagepaths = [image for image in os.listdir(imagepath) if not has_error(imagepath + image)]
imagefullpaths = [imagepath + image for image in imagepaths]
embeddings = fclip.encode_images(imagefullpaths, batch_size=BATCH_SIZE)

# Presumably encode_images returns one embedding per input path, in input
# order — TODO confirm. Fail loudly on a mismatch, because zip() would
# otherwise silently drop the unmatched filenames.
assert len(embeddings) == len(imagepaths), 'encoder returned an unexpected number of embeddings'
embeddings_dict = dict(zip(imagepaths, embeddings))

# Make sure the output directory exists so the encoding work is not lost to a
# FileNotFoundError at write time.
os.makedirs('embeddings', exist_ok=True)
# save as pickle
with open('embeddings/embeddings_fclip.pkl', 'wb') as f:
    pickle.dump(embeddings_dict, f)

  from .autonotebook import tqdm as notebook_tqdm
