# MODELS TO BE ENSEMBLED

In [None]:
# Checkpoints of the two models to be blended.
# path1 is the better model of the two.
path1 = './my_experiments/soup_slim_p10k_h_m_image_net_happy_whale_w_03.pt'
# path2 is the second model mixed into path1.
path2 = './my_experiments/ViT-H-14-laion2b_s32b_b79k-image_net-v2-p10k-h&m-amazon-Arcface(k=3)-All-Epoch(10)-Reduce_LR_0.1/model_epoch_2_mAP3_0.58_slim.pt'

In [None]:
import os
import math


import numpy as np
 
import torch as th
import torch.nn as nn
import torch.nn.functional as F
import open_clip

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import torchvision.transforms as T

from tqdm import tqdm
import pandas as pd
import cv2
from PIL import Image

import utilities

In [None]:
# get product 10k

def read_img(img_path, is_gray=False):
    """Load an image from disk with OpenCV.

    Args:
        img_path: Path of the image file to read.
        is_gray: If True, read the image as grayscale. Otherwise read it as a
            color image and convert it from OpenCV's BGR channel order to RGB.

    Returns:
        The image returned by ``cv2.imread`` (converted to RGB when
        ``is_gray`` is False).

    Raises:
        FileNotFoundError: If ``cv2.imread`` returns ``None``, which is how
            OpenCV reports a missing or undecodable file.
    """
    mode = cv2.IMREAD_GRAYSCALE if is_gray else cv2.IMREAD_COLOR
    img = cv2.imread(img_path, mode)
    # cv2.imread does not raise on failure; surface the problem here with the
    # offending path instead of failing later inside cvtColor (or returning None).
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    if not is_gray:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img

class ProductDataset(Dataset):
    """Image dataset driven by a CSV annotation file.

    Every CSV row supplies an image path (joined onto ``img_dir``) and a
    product id. The column names used for both are configurable through
    ``headers``. In ``test_mode`` the image is cropped to the row's
    ``bbox_x`` / ``bbox_y`` / ``bbox_w`` / ``bbox_h`` box before any transform.
    """

    def __init__(self,
                 img_dir,
                 annotations_file,
                 transform=None,
                 final_transform=None,
                 headers=None,
                 test_mode=False):
        """Read the annotation CSV and store the dataset configuration.

        Args:
            img_dir: Directory that the CSV image paths are relative to.
            annotations_file: Path of the CSV file read with ``pd.read_csv``.
            transform: Optional callable invoked as ``transform(image=img)``
                whose result is indexed with ``["image"]``.
            final_transform: Optional callable applied last; numpy images are
                converted to ``PIL.Image`` before it is called.
            headers: Optional mapping with keys ``"img_path"`` and
                ``"product_id"`` naming the CSV columns to use. Defaults to
                columns of those same names.
            test_mode: If True, crop each image to its bounding-box columns.
        """
        self.data = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.final_transform = final_transform
        self.headers = {"img_path": "img_path", "product_id": "product_id"}
        if headers:
            self.headers = headers
        self.test_mode = test_mode

    def __len__(self):
        """Return the number of rows in the annotation CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, product_id)`` for row ``idx``."""
        img_path = os.path.join(self.img_dir, self.data[self.headers["img_path"]][idx])

        img = read_img(img_path)
        if self.test_mode:
            x, y, w, h = self.data["bbox_x"][idx], self.data["bbox_y"][idx], \
                         self.data["bbox_w"][idx], self.data["bbox_h"][idx]
            # Rows index the vertical axis, columns the horizontal one.
            img = img[y:y+h, x:x+w]

        if self.transform is not None:
            # Fix: call the transform stored on the instance. The previous code
            # referenced a bare `transform` name, which is not defined here.
            img = self.transform(image=img)["image"]

        if self.final_transform is not None:
            if isinstance(img, np.ndarray):
                img = Image.fromarray(img)
            img = self.final_transform(img)

        product_id = self.data[self.headers["product_id"]][idx]
        return img, product_id
    
def get_final_transform():
    """Build the torchvision preprocessing pipeline applied to every image.

    The pipeline resizes to 224x224 with antialiased bicubic interpolation,
    converts the image to a tensor and normalizes it with fixed per-channel
    mean/std constants.

    Returns:
        A ``T.Compose`` chaining the three steps in that order.
    """
    resize = T.Resize(
        size=(224, 224),
        interpolation=T.InterpolationMode.BICUBIC,
        antialias=True,
    )
    to_tensor = T.ToTensor()
    normalize = T.Normalize(
        mean=(0.48145466, 0.4578275, 0.40821073),
        std=(0.26862954, 0.26130258, 0.27577711),
    )
    return T.Compose([resize, to_tensor, normalize])

def _squeeze_keep_batch(tensor):
    """Drop singleton dimensions of ``tensor`` except the leading (batch) one."""
    kept = [tensor.shape[0]] + [d for d in tensor.shape[1:] if d != 1]
    return tensor.reshape(kept)


@th.no_grad()
def extract_embeddings(model, dataloader, epoch=10, use_cuda=True):
    """Run ``model`` over ``dataloader`` and collect outputs and product ids.

    Args:
        model: Callable applied to each image batch after the batch has been
            cast to half precision.
        dataloader: Iterable yielding ``(imgs, product_ids)`` tensor pairs.
        epoch: Number of full passes over ``dataloader``; the results of every
            pass are appended one after another.
        use_cuda: If True, move each image batch to the GPU before the
            forward pass.

    Returns:
        Tuple ``(features, product_ids)`` of numpy arrays concatenated over
        all batches; ``features`` is float32.

    Raises:
        ValueError: From ``np.concatenate`` when no batch was produced.
    """
    features = []
    product_id = []

    for _ in range(epoch):
        for imgs, p_id in tqdm(dataloader):
            if use_cuda:
                imgs = imgs.cuda()
            # A plain th.squeeze would also remove the batch axis when a batch
            # holds a single sample (e.g. the last batch), which makes the
            # arrays impossible to concatenate below. Keep the batch axis.
            out = _squeeze_keep_batch(model(imgs.half()))
            features.append(out.detach().cpu().numpy().astype(np.float32))
            product_id.append(_squeeze_keep_batch(p_id).detach().cpu().numpy())

    return np.concatenate(features, axis=0), np.concatenate(product_id)

# ENSEMBLE STEP

In [None]:
# Reference model: the visual tower of a ViT-H-14 built without pretrained
# weights, then filled with the checkpoint stored at path1.
backbone = open_clip.create_model_and_transforms('ViT-H-14', None)[0].visual
backbone.load_state_dict(th.load(path1))
# Module.half() and Module.eval() both return the module itself, so chaining
# them leaves `backbone` in half precision and evaluation mode.
backbone.half().eval()

In [None]:
# One preprocessing pipeline shared by every dataset below.
final_transform = get_final_transform()

# --- Development test data: queries (cropped to their bbox) and gallery ---
img_dir = "../development_test_data"
dataset_test = ProductDataset(
    img_dir=img_dir,
    annotations_file=os.path.join(img_dir, "queries.csv"),
    transform=None,
    final_transform=final_transform,
    test_mode=True,
)
dataloader_test = DataLoader(dataset=dataset_test, batch_size=512, num_workers=4)

dataset_train = ProductDataset(
    img_dir=img_dir,
    annotations_file=os.path.join(img_dir, "gallery.csv"),
    transform=None,
    final_transform=final_transform,
)
dataloader_train = DataLoader(dataset=dataset_train, batch_size=512, num_workers=4)

# --- Amazon data: same layout, but with different CSV column names ---
# Note: img_dir, dataset_test and dataset_train are rebound here; the loaders
# created above keep their own references to the earlier datasets.
img_dir = "../amazon_dataset_1"
headers = {"img_path": "path", "product_id": "id"}
dataset_test = ProductDataset(
    img_dir=img_dir,
    annotations_file=os.path.join(img_dir, "query.csv"),
    transform=None,
    final_transform=final_transform,
    headers=headers,
)
dataloader_amazon_test = DataLoader(dataset=dataset_test, batch_size=512, num_workers=4)

dataset_train = ProductDataset(
    img_dir=img_dir,
    annotations_file=os.path.join(img_dir, "gallery.csv"),
    transform=None,
    final_transform=final_transform,
    headers=headers,
)
dataloader_amazon_train = DataLoader(dataset=dataset_train, batch_size=512, num_workers=4)

In [None]:

def _compute_retrieval_score(model, dataloader_query, dataloader_gallery, top_k=1000):
    """Score ``model`` on one query/gallery pair of dataloaders.

    Embeddings for both loaders are extracted with a single pass each, the
    ``top_k`` gallery indices per query are obtained from
    ``utilities.get_similiarity_l2``, mapped to gallery labels with
    ``utilities.convert_indices_to_labels`` and scored against the query
    labels with ``utilities.map_per_set``.

    Returns:
        Whatever ``utilities.map_per_set`` returns (presumably a mean average
        precision value; confirm against the ``utilities`` module).
    """
    embeddings_query, labels_query = extract_embeddings(model, dataloader_query, 1)
    embeddings_gallery, labels_gallery = extract_embeddings(model, dataloader_gallery, 1)

    _, indices = utilities.get_similiarity_l2(embeddings_gallery, embeddings_query, top_k)

    # The utilities helpers are fed plain Python lists, as in the original code.
    preds = utilities.convert_indices_to_labels(indices.tolist(), labels_gallery.tolist())
    return utilities.map_per_set(labels_query.tolist(), preds)


@th.no_grad()
def compute_score_test_data(model):
    """Score ``model`` on the development test loaders (``dataloader_test`` vs ``dataloader_train``)."""
    return _compute_retrieval_score(model, dataloader_test, dataloader_train)


@th.no_grad()
def compute_score_amazon_data(model):
    """Score ``model`` on the Amazon loaders (``dataloader_amazon_test`` vs ``dataloader_amazon_train``)."""
    return _compute_retrieval_score(model, dataloader_amazon_test, dataloader_amazon_train)

In [None]:
# Sweep the interpolation weight w over [0, 1] in steps of 1/n and record the
# development-test score of every blended model.
n = 20
W = [i/n for i in range(0, n+1)]
m_ap_test = []
m_ap_amazon = []

# The path2 checkpoint is the same for every weight: read it from disk once
# instead of once per iteration. load_state_dict copies the values into the
# model, so this dict is not modified by the interpolation below.
state_dict_path2 = th.load(path2, map_location="cpu")

with th.no_grad():
    for w in W:
        model = open_clip.create_model_and_transforms('ViT-H-14', None)[0].visual
        model.load_state_dict(state_dict_path2)
        model.half()
        model.eval()

        # Blend parameter by parameter: w * (path2 model) + (1 - w) * (path1 model).
        for o, b in zip(model.parameters(), backbone.parameters()):
            # backbone may already have been moved to the GPU by another cell;
            # bring its parameter to the device of the freshly built model.
            o.data = w*o + (1 - w)*b.to(o.device)

        model.cuda()
        # Amazon scoring is disabled in this sweep; a placeholder 0 is reported
        # and m_ap_amazon is left empty.
        score_amazon = 0
        score_test = compute_score_test_data(model)

        print(f'Weight {w} - test score {score_test} | amazon score {score_amazon}')
        m_ap_test.append(score_test)
        

In [None]:
# Baseline: score the unblended path1 model on both benchmarks.
# Module.cuda() moves the module in place and returns it, so `model` and
# `backbone` are the same object, now on the GPU.
model = backbone.cuda()
score_amazon = compute_score_amazon_data(model)
score_test = compute_score_test_data(model)
(score_amazon, score_test)

In [None]:
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Plot the development-test score against the interpolation weight.
# Fix: the sweep stores its scores in `m_ap_test`; `m_ap` is never defined.
plt.plot(W, m_ap_test)
plt.xlabel("w (weight of the path2 model)")
plt.ylabel("test score")
plt.title("Score vs. interpolation weight");

# SELECT THE BEST w

In [None]:
# Interpolation weight chosen from the sweep (weight given to the path2 model).
w = 0.35
# Where the blended state dict is written.
soup_path = './my_experiments/soup_slim_p10k_h_m_amazon_image_net_happy_whale.pt'

In [None]:
# Build the final blended model with the selected weight and save its weights.
with th.no_grad():
    model = open_clip.create_model_and_transforms('ViT-H-14', None)[0].visual
    model.load_state_dict(th.load(path2))
    model.half()
    model.eval()

    # Blend parameter by parameter: w * (path2 model) + (1 - w) * (path1 model).
    for o, b in zip(model.parameters(), backbone.parameters()):
        # Fix: an earlier cell calls .cuda() on backbone while this freshly
        # built model is still on the CPU; mixing the two devices in one
        # expression raises an error. Bring b to the device of o first.
        o.data = w*o + (1 - w)*b.to(o.device)

    th.save(model.state_dict(), soup_path)