In [22]:
# Step one directory up; presumably this is so that the relative 'data/...' and
# 'weights/...' paths used in later cells resolve from the project root.
# NOTE(review): the target is relative to the current directory, so running this
# cell a second time moves up one more level.
%cd ../

/home/chervovn04/Programming/hackathons/2022


In [42]:
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
import string
import os
import matplotlib.pyplot as plt
from copy import deepcopy
from PIL import Image
from collections import Counter, OrderedDict
%matplotlib inline
%matplotlib widget

from glob import glob
from sklearn.metrics import *
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.applications.efficientnet import EfficientNetB0, EfficientNetB1, preprocess_input
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from model_structure import *

In [26]:
# Choose the CUDA backend when PyTorch can see one, otherwise stay on the CPU.
# NOTE(review): no other visible cell reads `device`; the loaders below map
# checkpoints to the CPU explicitly.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f'Using {device} for inference')

Using cpu for inference


In [27]:
def AP(relevance):
    """Average precision of one ranked result list.

    Parameters
    ----------
    relevance : iterable
        Truthy/falsy flags in rank order; a truthy entry marks a relevant hit.

    Returns
    -------
    float
        Mean of precision@rank taken at every rank that holds a relevant hit
        (ranks start at 1). ``0.0`` when there is no relevant hit at all,
        which includes the empty list.
    """
    hits = 0
    precisions = []
    for rank, is_relevant in enumerate(relevance, start=1):
        if is_relevant:
            hits += 1
            # Precision over the first `rank` results.
            precisions.append(hits / rank)
    if precisions:
        return sum(precisions) / len(precisions)
    return 0.0

def mAP(relevances):
    """Mean average precision over several ranked result lists.

    Parameters
    ----------
    relevances : iterable of iterables
        One relevance list per query, each in the format accepted by ``AP``.
        Any iterable works, including generators.

    Returns
    -------
    float
        Arithmetic mean of the per-query ``AP`` values. ``0.0`` when there are
        no queries, instead of raising ``ZeroDivisionError``.
    """
    # Materialise first so that unsized iterables can be averaged too.
    scores = [AP(relevance) for relevance in relevances]
    if not scores:
        return 0.0
    return sum(scores) / len(scores)

In [28]:
def extract_features(path, models_data):
    """Build one unit-length descriptor for an image from several models.

    Parameters
    ----------
    path : str or path-like
        Image file to read; it is opened with PIL and converted to RGB.
    models_data : iterable of (model, tfm) pairs
        ``tfm`` turns the PIL image into a tensor that is indexed as 3-D below
        (a batch axis is prepended here); ``model`` maps that batch to an
        output tensor.

    Returns
    -------
    numpy.ndarray
        The per-model outputs, with singleton axes squeezed away, concatenated
        along axis 0 and divided by their L2 norm. A vector whose norm is zero
        is returned unscaled rather than turned into NaNs.
    """
    # Decode the file once for all models and release the handle right away;
    # `convert` returns an independent image, so it outlives the `with` block.
    with Image.open(path) as raw:
        image = raw.convert('RGB')

    features = []
    with torch.no_grad():
        for model, tfm in models_data:
            batch = tfm(image)[None, :, :, :]
            # `.cpu()` keeps the conversion valid when the model runs on a GPU.
            pred = model(batch).detach().cpu().numpy().squeeze()
            features.append(pred)

    features = np.concatenate(features, axis=0)
    norm = np.linalg.norm(features)
    if norm > 0:
        features = features / norm
    return features

In [29]:
 def get_emb(data, models_data, base_dir='data/train/'):
    """Embed every image listed in ``data`` and stack the results.

    Parameters
    ----------
    data : object with an ``idx`` attribute
        Iterating ``data.idx`` yields the image identifiers.
    models_data : iterable of (model, tfm) pairs
        Passed through unchanged to ``extract_features``.
    base_dir : str
        Prefix placed directly in front of ``<idx>.png``; no separator is
        inserted, so it has to end with one.

    Returns
    -------
    numpy.ndarray
        One row per identifier, in iteration order.
    """
    vectors = [
        extract_features(f'{base_dir}{idx}.png', models_data)
        for idx in tqdm(data.idx)
    ]
    return np.array(vectors)
        
def evaluate(db, que, db_emb, que_emb, need_processing=0, n_neighbors=10):
    """Score a retrieval run with mAP over the nearest gallery items.

    Parameters
    ----------
    db, que : objects exposing an ``item_nm`` Series
        Gallery and query tables; row ``i`` is matched by position with row
        ``i`` of the corresponding embedding matrix.
    db_emb, que_emb : array-like
        Embeddings for the gallery and the queries.
    need_processing : truthy/falsy
        When truthy, both tables are first passed through ``process``.
    n_neighbors : int
        How many gallery items are retrieved per query (default 10, the value
        that used to be hard-coded).

    Returns
    -------
    float
        ``mAP`` of the per-query relevance lists, where a retrieved item is
        relevant when its ``item_nm`` equals the query's ``item_nm``.
    """
    if need_processing:
        # NOTE(review): `process` is not defined in the visible cells;
        # presumably it arrives through one of the star imports — confirm.
        db = process(db)
        que = process(que)

    neigh = NearestNeighbors(n_neighbors=n_neighbors, metric='cosine')
    neigh.fit(db_emb)
    # Only the indices are needed, so skip returning the distances.
    idxs = neigh.kneighbors(que_emb, n_neighbors, return_distance=False)

    # Pull the label columns out once instead of calling `.iloc` per element.
    db_names = db.item_nm.to_numpy()
    que_names = que.item_nm.to_numpy()

    relevances = [
        [que_names[i] == db_names[idx] for idx in neighbours]
        for i, neighbours in enumerate(idxs)
    ]
    return mAP(relevances)

In [30]:
# Read the training table and hold out 20% of its rows as retrieval queries;
# the remaining 80% form the gallery that is searched.
data = pd.read_csv('data/train.csv')
# A fixed seed keeps the gallery/query split, and with it the reported mAP,
# identical across kernel restarts.
db, que = train_test_split(data, train_size=0.8, random_state=42)

In [31]:
class ViT_H_Wrapper(nn.Module):
    """Adapter that reduces a wrapped model's result to its ``pooler_output``."""

    def __init__(self, model):
        """Store ``model``, the callable whose result carries ``pooler_output``."""
        super().__init__()
        self.model = model

    def forward(self, x):
        """Call the wrapped model on ``x`` and return the ``pooler_output`` attribute."""
        return self.model(x).pooler_output
    
class SiameseModel(nn.Module):
    """Runs one shared extractor over both members of an image pair."""

    def __init__(self, extractor):
        """Keep ``extractor``, which is applied to each input separately."""
        super().__init__()
        self.extractor = extractor

    def forward(self, image_0, image_1):
        """Return ``(extractor(image_0), extractor(image_1))``, evaluated in that order."""
        first = self.extractor(image_0)
        second = self.extractor(image_1)
        return first, second

In [32]:
def get_vit_model(path):
    """Load a pickled model onto the CPU and replace its ``fc`` head.

    Parameters
    ----------
    path : str or path-like
        Checkpoint that ``torch.load`` turns into a ready model object (the
        result is used directly as a module, not as a state dict).

    Returns
    -------
    The loaded model in eval mode, with ``fc`` set to an ``Identical()`` instance.
    """
    # NOTE(review): torch.load unpickles arbitrary objects — only point this
    # at checkpoint files you trust.
    model = torch.load(path, map_location=torch.device('cpu'))
    # NOTE(review): `Identical` is not defined in the visible cells; presumably
    # a pass-through module supplied by a star import — confirm.
    model.fc = Identical()
    # Switch to eval mode only after the head swap, so the freshly created
    # replacement module is put into eval mode as well.
    model.eval()
    return model

def get_HF_vit_model(path):
    """Load a pickled model onto the CPU and wrap it in ``ViT_H_Wrapper``.

    Parameters
    ----------
    path : str or path-like
        Checkpoint that ``torch.load`` turns into a ready model object.

    Returns
    -------
    ViT_H_Wrapper
        Wrapper around the loaded model; both are switched to eval mode.
    """
    cpu = torch.device('cpu')
    backbone = torch.load(path, map_location=cpu)
    backbone.eval()

    wrapped = ViT_H_Wrapper(backbone)
    wrapped.eval()
    return wrapped

def common_load(path):
    """Load a pickled model onto the CPU and switch it to eval mode.

    Parameters
    ----------
    path : str or path-like
        Checkpoint that ``torch.load`` turns into a ready model object.

    Returns
    -------
    The loaded object, after its ``eval()`` method has been called.
    """
    cpu = torch.device('cpu')
    loaded = torch.load(path, map_location=cpu)
    loaded.eval()
    return loaded

In [50]:
# Each entry pairs a feature extractor with the preprocessing applied to the
# PIL image before it is fed to that extractor.
# NOTE(review): the checkpoint file name mentions a ResNeXt although the loader
# is called get_vit_model — confirm this pairing is intended.
models = [(
    get_vit_model('weights/resnext101_32x48.pt'),
    transforms.Compose([
        # Resizing / centre-cropping is switched off, so images reach the
        # model at the size they are stored in.
        # transforms.Resize(224),
        # transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]),
)]

In [51]:
# Embed the gallery first and the queries second, then show the retrieval mAP.
# The recorded CPU run below took about 3.6 h for the gallery and 0.9 h for
# the queries.
db_emb, que_emb = (get_emb(split, models) for split in (db, que))
evaluate(db, que, db_emb, que_emb)

100%|█████████████████████████████████████| 3900/3900 [3:37:46<00:00,  3.35s/it]
100%|█████████████████████████████████████████| 976/976 [53:41<00:00,  3.30s/it]


0.08245255849379832

In [None]:
# # shapes = (224, 240, 260, 300, 380, 456, 528)

# model = load_model('weights/efficientnetb0.h5')

# emb_finders = [
#     ('efficientnetb0', ('avg_pool', 'block7a_se_reshape'), 3, 224),
#     ('efficientnetb1', ('avg_pool', 'block7a_se_reshape', 'block7b_se_reshape'), 2, 240),
#     ('efficientnetb2', ('avg_pool', 'block7a_se_reshape', 'block7b_se_reshape'), 2, 260),
#     ('efficientnetb3', ('avg_pool', 'block7a_se_reshape', 'block7b_se_reshape'), 2, 300)
# ]

# models_data = []
# for model_type, layers, coef, shape in emb_finders:
#     model = load_model(f'weights/{model_type}.h5')
#     for layer in layers:
#         models_data.append((
#             Model(inputs=model.input, outputs=model.get_layer(layer).output),
#             (shape, shape),
#             coef
#         ))