In [1]:
import pandas as pd
import numpy as np
import torch
import os
import sys
sys.path.append("/home/docker_current/py_files/MLCLIP_exp")
sys.path.append("/home/docker_current/py_files/finetune_exp/metaclip_arch")
sys.path.append("/home/docker_current/py_files/utils")
from MLCLIP_utils import get_text_encode_model, get_image_encode_model

device = "cuda:1" if torch.cuda.is_available() else "cpu"

In [2]:
from ruCLIP import RUCLIPPredictor
predictor = RUCLIPPredictor()

In [3]:
from metaclip_v5 import MetaCLIP

metaclip_model = MetaCLIP(ratio=0.5)
path_weights = "./weights/ruclip_v5.pth"

metaclip_model.load_state_dict(torch.load(path_weights, map_location=torch.device('cpu')) )
metaclip_model.to(device)
metaclip_model.eval()
metaclip_model.ratio

0.5

In [4]:
df_test = pd.read_csv("/home/docker_current/datasets/test.csv")
df_test['text_features'] = None
df_test['object_img'] = None

# считываем тестовые изображения, делаем словарь с эмбеддингами
test_images = [i.split('.png')[0] for i in os.listdir("/home/docker_current/datasets/test")]
test_embed = {i:None for i in test_images}

In [5]:
from tqdm.notebook import tqdm

for ind_text in tqdm(range(len(df_test))):
    text = df_test['description'][ind_text]
    text_features = predictor.inference_text(text)
    with torch.no_grad():
        text_features = metaclip_model.encode_image(text_features.to(device)).cpu().detach().numpy()
    df_test['text_features'][ind_text] = text_features

  0%|          | 0/900 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [7]:
from PIL import Image

for name_image in tqdm(test_embed.keys()):
    sample_image_path = "/home/docker_current/datasets/test/" + str(name_image) + ".png"
    image = Image.open(sample_image_path)
    image_features = predictor.inference_image(image)
    with torch.no_grad():
        image_features = metaclip_model.encode_text(image_features.to(device)).cpu().detach().numpy()
    test_embed[name_image] = image_features

  0%|          | 0/900 [00:00<?, ?it/s]

1it [00:00, 27.02it/s]
1it [00:00, 62.79it/s]
1it [00:00, 55.89it/s]
1it [00:00, 56.01it/s]
1it [00:00, 53.12it/s]
1it [00:00, 39.52it/s]
1it [00:00, 51.36it/s]
1it [00:00, 41.14it/s]
1it [00:00, 38.89it/s]
1it [00:00, 50.07it/s]
1it [00:00, 51.20it/s]
1it [00:00, 66.57it/s]
1it [00:00, 44.11it/s]
1it [00:00, 44.63it/s]
1it [00:00, 44.83it/s]
1it [00:00, 53.25it/s]
1it [00:00, 45.02it/s]
1it [00:00, 58.41it/s]
1it [00:00, 46.10it/s]
1it [00:00, 55.70it/s]
1it [00:00, 63.03it/s]
1it [00:00, 65.15it/s]
1it [00:00, 41.54it/s]
1it [00:00, 44.75it/s]
1it [00:00, 61.68it/s]
1it [00:00, 57.32it/s]
1it [00:00, 58.13it/s]
1it [00:00, 41.84it/s]
1it [00:00, 38.29it/s]
1it [00:00, 55.36it/s]
1it [00:00, 60.85it/s]
1it [00:00, 58.21it/s]
1it [00:00, 50.60it/s]
1it [00:00, 43.25it/s]
1it [00:00, 56.04it/s]
1it [00:00, 42.77it/s]
1it [00:00, 46.97it/s]
1it [00:00, 55.77it/s]
1it [00:00, 56.67it/s]
1it [00:00, 47.18it/s]
1it [00:00, 49.49it/s]
1it [00:00, 48.48it/s]
1it [00:00, 52.25it/s]
1it [00:00,

In [8]:
from copy import deepcopy
copy_test_embed = deepcopy(test_embed)

In [9]:
cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)

def get_similarity(image_emb, text_emb):
    sim =cos(torch.Tensor(image_emb), 
                        torch.Tensor(text_emb))
    return sim

In [10]:
preds = []

for ind_text in tqdm(range(len(df_test))):

    sims = []
    for image_name in copy_test_embed.keys():
        sim = get_similarity(copy_test_embed[image_name], 
                                    df_test['text_features'][ind_text])
        sims.append(sim)

    sims = np.array(sims)
    ind_max = np.argmax(sims)
    match_image = list(copy_test_embed.keys())[ind_max]
    preds.append(match_image)
    del copy_test_embed[match_image]

  0%|          | 0/900 [00:00<?, ?it/s]

In [11]:
submit = pd.read_csv('/home/docker_current/py_files/sample_solution.csv')
submit['object_img'] = preds
submit['object_img'] = submit['object_img'].astype(np.int64)
submit.to_csv('./submits/ruclip_05.csv', index=False)