In [12]:
# Install gdown (Google Drive file downloads), open-clip-torch and faiss-gpu
!pip install gdown open-clip-torch faiss-gpu

Collecting gdown
  Downloading gdown-4.7.1-py3-none-any.whl (15 kB)
Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: faiss-gpu, gdown
Successfully installed faiss-gpu-1.7.2 gdown-4.7.1


In [13]:
!gdown --id 1BFAJfzzeaUGsPoYELS86HIutJ43D-vat

Downloading...
From (uriginal): https://drive.google.com/uc?id=1BFAJfzzeaUGsPoYELS86HIutJ43D-vat
From (redirected): https://drive.google.com/uc?id=1BFAJfzzeaUGsPoYELS86HIutJ43D-vat&confirm=t&uuid=5b3d3333-10eb-4db9-9ae5-b6a804957041
To: /kaggle/working/model_weights.pt
100%|██████████████████████████████████████| 1.26G/1.26G [00:12<00:00, 97.8MB/s]


In [14]:
import os
import math

import numpy as np
 
import torch as th
import torch.nn as nn
import torch.nn.functional as F
import open_clip

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import torchvision.transforms as T

from tqdm import tqdm
import pandas as pd
import cv2
from PIL import Image
import time
import faiss
import copy
import argparse

In [15]:
def compute_precision_at_k(ranked_targets: np.ndarray,
                           k: int) -> float:
    """
    Precision over the first ``k`` ranked results.

    Args:
        ranked_targets: Boolean array in ranking order; True marks a relevant result.
        k: How many leading results to take into account (must be >= 1 and
           no larger than the number of results).

    Returns: The mean of the first ``k`` relevance flags.
    """
    assert k >= 1
    assert ranked_targets.size >= k, ValueError('Relevance score length < k')
    leading_results = ranked_targets[:k]
    return np.mean(leading_results)

def compute_average_precision(ranked_targets: np.ndarray,
                              gtp: int) -> float:
    """
    Computes the average precision.

    Precision@k is taken at every rank k that holds a relevant result; those
    values are summed and divided by ``gtp``.

    Args:
        ranked_targets: A boolean array of retrieved targets in ranking order,
            True if relevant and False otherwise.
        gtp: ground truth positives (must be >= 1).

    Returns:
        The average precision, or 0.0 when no retrieved target is relevant.
    """
    assert gtp >= 1
    relevant = np.asarray(ranked_targets).astype(bool)
    if not relevant.any():
        # no relevant targets among the retrieved results
        return 0.0

    # One cumulative pass replaces re-computing a prefix mean for every hit:
    # hits_so_far[k] / (k + 1) is exactly precision@(k + 1).
    hits_so_far = np.cumsum(ranked_targets)
    ranks = np.arange(1, relevant.size + 1)
    precision_at_hits = hits_so_far[relevant] / ranks[relevant]
    return np.sum(precision_at_hits) / gtp


def calculate_map(ranked_retrieval_results: np.ndarray,
                  query_labels: np.ndarray,
                  gallery_labels: np.ndarray) -> float:
    """
    Calculates the mean average precision.
    Args:
        ranked_retrieval_results: A 2D array of ranked retrieval results (shape: n_queries x 1000), because we use
                                top1000 retrieval results.
        query_labels: A 1D array of query class labels (shape: n_queries).
        gallery_labels: A 1D array of gallery class labels (shape: n_gallery_items).
    Returns:
        The mean average precision.
    Side effects:
        Rebinds the module-level names ``current_retrievals`` (list with one
        boolean relevance array per query) and ``gpt`` (ground-truth positive
        count of the last processed query).
    """
    # Kept as module-level state: a later notebook cell inspects `current_retrievals`.
    # The docstring has to come before this statement, otherwise Python does not
    # treat it as the function's docstring.
    global current_retrievals, gpt

    assert ranked_retrieval_results.ndim == 2
    assert ranked_retrieval_results.shape[1] == 1000

    class_average_precisions = []
    current_retrievals = []

    # Number of gallery items per class = ground-truth positives for that class.
    class_ids, class_counts = np.unique(gallery_labels, return_counts=True)
    class_id2quantity_dict = dict(zip(class_ids, class_counts))
    for gallery_indices, query_class_id in tqdm(
                            zip(ranked_retrieval_results, query_labels),
                            total=len(query_labels)):
        # Checking that no image is repeated in the retrieval results
        assert len(np.unique(gallery_indices)) == len(gallery_indices), \
                    ValueError('Repeated images in retrieval results')

        # True where the retrieved gallery item has the same class as the query.
        current_retrieval = gallery_labels[gallery_indices] == query_class_id
        gpt = class_id2quantity_dict[query_class_id]

        current_retrievals.append(current_retrieval)

        class_average_precisions.append(
            compute_average_precision(current_retrieval, gpt)
        )

    mean_average_precision = np.mean(class_average_precisions)
    return mean_average_precision

In [16]:
import numpy as np
import time

def calculate_l2_distances(query, gallery):
    """Return the Euclidean norm of ``gallery - query`` taken along axis 1."""
    offsets = gallery - query
    return np.linalg.norm(offsets, axis=1)

def get_k_nearest_neighbors(distances, k):
    """Return the positions of the ``k`` smallest entries of ``distances``, closest first."""
    ascending_order = np.argsort(distances)
    return ascending_order[:k]

def get_similiarity_l2(embeddings_gallery, embeddings_query, k):
    """
    Brute-force nearest-neighbour search using L2 distance.

    For each query embedding, the distance to every gallery embedding is
    computed and the ``k`` closest gallery positions are kept.

    Returns:
        (scores, indices): two arrays holding, per query, the distances of the
        selected gallery items and their positions in ``embeddings_gallery``.
    """
    print('Processing indices...')

    started = time.time()

    all_scores, all_indices = [], []
    for query_vec in embeddings_query:
        dists = calculate_l2_distances(query_vec, embeddings_gallery)
        closest = get_k_nearest_neighbors(dists, k)
        all_indices.append(closest)
        all_scores.append(dists[closest])

    elapsed = time.time() - started

    print(f'Finished processing indices, took {elapsed}s')
    return np.array(all_scores), np.array(all_indices)

In [17]:
def convert_indices_to_labels(indices, labels):
    """
    Map every index in a nested ``indices`` structure to ``labels[index]``.

    The input is deep-copied first, so the caller's ``indices`` is left untouched;
    the copy is filled in place and returned.
    """
    mapped = copy.deepcopy(indices)
    for row in mapped:
        for pos, gallery_idx in enumerate(row):
            row[pos] = labels[gallery_idx]
    return mapped

In [18]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if th.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Reading Dataset

In [19]:
def read_image(image_file):
    """
    Load an image from disk as an RGB array.

    Args:
        image_file: Path of the image to read.

    Returns:
        The decoded image with channels reordered from OpenCV's BGR to RGB.

    Raises:
        ValueError: If OpenCV could not read the file (``cv2.imread`` returned None).
    """
    img = cv2.imread(
        image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    )
    # The check must come before any processing: cvtColor on None raises an
    # OpenCV error, which previously hid this more helpful message.
    if img is None:
        raise ValueError('Failed to read {}'.format(image_file))
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

class SubmissionDataset(Dataset):
    """
    Image dataset driven by a CSV annotation file.

    Each item is ``(transformed_image, product_id)``. The CSV is read with
    pandas and must provide ``img_path`` and ``product_id`` columns; when
    ``with_bbox`` is set, the label slice ``'bbox_x':'bbox_h'`` is unpacked as
    x, y, width, height (assumes exactly four bbox columns in that order —
    confirm against the CSV schema).
    """

    def __init__(self, root, annotation_file, transforms, with_bbox=False):
        self.root = root
        self.transforms = transforms
        self.with_bbox = with_bbox
        self.imlist = pd.read_csv(annotation_file)

    def __len__(self):
        return len(self.imlist)

    def __getitem__(self, index):
        cv2.setNumThreads(6)

        image_path = os.path.join(self.root, self.imlist['img_path'][index])
        pixels = read_image(image_path)

        if self.with_bbox:
            # Crop to the annotated box before any resizing happens.
            left, top, width, height = self.imlist.loc[index, 'bbox_x':'bbox_h']
            pixels = pixels[top:top + height, left:left + width, :]

        tensor = self.transforms(Image.fromarray(pixels))
        return tensor, self.imlist['product_id'][index]

In [20]:
def get_transform():
    """
    Build the preprocessing pipeline applied to every image.

    Steps, in order: bicubic resize to 224x224 (antialiased), conversion to a
    tensor, then per-channel normalisation with fixed mean/std constants.
    """
    norm_mean = (0.48145466, 0.4578275, 0.40821073)
    norm_std = (0.26862954, 0.26130258, 0.27577711)

    resize = T.Resize(
        size=(224, 224),
        interpolation=T.InterpolationMode.BICUBIC,
        antialias=True,
    )
    normalize = T.Normalize(mean=norm_mean, std=norm_std)

    return T.Compose([resize, T.ToTensor(), normalize])

@th.no_grad()
def get_feature_vector(model, dataloader, use_cuda=True):
    """
    Run ``model`` over every batch of ``dataloader`` and collect the embeddings.

    Args:
        model: Callable applied to a half-precision image batch.
        dataloader: Iterable yielding ``(images, product_ids)`` batches.
        use_cuda: When truthy, each image batch is moved to the GPU first.

    Returns:
        (features, product_ids): float32 array with one row per sample, and a
        1-D array of the matching product ids, both in dataloader order.
    """
    features = []
    product_id = []

    for imgs, p_id in tqdm(dataloader):
        if use_cuda:
            imgs = imgs.cuda()
        batch_features = model(imgs.half())
        # Flatten everything except the batch axis. A plain squeeze would also
        # remove the batch axis for a batch of one sample (e.g. a final partial
        # batch), which then breaks the concatenation below.
        batch_features = batch_features.reshape(batch_features.shape[0], -1)
        features.append(batch_features.detach().cpu().numpy().astype(np.float32))
        # Same reasoning for the ids: keep them 1-D even for a single sample.
        product_id.append(p_id.reshape(-1).detach().cpu().numpy())

    return np.concatenate(features, axis=0), np.concatenate(product_id)

# Getting the CLIP model's embedding

In [21]:
# weights_path_large = '/kaggle/input/vit-l-14-0-52/vit-l-14-1-0.52.pt'

# vit_backbone = open_clip.create_model_and_transforms('ViT-L-14', None)[0].visual
# vit_backbone.load_state_dict(th.load(weights_path_large)['model_state_dict'])
# vit_backbone.half()   # Apply half precision to the backbone model
# vit_backbone.eval()   # Dropping unecessary layers
# model = vit_backbone
# model.cuda()

In [24]:
# NOTE(review): the gdown cell above saves to /kaggle/working/model_weights.pt,
# not to this path — confirm where this checkpoint file comes from.
weights_path_huge = '/kaggle/working/convnext_large_d_320-soup.pt'

# Only the visual tower of the open_clip model is kept; despite the variable
# name, the architecture requested here is 'convnext_large_d_320'.
vit_backbone = open_clip.create_model_and_transforms('convnext_large_d_320', None)[0].visual
# Load the checkpoint onto the CPU first so it is not materialised on the GPU
# before being copied into the (still CPU-resident) model.
vit_backbone.load_state_dict(th.load(weights_path_huge, map_location='cpu'))
vit_backbone.half()   # Apply half precision to the backbone model
vit_backbone.eval()   # Inference mode (does not remove any layers)
model = vit_backbone
model.cuda();

In [26]:
# import torch
# # Specify the path for the binary file
# binary_file_path = "/kaggle/working/model.bin"

# # Save the model's state dictionary to the binary file
# torch.save(model, binary_file_path)

In [27]:
# import zipfile
# import os

# def zip_folder(folder_path, zip_filename):
#     with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
#         for root, _, files in os.walk(folder_path):
#             for file in files:
#                 file_path = os.path.join(root, file)
#                 arcname = os.path.relpath(file_path, folder_path)
#                 zipf.write(file_path, arcname)

# # Replace 'your_folder_path' with the actual path to the folder you want to zip
# folder_to_zip = '/kaggle/working'
# output_zip_path = 'VIT-H-14.zip'

# zip_folder(folder_to_zip, output_zip_path)

# from IPython.display import FileLink

# # Display a download link for the zip file
# FileLink(output_zip_path)

In [28]:
# !rm model1.pt

In [29]:
# One preprocessing pipeline shared by the gallery and the query images.
transform = get_transform()

img_dir = "/kaggle/input/vprtestdata/public_dataset/"
gallery_csv = os.path.join(img_dir, "gallery.csv")
queries_csv = os.path.join(img_dir, "queries.csv")

# Gallery images are used whole; query images are cropped to their bounding box.
dataset_train = SubmissionDataset(img_dir, gallery_csv, transform)
dataset_test = SubmissionDataset(img_dir, queries_csv, transform, with_bbox=True)

dataloader_train = DataLoader(dataset_train, batch_size=512, num_workers=4)
dataloader_test = DataLoader(dataset_test, batch_size=512, num_workers=4)

In [30]:
def predict(model):
    """
    Embed gallery and query images and rank the gallery for every query.

    Args:
        model: Embedding model passed through to ``get_feature_vector``.

    Returns:
        A list with one entry per query; each entry is the list of the 1000
        nearest gallery positions (L2 distance), closest first.

    Side effects:
        Rebinds the module-level names ``feature_vectors_gallery`` (gallery
        embedding array) and ``labels_gallery`` (gallery product ids as a
        list); later cells reuse both.
    """
    global feature_vectors_gallery, labels_gallery

    feature_vectors_gallery, labels_gallery = get_feature_vector(model, dataloader_train, True)
    # Query labels are not needed here; evaluation reads them from the CSV.
    feature_vectors_query, _ = get_feature_vector(model, dataloader_test, True)

    _, indices = get_similiarity_l2(feature_vectors_gallery, feature_vectors_query, 1000)

    labels_gallery = labels_gallery.tolist()

    return indices.tolist()

In [31]:
# Rank the gallery for every query (also fills the module-level gallery
# features/labels), then persist the ranking table: one row per query.
preds = predict(model)

preds_df = pd.DataFrame(data=preds)
preds_df.to_csv('preds.csv', index=False)

100%|██████████| 3/3 [00:35<00:00, 11.93s/it]
100%|██████████| 4/4 [00:38<00:00,  9.64s/it]


Processing indices...
Finished processing indices, took 2.4535932540893555s


In [32]:
# Peek at the first rows: one row per query, columns hold the ranked gallery indices.
preds_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
0,768,942,335,923,193,285,266,714,136,507,...,759,483,791,663,690,800,905,52,130,529
1,8,120,426,196,991,214,927,689,950,275,...,409,809,287,704,428,101,951,471,757,1
2,494,205,734,1041,829,631,201,1046,946,375,...,208,1057,573,626,981,429,457,156,206,673
3,384,322,778,185,824,1036,416,641,500,280,...,759,468,823,344,965,1018,423,443,289,621
4,363,286,589,294,997,692,975,562,387,868,...,105,613,253,175,491,1022,1027,19,350,118


In [33]:
# Ground-truth product ids for the gallery ("seller") and query ("user") images.
seller_gt = pd.read_csv('/kaggle/input/vprtestdata/public_dataset/gallery.csv')
user_gt = pd.read_csv('/kaggle/input/vprtestdata/public_dataset/queries.csv')
gallery_labels = seller_gt['product_id'].values
query_labels = user_gt['product_id'].values

# Evaluate metrics
print("Evaluation Results")
ranked_indices = np.array(preds)
mean_ap = calculate_map(ranked_indices, query_labels, gallery_labels)
results = {"mAP": mean_ap}
print(results)

Evaluation Results


100%|██████████| 1935/1935 [00:00<00:00, 3702.30it/s]

{'mAP': 0.49652107022765996}





# Sample image similarity search

In [None]:
# `current_retrievals` is a module-level list filled by calculate_map (one entry
# per query), so this cell only works after the evaluation cell has been run.
len(current_retrievals)

In [None]:
def transform_img(image, from_bgr=False):
    """
    Apply the notebook's module-level ``transform`` pipeline to a single image.

    Args:
        image: Either a NumPy image array or an object the pipeline accepts
            directly (e.g. a PIL image).
        from_bgr: Set to True only when ``image`` is a raw OpenCV BGR array.
            Arrays returned by ``read_image`` are already RGB, so swapping the
            channels again would feed BGR data to the model.

    Returns:
        The output of ``transform`` for the image.
    """
    img = image

    if isinstance(img, np.ndarray):
        if from_bgr:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)

    img = transform(img)

    return img

In [None]:
@th.no_grad()
def get_feature_vector_img(model, imgs, use_cuda=True):
    """
    Embed an already-preprocessed image batch with ``model``.

    Args:
        model: Callable applied to the half-precision batch.
        imgs: Image tensor with a leading batch axis.
        use_cuda: When truthy, the batch is moved to the GPU first.

    Returns:
        The model output as a float32 NumPy array.
    """
    if use_cuda:
        imgs = imgs.cuda()
    # Run the model once and reuse the result for both the shape print and the
    # returned array (a second forward pass only doubled the inference cost).
    output = model(imgs.half())
    print(output.shape)
    return output.detach().cpu().numpy().astype(np.float32)

In [None]:
img_path = '/kaggle/input/vprtestdata/public_dataset/queries/accelerated-glorious-fennec-of-reward.jpg'

def get_similar_prods(img_path, top_k=1000):
    """
    Retrieve the gallery items closest to a single image file.

    Relies on module-level state: ``model``, plus ``feature_vectors_gallery``
    and ``labels_gallery``, which are set by ``predict`` — run the prediction
    cell first.

    Args:
        img_path: Path of the query image.
        top_k: Number of nearest gallery items to return (default 1000).

    Returns:
        ``[indices, preds]`` where ``indices`` is a nested list of gallery
        positions (one row for the single query) and ``preds`` holds the
        gallery labels at those positions.
    """
    image = read_image(img_path)
    image = transform_img(image)
    image = image.unsqueeze(dim=0)  # add the batch axis the model expects
    feature_vectors_query = get_feature_vector_img(model, image, True)
    scores, indices = get_similiarity_l2(feature_vectors_gallery, feature_vectors_query, top_k)
    preds = convert_indices_to_labels(indices, labels_gallery)

    return [indices.tolist(), preds]

[similar_images, labels] = get_similar_prods(img_path)

In [None]:
csv_path_q = '/kaggle/input/vprtestdata/public_dataset/queries.csv'
data_q = pd.read_csv(csv_path_q)

# Annotation row(s) of the sample query image, to compare against the retrieved labels.
is_sample_query = data_q['img_path'] == 'queries/accelerated-glorious-fennec-of-reward.jpg'
x = data_q[is_sample_query]

x

In [None]:
# Tabulate the ranked gallery indices returned by get_similar_prods for the sample query.
preds_df1 = pd.DataFrame(similar_images)

preds_df1

In [None]:
from PIL import Image
import matplotlib.pyplot as plt

# Show the query image resized to 224x224 and save a copy of the figure.
img = Image.open(img_path).resize((224, 224))

fig, ax = plt.subplots(figsize=(4, 4))
ax.imshow(img)
ax.axis('off')
fig.savefig('query.png')
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

csv_path = '/kaggle/input/vprtestdata/public_dataset/gallery.csv'
data = pd.read_csv(csv_path)

# First 100 retrieved gallery indices for the sample query.
# NOTE(review): these are positions in the gallery embedding array and are
# matched against `seller_img_id` below — confirm the two numberings coincide.
prod_ids = similar_images[0][:100]

num_images = len(prod_ids)
num_columns = 10
# At least one row, so an empty result list does not request a 0-row grid.
num_rows = max(1, (num_images + num_columns - 1) // num_columns)

# squeeze=False keeps `axes` two-dimensional even when one row is enough,
# so the grid can always be flattened and indexed the same way.
fig, axes = plt.subplots(num_rows, num_columns, figsize=(15, 15), squeeze=False)
flat_axes = axes.flatten()

for i, prod_id in enumerate(prod_ids):
    ax = flat_axes[i]
    # Hide ticks up front so a missing gallery row leaves a clean blank tile.
    ax.axis('off')

    row = data[data['seller_img_id'] == prod_id]
    if row.empty:
        continue

    image_path = '/kaggle/input/vprtestdata/public_dataset/' + row.iloc[0]['img_path']
    img = Image.open(image_path).resize((224, 224))

    ax.imshow(img)
    ax.set_title(f"Image ID: {prod_id}")

# Remove the unused tiles at the end of the grid.
for i in range(num_images, num_rows * num_columns):
    fig.delaxes(flat_axes[i])

plt.tight_layout()

plt.savefig('inference.png')

plt.show()


In [None]:
# Download custom image
import requests
from pathlib import Path

# `data_path` is not defined anywhere in this notebook; reuse it if an earlier
# cell provided one, otherwise fall back to a local "data" directory.
# NOTE(review): confirm the intended download location.
try:
    data_path
except NameError:
    data_path = Path("data")
data_path = Path(data_path)
data_path.mkdir(parents=True, exist_ok=True)

# Setup custom image path
custom_image_path = data_path / "04-pizza-dad.jpeg"

# Download the image if it doesn't already exist
if not custom_image_path.is_file():
    print(f"Downloading {custom_image_path}...")
    # When downloading from GitHub, need to use the "raw" file link
    request = requests.get(
        "https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/images/04-pizza-dad.jpeg",
        timeout=30,
    )
    # Fail before touching the disk, so an error page or a dropped connection
    # never leaves an empty or corrupt file that later runs would skip over.
    request.raise_for_status()
    custom_image_path.write_bytes(request.content)
else:
    print(f"{custom_image_path} already exists, skipping download.")