In [None]:
import numpy as np
import pandas as pd
import torch
import torchvision.transforms as transforms
import timm

from PIL import Image
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

In [None]:
class MyDataset(Dataset):
    """Image dataset built from the image-path column of annotation CSV files.

    Every item is the RGB image resized to a fixed size, converted to a tensor
    and normalised per channel.  With the default arguments the dataset is the
    train split followed by the val split, each in CSV row order.

    Args:
        csv_files: Annotation CSV path(s); their rows are concatenated in the
            order given.  A single string is accepted as one file.
        image_root: Directory prepended to each relative image path.
        image_size: Size handed to ``Resize``; an int is expanded to a square
            ``[size, size]``, a pair is used as given.
        path_column: CSV column that holds the relative image paths.
    """

    def __init__(
        self,
        csv_files=(
            "../data/eBay/metadata/train.csv",
            "../data/eBay/metadata/val.csv",
        ),
        image_root="../data/eBay/Images/",
        image_size=384,
        path_column="IMAGE_PATH",
    ):
        if isinstance(csv_files, str):
            # A bare string would otherwise be iterated character by character.
            csv_files = (csv_files,)
        if isinstance(image_size, int):
            image_size = (image_size, image_size)

        self.image_root = image_root
        self.transform = transforms.Compose(
            [
                transforms.Resize(list(image_size)),
                transforms.ToTensor(),
                transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            ]
        )

        # Read the path column directly instead of round-tripping the whole
        # frame through a dict of dicts.
        self.total_imgs = []
        for csv_file in csv_files:
            self.total_imgs.extend(pd.read_csv(csv_file)[path_column].tolist())

    def __len__(self):
        """Return the number of images across all annotation files."""
        return len(self.total_imgs)

    def __getitem__(self, idx):
        """Load image ``idx`` from disk and return the transformed tensor."""
        # Tolerate a root given with or without a trailing slash.
        img_loc = f"{self.image_root.rstrip('/')}/{self.total_imgs[idx]}"
        # The context manager closes the file handle as soon as the pixel data
        # has been copied out by ``convert``.
        with Image.open(img_loc) as raw_image:
            image = raw_image.convert("RGB")
        return self.transform(image)

In [None]:
# Order-preserving loader: no shuffling and no dropped tail batch, so the i-th
# extracted feature row corresponds to the i-th dataset entry.
my_dataset = MyDataset()
loader_options = dict(batch_size=32, shuffle=False, num_workers=4, drop_last=False)
data_loader = DataLoader(my_dataset, **loader_options)

In [None]:
# Pretrained ConvNeXt checkpoint used purely as an image feature extractor.
model = timm.create_model("convnext_base_384_in22ft1k", pretrained=True)
# num_classes=0 is timm's documented value for "no classifier layer", which
# makes the forward pass return pooled features. The previous -1 relied on
# undocumented handling of non-positive class counts.
model.reset_classifier(0)
# Inference mode (no dropout / frozen norm statistics) on the GPU.
model = model.eval().cuda()

In [None]:
# Push every batch through the model without tracking gradients and collect
# the outputs on the CPU; the batches are stitched together along dim 0.
feat_batches = []
with torch.no_grad():
    for batch in tqdm(data_loader):
        feat_batches.append(model(batch.cuda()).cpu())
image_feats = torch.cat(feat_batches, dim=0)

In [None]:
# Persist the extracted image features as a .npy file.
image_feats_array = image_feats.numpy()
np.save("convnext384_feats.npy", image_feats_array)

In [None]:
import torch.nn as nn
from transformers import RobertaModel, RobertaTokenizer

class RoBERTa(nn.Module):
    """Inference-only RoBERTa text encoder with attention-masked mean pooling.

    Args:
        name: Checkpoint identifier passed to ``from_pretrained`` for both the
            tokenizer and the encoder.
    """

    def __init__(
        self,
        name: str = "roberta-base",
    ):
        super().__init__()
        # Tokenizer and encoder must come from the same checkpoint, so both
        # honour ``name``; it used to be ignored in favour of a hard-coded
        # "roberta-base".
        self.tokenizer = RobertaTokenizer.from_pretrained(name)
        self.model = RobertaModel.from_pretrained(name).eval()

    @torch.no_grad()
    def forward(self, input_text):
        """Encode text into one vector per input by averaging token states.

        Args:
            input_text: Text (or batch of texts) accepted by the tokenizer.

        Returns:
            Tensor holding, for each input, the mean of the last hidden states
            over the positions whose attention mask is 1 (padding excluded).
        """
        device = self.model.device
        encoded = self.tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
        encoded = {key: value.to(device) for key, value in encoded.items()}

        hidden = self.model(**encoded).last_hidden_state
        # Broadcast the mask over the hidden dimension so padded positions
        # contribute nothing to the sum.
        mask = encoded["attention_mask"].unsqueeze(-1)
        summed = (hidden * mask).sum(dim=1)
        token_counts = mask.sum(dim=1)
        return summed / token_counts

In [None]:
class MyTextDataset(Dataset):
    """Text dataset built from the title column of annotation CSV files.

    With the default arguments the dataset is the train titles followed by the
    val titles, each in CSV row order.

    Args:
        csv_files: Annotation CSV path(s); their rows are concatenated in the
            order given.  A single string is accepted as one file.
        text_column: CSV column that holds the text.
    """

    def __init__(
        self,
        csv_files=(
            "../data/eBay/metadata/train.csv",
            "../data/eBay/metadata/val.csv",
        ),
        text_column="AUCT_TITL",
    ):
        if isinstance(csv_files, str):
            # A bare string would otherwise be iterated character by character.
            csv_files = (csv_files,)

        self.total_texts = []
        for csv_file in csv_files:
            # Force string parsing so numeric-looking titles stay text.
            titles = pd.read_csv(csv_file, dtype={text_column: str})[text_column]
            # Missing titles become "" instead of float NaN: every item is then
            # a str and the row count/order is unchanged.
            self.total_texts.extend(titles.fillna("").tolist())

    def __len__(self):
        """Return the number of texts across all annotation files."""
        return len(self.total_texts)

    def __getitem__(self, idx):
        """Return the text stored at position ``idx``."""
        return self.total_texts[idx]

In [None]:
# Order-preserving loader over the texts: no shuffling, no dropped tail batch.
my_textdataset = MyTextDataset()
text_loader_options = dict(batch_size=512, shuffle=False, num_workers=4, drop_last=False)
text_data_loader = DataLoader(my_textdataset, **text_loader_options)

In [None]:
# Build the text encoder, switch it to eval mode and move it to the GPU.
roberta = RoBERTa().eval().cuda()

In [None]:
# Encode the texts batch by batch without tracking gradients, keep each result
# on the CPU, then join the batches along dim 0.
text_feat_batches = []
with torch.no_grad():
    for text_batch in tqdm(text_data_loader):
        text_feat_batches.append(roberta(text_batch).cpu())
text_feats = torch.cat(text_feat_batches, dim=0)

In [None]:
# Persist the extracted text features as a .npy file.
text_feats_array = text_feats.numpy()
np.save("roberta_avg_feats.npy", text_feats_array)

In [None]:
class MyDataset(Dataset):
    """Image dataset built from the image-path column of annotation CSV files.

    Every item is the RGB image resized to a fixed size, converted to a tensor
    and normalised per channel.  With the default arguments the dataset covers
    the rows of the index annotation file in CSV row order.

    Args:
        csv_files: Annotation CSV path(s); their rows are concatenated in the
            order given.  A single string is accepted as one file.
        image_root: Directory prepended to each relative image path.
        image_size: Size handed to ``Resize``; an int is expanded to a square
            ``[size, size]``, a pair is used as given.
        path_column: CSV column that holds the relative image paths.
    """

    def __init__(
        self,
        csv_files=("../data/eBay/metadata/index.csv",),
        image_root="../data/eBay/Images/",
        image_size=224,
        path_column="IMAGE_PATH",
    ):
        if isinstance(csv_files, str):
            # A bare string would otherwise be iterated character by character.
            csv_files = (csv_files,)
        if isinstance(image_size, int):
            image_size = (image_size, image_size)

        self.image_root = image_root
        self.transform = transforms.Compose(
            [
                transforms.Resize(list(image_size)),
                transforms.ToTensor(),
                transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            ]
        )

        # Read the path column directly instead of round-tripping the whole
        # frame through a dict of dicts.
        self.total_imgs = []
        for csv_file in csv_files:
            self.total_imgs.extend(pd.read_csv(csv_file)[path_column].tolist())

    def __len__(self):
        """Return the number of images across all annotation files."""
        return len(self.total_imgs)

    def __getitem__(self, idx):
        """Load image ``idx`` from disk and return the transformed tensor."""
        # Tolerate a root given with or without a trailing slash.
        img_loc = f"{self.image_root.rstrip('/')}/{self.total_imgs[idx]}"
        # The context manager closes the file handle as soon as the pixel data
        # has been copied out by ``convert``.
        with Image.open(img_loc) as raw_image:
            image = raw_image.convert("RGB")
        return self.transform(image)

In [None]:
# Order-preserving loader: no shuffling and no dropped tail batch, so the i-th
# prediction corresponds to the i-th dataset entry.
my_dataset = MyDataset()
loader_options = dict(batch_size=64, shuffle=False, num_workers=4, drop_last=False)
data_loader = DataLoader(my_dataset, **loader_options)

In [None]:
# The classifier is deliberately left in place (no reset_classifier call), so
# calling the model returns its classification output rather than features.
model = timm.create_model("convnext_base_in22k", pretrained=True).eval().cuda()

In [None]:
# For every batch take the arg-max over dim 1 of the model output as the
# predicted label, collect the labels on the CPU and join them along dim 0.
label_batches = []
with torch.no_grad():
    for batch in tqdm(data_loader):
        predicted = model(batch.cuda()).argmax(dim=1)
        label_batches.append(predicted.cpu())

image_labels = torch.cat(label_batches, dim=0)

In [None]:
# Persist the predicted labels as a .npy file.
image_labels_array = image_labels.numpy()
np.save("index_in22k_labels.npy", image_labels_array)

In [None]:
# Number of distinct predicted labels (displayed as the cell's output).
np.unique(image_labels.numpy()).size