In [None]:
from sklearn.utils import shuffle
from tqdm import tqdm
import pandas as pd
import os
# Registry mapping a split name ("train" / "test") to its list of usable pairs.
read_data = {}
data = pd.read_csv("/kaggle/input/add-csv/pairs_for_training.csv")
data = shuffle(data)
prefix = "/kaggle/input/imageretrievaldataset/"
# Slice by position so the shuffled order survives. Looking rows up with the
# label-based `data.loc[i]` for i in 0..n-1 returned them in file order, which
# silently discarded the shuffle (and cost one slow lookup per row).
a = data.iloc[:, 1:5].values.tolist()
b = []
# Wrapping the iterable advances the bar once per row, so it reaches 100%.
# The previous manual bar was sized len(data)+1 and only advanced for pairs
# that existed, so it could never complete.
for row in tqdm(a):
    first_path = prefix + row[0]
    second_path = prefix + row[1]
    # Keep the pair only when both image files are actually present on disk.
    if os.path.exists(first_path) and os.path.exists(second_path):
        # The last two selected columns are stored in swapped order: the
        # dataset class reads element [2] of each record as the label.
        b.append([first_path, second_path, row[3], row[2]])
k = len(b)
print("Available: ", k)
read_data['train'] = b

In [None]:
data = pd.read_csv("/kaggle/input/imageretrievaldataset/pairs_for_testing.csv")
data = shuffle(data)
prefix = "/kaggle/input/imageretrievaldataset/"
# Slice by position so the shuffled order survives. Looking rows up with the
# label-based `data.loc[i]` for i in 0..n-1 returned them in file order, which
# silently discarded the shuffle (and cost one slow lookup per row).
a = data.iloc[:, 1:6].values.tolist()
b = []
# Wrapping the iterable advances the bar once per row, so it reaches 100%.
# The previous manual bar was sized len(data)+1 and only advanced for pairs
# that existed, so it could never complete.
for row in tqdm(a):
    first_path = prefix + row[0]
    second_path = prefix + row[1]
    # Keep the pair only when both image files are actually present on disk.
    if os.path.exists(first_path) and os.path.exists(second_path):
        # Element [2] of the slice is unused for this split; elements [4] and
        # [3] are stored in that (swapped) order, and the dataset class reads
        # element [2] of each stored record as the label.
        b.append([first_path, second_path, row[4], row[3]])
k = len(b)
print("Available: ", k)
read_data['test'] = b

In [None]:
from torch.utils.data import Dataset

class huydataset(Dataset):
    """Image-pair dataset that renders each pair as one side-by-side image.

    Args:
        file: iterable of 4-element records ``[path1, path2, label, extra]``.
            ``label`` is converted with ``float``; ``extra`` is ignored.

    Each item is a dict with:
        ``"labels"``: the float label of the pair.
        ``"merged_imgs"``: tensor of shape ``(1, 3, 224, 224)`` holding the
            normalised side-by-side image (leading dim added so that
            ``collate_fn`` can concatenate items along it).
    """

    # Target size of each half as (height, width), the order Resize expects.
    HALF_SIZE = (256, 128)
    # Size of the merged canvas as (width, height), the order PIL expects.
    CANVAS_SIZE = (256, 256)
    # Fill colour of the canvas; fully covered once both halves are pasted.
    BACKGROUND = (250, 250, 250)

    def __init__(self, file):
        firsts = []
        seconds = []
        labels = []
        for path1, path2, label, _ in file:
            firsts.append(path1)
            seconds.append(path2)
            labels.append(float(label))
        self.data = {"firsts": firsts, "seconds": seconds, "labels": labels}
        # Built once here instead of twice per __getitem__ call.
        self.resize_half = transforms.Resize(self.HALF_SIZE)
        self.preprocess = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def __len__(self):
        """Return the number of pairs."""
        return len(self.data["labels"])

    def _load_half(self, path):
        """Open ``path`` and return it as an RGB image resized to one half."""
        # The context manager closes the file handle deterministically.
        # Converting to RGB first avoids PIL's forced nearest-neighbour
        # resampling for palette / 1-bit images and gives every image the
        # same mode as the canvas it is pasted into.
        with Image.open(path) as img:
            return self.resize_half(img.convert("RGB"))

    def _merge_side_by_side(self, left, right):
        """Paste ``left`` and ``right`` next to each other on a new canvas."""
        canvas = Image.new("RGB", self.CANVAS_SIZE, self.BACKGROUND)
        canvas.paste(left, (0, 0))
        # The right half starts where the left one ends (its width).
        canvas.paste(right, (self.HALF_SIZE[1], 0))
        return canvas

    def __getitem__(self, idx):
        """Return the label and merged-image tensor of pair ``idx``."""
        left = self._load_half(self.data["firsts"][idx])
        right = self._load_half(self.data["seconds"][idx])
        merged_imgs = self.preprocess(self._merge_side_by_side(left, right)).unsqueeze(0)
        return {"labels": self.data["labels"][idx], "merged_imgs": merged_imgs}

    def collate_fn(self, batch):
        """Combine a list of items into one batch dict.

        Args:
            batch: non-empty list of dicts as returned by ``__getitem__``.

        Returns:
            Dict with ``"merged_imgs"`` concatenated along dim 0 and
            ``"labels"`` as a 1-D tensor; any other key present in the first
            item is kept as a plain list of per-item values.
        """
        # Transpose list-of-dicts into dict-of-lists using the first item's keys.
        collated = {key: [item[key] for item in batch] for key in batch[0]}
        collated["merged_imgs"] = torch.cat(collated["merged_imgs"], dim=0)
        collated["labels"] = torch.tensor(collated["labels"])
        return collated

In [None]:
import torch.nn as nn
import torch

class rerankingEfficientNet(nn.Module):
    """EfficientNet-B0 backbone with a single sigmoid output, trained with MSE.

    The backbone is fetched through ``torch.hub`` with pretrained weights and
    its ``classifier.fc`` layer is replaced by a one-unit linear head.
    """

    def __init__(self):
        super().__init__()
        self.model = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_efficientnet_b0', pretrained=True)
        # 1280 is the input width given to the replacement head — presumably
        # the feature width of the original fc layer; confirm if the backbone
        # variant is ever changed.
        self.model.classifier.fc = nn.Linear(1280, 1)
        self.model.train()
        self.act = nn.Sigmoid()
        self.crit = nn.MSELoss()

    def forward(self, labels, merged_imgs):
        """Score a batch of merged images and optionally compute the loss.

        Args:
            labels: 1-D tensor of targets with one entry per image, or
                ``None`` to skip the loss (e.g. at inference time).
            merged_imgs: image batch passed unchanged to the backbone.

        Returns:
            Tuple ``(loss, output)``: ``output`` is the sigmoid of the
            backbone's output; ``loss`` is the MSE between ``output`` and
            ``labels``, or ``None`` when ``labels`` is ``None``.
        """
        output = self.act(self.model(merged_imgs))
        if labels is None:
            # No targets available: return scores only instead of crashing.
            return None, output
        # Add the trailing dim so the target matches the head's one-column
        # output, and align dtype so non-float32 labels do not break the loss.
        target = labels.unsqueeze(1).to(dtype=output.dtype)
        loss = self.crit(output, target)
        return loss, output
    
     

In [None]:
from transformers import ViTImageProcessor, ViTForImageClassification, ViTModel
import torchvision.transforms as T
from torch.utils.data import DataLoader
import torch
from tqdm import tqdm
from PIL import Image
from torchvision import transforms


from huggingface_hub import HfApi

# --- Training setup ---------------------------------------------------------
train_dataset = huydataset(read_data['train'])
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=train_dataset.collate_fn)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = rerankingEfficientNet().to(device)
optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-3)

# --- Checkpoint upload setup ------------------------------------------------
# SECURITY: the access token is read from the environment instead of being
# hardcoded in the notebook. The token that used to be committed here must be
# treated as leaked and revoked. When HF_TOKEN is unset, None is passed and
# huggingface_hub falls back to locally stored credentials, if any.
hf_token = os.environ.get("HF_TOKEN")
api = HfApi()

# Per-epoch histories: one inner list per epoch, one entry per batch.
losses = []
avg_losses = []
for epoch in range(5):
    train_iters = tqdm(train_loader)
    avg_loss = 0.0
    losse = []
    avg_losse = []
    for idx, batch in enumerate(train_iters):
        for key in ['labels', 'merged_imgs']:
            batch[key] = batch[key].to(device)
        loss, output = model(**batch)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        # Use the detached Python float for bookkeeping: accumulating the loss
        # tensor itself chains every iteration's autograd node together for
        # the whole epoch.
        loss_value = loss.item()
        # Incremental running mean over the batches seen so far this epoch.
        avg_loss = (avg_loss*idx + loss_value)/(idx+1)
        train_iters.set_postfix({"avg_loss": avg_loss, 'loss': loss_value})
        losse.append(loss_value)
        avg_losse.append(avg_loss)
    losses.append(losse)
    avg_losses.append(avg_losse)

    # Checkpoint after every epoch and push it to the Hub (overwrites the
    # previous upload at the same path).
    torch.save(model.state_dict(), "/kaggle/working/model.bin")
    api.upload_file(
        path_or_fileobj="/kaggle/working/model.bin",
        path_in_repo="model.bin",
        repo_id="Huy1432884/rerankingShuffleNet",
        repo_type="model",
        token=hf_token
    )
    
    