In [1]:
import torch
import pandas as pd
import torch.nn as nn
import pandas as pd
import numpy as np
import pickle

from torchvision.models import vit_b_32
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torchvision.models import ViT_B_32_Weights
from torch.utils.data import DataLoader, Dataset,DataLoader,random_split
from torch.optim import Adam
from PIL import Image
from torchvision.transforms import Resize,Compose, ToTensor

In [2]:
# -------------------------------------------------------- Global settings --------------------------------------------------------------
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The f-string already interpolates `device`; the former argument-less
# `.format()` call chained onto it was redundant.
print(f"Currently running on {device}")

# Mini-batch size shared by the training and evaluation DataLoaders.
BATCH_SIZE = 8
# NOTE(review): not referenced by any cell visible in this notebook — confirm it is still needed.
RESHAPE_SIZE = 512
# Number of passes over the training split performed by train_model for each fold.
NUM_EPOCHS = 50
# NOTE(review): both paths begin with "/", so the ".." segments are resolved from the
# filesystem root rather than from the notebook's directory — confirm this is intended.
# CSV with the `path` and `score` columns used as regression inputs/targets.
GND_FILEPATH = '/../../Results/deepretrieval-rparis6k-ap.csv'
# Pickled iterable of (train_index, test_index) pairs defining the folds.
FOLD_PATH = '/../../Folds/rparis6k-folds.pkl'

Currently running on cuda


In [3]:
def compute_loss(model, loader, loss_fn=None):
    """Return the mean per-batch loss of ``model`` over ``loader`` without training it.

    The model is switched to evaluation mode for the duration of the call (so
    stochastic layers such as dropout do not perturb the result) and its
    previous train/eval mode is restored afterwards. Gradients are not tracked.

    Args:
        model: ``torch.nn.Module`` mapping a batch of images to predictions.
        loader: Iterable yielding ``(images, scores, paths)`` batches; ``scores``
            is a 1-D tensor that is reshaped to a column before the loss call.
        loss_fn: Callable invoked as ``loss_fn(outputs, scores)``. When omitted,
            the notebook-level ``criterion`` is used, so existing two-argument
            calls behave as before.

    Returns:
        float: Sum of the batch losses divided by the number of batches, or
        ``nan`` when ``loader`` yields no batches.
    """
    if loss_fn is None:
        # Backward-compatible fallback to the loss defined at notebook level.
        loss_fn = criterion

    # Follow the model's own device; only fall back to the notebook-level
    # `device` for models that have no parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    was_training = model.training
    model.eval()

    total_loss = 0.0
    num_batches = 0
    try:
        with torch.no_grad():
            for images, scores, _paths in loader:
                images = images.to(target_device, dtype=torch.float)
                # (batch,) -> (batch, 1) so targets line up with the model output.
                scores = scores.to(target_device).unsqueeze(1)

                outputs = model(images)

                total_loss += loss_fn(outputs, scores).item()
                num_batches += 1
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    if num_batches == 0:
        # No batches: report "undefined" instead of dividing by zero.
        return float("nan")

    return total_loss / num_batches

In [4]:
def train_model(model, train_dataloader, validation_dataloader, optimizer, criterion, num_epochs=None):
    """Train ``model`` and report train/validation loss after every epoch.

    Args:
        model: ``torch.nn.Module`` to optimise; expected to already live on the
            notebook-level ``device``, since batches are moved there.
        train_dataloader: Iterable yielding ``(images, scores, paths)`` batches
            used for the gradient updates.
        validation_dataloader: Iterable of the same batch layout, passed to
            ``compute_loss`` after each epoch.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(outputs, scores)``.
        num_epochs: Number of epochs to run; defaults to the notebook-level
            ``NUM_EPOCHS`` when omitted.

    Returns:
        list[tuple[float, float]]: One ``(train_loss, validation_loss)`` pair per
        epoch. The training loss is the mean of the per-batch losses (``nan`` if
        the training loader yields no batches).
    """
    if num_epochs is None:
        num_epochs = NUM_EPOCHS

    history = []
    for epoch in range(num_epochs):
        # Make sure layers with train/eval-dependent behaviour are in training
        # mode at the start of every epoch, regardless of what ran in between.
        model.train()

        running_loss = 0.0
        num_batches = 0

        for images, scores, _paths in train_dataloader:
            # Same float cast as compute_loss, so both paths feed the model identically.
            images = images.to(device, dtype=torch.float)
            # (batch,) -> (batch, 1) so targets line up with the model output.
            scores = scores.to(device).unsqueeze(1)

            optimizer.zero_grad()

            outputs = model(images)
            # (prediction, target) order, matching compute_loss and the
            # torch.nn loss-module convention.
            loss = criterion(outputs, scores)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        epoch_train_loss = running_loss / num_batches if num_batches else float("nan")
        # NOTE: the two-argument form of compute_loss takes its loss function
        # from the notebook-level `criterion`, not from this function's argument.
        epoch_validation_loss = compute_loss(model, validation_dataloader)
        history.append((epoch_train_loss, epoch_validation_loss))

        print("Epoch num {}/{}".format(epoch + 1, num_epochs))
        print("Epoch train loss {}".format(epoch_train_loss))
        print("Epoch validation loss {}".format(epoch_validation_loss))

    return history

In [5]:
class DifficultyFoldDataset(Dataset):
    """Dataset yielding ``(image, score, image_path)`` triples for one data split.

    Args:
        data: 2-D array-like whose column 0 holds image file paths and whose
            column 1 holds the regression target (anything ``float()`` accepts).
        transform: Optional callable applied to the loaded PIL image; when
            ``None`` the RGB PIL image itself is returned.
    """

    def __init__(self, data, transform=None):
        # Accept nested lists as well as ndarrays; column slicing needs an array.
        data = np.asarray(data)
        self.image_paths = data[:, 0]
        self.scores = data[:, 1]
        self.transform = transform

    def __len__(self):
        """Number of samples in this split."""
        return len(self.scores)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, score_tensor, path)``."""
        img_path = str(self.image_paths[idx])

        # Close the file handle promptly and force three channels: grayscale,
        # RGBA or CMYK files would otherwise produce a channel count that a
        # 3-channel preprocessing pipeline / model cannot consume.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")

        # Identity check instead of truthiness, so a transform object that
        # happens to be falsy (e.g. an empty container) is still applied.
        if self.transform is not None:
            image = self.transform(image)

        score = torch.tensor(float(self.scores[idx]))

        return (image, score, img_path)

In [6]:
# Preprocessing pipeline bundled with the ViT-B/32 IMAGENET1K_V1 weights,
# wrapped in a single-step Compose and applied to every image by the datasets.
content_transform = Compose(
    transforms=[ViT_B_32_Weights.IMAGENET1K_V1.transforms()],
)

In [7]:
# Ground-truth table: one row per image with its file path and target score.
train_df = pd.read_csv(GND_FILEPATH)
# Two-column (path, score) array built from nested Python lists; this is the
# array that gets indexed with the fold indices later on.
path_score_rows = train_df.loc[:, ['path', 'score']].to_numpy().tolist()
dataset = np.array(path_score_rows)
# NOTE(review): `to_tensor` is not used by any cell visible here.
to_tensor = ToTensor()

In [8]:
# Load the fold definitions (an iterable of (train_index, test_index) pairs).
# The context manager closes the file handle, which was previously left open.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only point FOLD_PATH at files from a trusted source.
with open(FOLD_PATH, 'rb') as fold_file:
    folds = pickle.load(fold_file)

In [None]:
# Cross-validated training: for every fold a fresh pretrained ViT-B/32 is
# fine-tuned on the training indices, then used to predict a score for each
# held-out image. Predictions are collected in `score_dict`, keyed by image path.
score_dict = {}
for i, (train_index, test_index) in enumerate(folds):
    train_data = np.array(dataset[train_index])
    test_data  = np.array(dataset[test_index])

    train_dataset = DifficultyFoldDataset(train_data, content_transform)
    test_dataset  = DifficultyFoldDataset(test_data, content_transform)

    # Start every fold from the pretrained weights so folds do not leak into each other.
    vit_model = vit_b_32(weights=ViT_B_32_Weights.DEFAULT)
    # Swap the classification head for a single-output regressor; the sigmoid
    # bounds predictions to (0, 1).
    regression_head = torch.nn.Sequential(
        torch.nn.Linear(in_features=768, out_features=1),
        torch.nn.Sigmoid())
    vit_model.heads = regression_head
    vit_model = vit_model.to(device)
    vit_model.train()

    # NOTE(review): the training loader does not shuffle, so every epoch sees
    # the samples in the same order — confirm this is intentional.
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False)
    test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    # `criterion` is deliberately a notebook-level name: compute_loss reads it
    # when called without an explicit loss function.
    criterion = torch.nn.MSELoss()
    optimizer = Adam(vit_model.parameters(), lr=0.0001)
    train_model(vit_model, train_dataloader, test_dataloader, optimizer, criterion)

    # Inference on the held-out split. Gradients are disabled and results are
    # moved to the CPU so that `score_dict` does not keep an autograd graph and
    # device memory alive for every test image. Batches come from the existing
    # test loader instead of one forward pass per image.
    vit_model.eval()
    with torch.no_grad():
        for images, _, batch_paths in test_dataloader:
            predictions = vit_model(images.to(device))
            for path, score in zip(batch_paths, predictions):
                # Stored as a one-element CPU tensor so downstream
                # `.detach().cpu()` / `float(...)` handling keeps working.
                score_dict[path] = score.cpu()

In [10]:
# Image paths in the same row order as the ground-truth table.
paths = train_df['path'].tolist()

In [11]:
# Predicted score for every path, as plain Python floats in table order.
scores = [float(score_dict[image_path].detach().cpu()) for image_path in paths]

In [12]:
# Assemble the per-image predictions and write them out without the index column.
result_df = pd.DataFrame(data={'path': paths, 'score': scores})
result_df.to_csv(
    '/../../Results/vitregressor-deepretrieval-rparis6k-ap.csv',
    index=False,
)