In [1]:
import os
import torch
from skimage import io, transform
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch.nn as nn
import torch.nn.functional as F
import collections
from PIL import Image
import xgboost
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform, randint, stats
from sklearn.metrics import make_scorer
import torchvision.models as models
import pickle
print(xgboost.__version__)

1.5.0


In [2]:
class Args:
    """Plain container for the settings used throughout the notebook."""

    def __init__(self):
        # Network selection: torch.hub repository, architecture name, weights flag
        self.model_PATH = 'pytorch/vision:v0.9.0'
        self.network_name = 'resnet152'
        self.pretrained = True

        # Data loading and progress reporting
        self.batch_size = 4
        self.log_interval = 1000

        # Directory holding the cached feature/label files
        self.features_PATH = 'features'

args = Args()

# Run on the GPU whenever PyTorch can see one, otherwise fall back to the CPU
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")
print(device)

# Extra DataLoader options; only populated when CUDA is available
kwargs = {}
if use_cuda:
    kwargs = {'num_workers': 0, 'pin_memory': True}

cuda


In [3]:
# Create a custom class for our LaMem dataset
class LaMemDataset(Dataset):
    """Dataset of images paired with a memorability score.

    The split file is whitespace-separated text that is read as a flat sequence
    of ``<image file name> <score>`` pairs.

    Args:
        split_file: Name of the split file inside ``splits_dir``.
        splits_dir: Directory containing the split file (a trailing path
            separator is optional).
        images_dir: Directory containing the images named in the split file.
        transform: Optional callable applied to the loaded RGB PIL image.

    Raises:
        ValueError: If the split file holds an odd number of tokens and so
            cannot be arranged into (file name, score) pairs.
    """

    def __init__(self, split_file, splits_dir, images_dir, transform=None):
        tokens = []
        # os.path.join works whether or not splits_dir ends with a separator
        split_path = os.path.join(splits_dir, split_file)
        with open(split_path) as f:
            for line in f:
                tokens.extend(line.split())
        if len(tokens) % 2 != 0:
            raise ValueError(
                f"{split_path}: expected '<image> <score>' pairs, "
                f"found an odd number of tokens ({len(tokens)})"
            )
        # Column 0 holds the image file name, column 1 the score (both as strings)
        self.data = np.array(tokens).reshape(-1, 2)
        self.splits_dir = splits_dir
        self.images_dir = images_dir
        self.transform = transform

    def __len__(self):
        """Return the number of (image, score) pairs in the split."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, memorability_score)`` for the sample at ``idx``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.images_dir,
                                self.data[idx, 0])
        # Close the underlying file as soon as the pixels are converted
        with Image.open(img_name) as img:
            image = img.convert('RGB')
        memorability_score = torch.tensor(float(self.data[idx, 1]), dtype=torch.float32)

        if self.transform:
            image = self.transform(image)
        return image, memorability_score

In [4]:
# Create dataset and dataloader for train and validation
split_names = ['train_1.txt', 'val_1.txt']

# Resize to a fixed square, convert to a tensor, then normalise each channel
# (presumably the ImageNet statistics the pretrained network expects -- confirm)
data_transform = transforms.Compose([
    transforms.Resize((227, 227)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# One dataset per split, keyed by the part of the file name before the
# first underscore ('train', 'val')
image_datasets = {
    split_file.partition('_')[0]: LaMemDataset(
        split_file=split_file,
        splits_dir='lamem/splits/',
        images_dir='lamem/images/',
        transform=data_transform,
    )
    for split_file in split_names
}

# One DataLoader per split. The loader options are unpacked from `kwargs`
# rather than indexed: `kwargs` is empty on CPU-only machines, where
# kwargs['num_workers'] would raise KeyError, and unpacking also forwards
# `pin_memory` when CUDA is available.
dataloaders_dict = {
    split: DataLoader(image_datasets[split],
                      batch_size=args.batch_size,
                      shuffle=True,
                      **kwargs)
    for split in ['train', 'val']
}

In [5]:
# Create an Identity layer to extract features from ResNet152
class Identity(nn.Module):
    """Pass-through layer: ``forward`` hands back its input unchanged."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # No parameters and no computation; the very same object is returned
        return x

In [6]:
# Initialize network and modify the last layer
# Load the network exactly once, from the pinned torch.hub release, honouring
# the `pretrained` setting in args.
model = torch.hub.load(args.model_PATH, args.network_name, pretrained=args.pretrained)

# Swap the final fully connected layer for a pass-through so the forward pass
# returns the features that used to feed it
model.fc = Identity()
model.to(device)

# Put layers such as BatchNorm into inference behaviour; as the last
# expression of the cell this also displays the architecture
model.eval()

Using cache found in C:\Users\Alex/.cache\torch\hub\pytorch_vision_v0.9.0


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [None]:
# Extract features from ResNet152 for train and validation
def extract_features(dataloaders_dict, method='train'):
    """Run the global ``model`` over one split and collect features and labels.

    Args:
        dataloaders_dict: Mapping from split name to an iterable yielding
            ``(inputs, labels)`` batches of tensors.
        method: Key of the split to process (e.g. ``'train'`` or ``'val'``).

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays: ``features`` stacks the
        model outputs of all batches along axis 0 and ``labels`` is the
        flattened 1-D concatenation of all label batches. If the loader yields
        no batches, empty float32 arrays of shape ``(0, 2048)`` and ``(0, 1)``
        are returned.
    """
    feature_batches = []
    label_batches = []
    n_seen = 0

    # Feature extraction needs no gradients; disabling autograd avoids building
    # a graph for every batch.
    with torch.no_grad():
        for inputs, labels in dataloaders_dict[method]:
            inputs = inputs.to(device)
            features = model(inputs)

            # Collect per-batch arrays and concatenate once at the end instead
            # of re-copying the whole result on every iteration
            feature_batches.append(features.cpu().numpy())
            label_batches.append(labels.cpu().numpy().reshape(-1))

            n_seen += features.shape[0]
            if n_seen % args.log_interval == 0:
                print(f"{method}: features_extracted: {(n_seen, features.shape[1])}")

    if not feature_batches:
        return (np.empty((0, 2048), dtype=np.float32),
                np.empty((0, 1), dtype=np.float32))

    feature_extracted = np.concatenate(feature_batches, axis=0)
    labels_array = np.concatenate(label_batches)
    return feature_extracted, labels_array

# Push every image of each split through the network (slow on large splits)
(train_features_extracted,
 train_labels) = extract_features(dataloaders_dict, 'train')
(validation_features_extracted,
 validation_labels) = extract_features(dataloaders_dict, 'val')

In [6]:
# Because extracting the features can take some time, I save them in pt files
train_feature_fname = os.path.join(args.features_PATH, 'train_feature.pt')
validation_feature_fname = os.path.join(args.features_PATH, 'valid_feature.pt')
train_labels_fname = os.path.join(args.features_PATH, 'train_labels.pt')
validation_labels_fname = os.path.join(args.features_PATH, 'valid_labels.pt')

feature_cache_files = [train_feature_fname, validation_feature_fname,
                       train_labels_fname, validation_labels_fname]

# Write the cache only when it is incomplete, using the arrays produced by the
# extraction cell. An existing complete cache is never overwritten.
if not all(os.path.isfile(fname) for fname in feature_cache_files):
    os.makedirs(args.features_PATH, exist_ok=True)
    torch.save(train_features_extracted, train_feature_fname)
    torch.save(validation_features_extracted, validation_feature_fname)
    torch.save(train_labels, train_labels_fname)
    torch.save(validation_labels, validation_labels_fname)

# Load features and labels back from the cache files.
# torch.load unpickles the files, so only load caches you created yourself.
# NOTE(review): newer PyTorch releases may need weights_only=False to load
# pickled NumPy arrays -- confirm against the installed version.
train_features_extracted = torch.load(train_feature_fname)
validation_features_extracted = torch.load(validation_feature_fname)
train_labels = torch.load(train_labels_fname)
validation_labels = torch.load(validation_labels_fname)

In [11]:
# Use the Spearman correlation as the metric to gauge our model's performance
def spearmanCorrelationWrapper(output_data, test_data):
    """Return the Spearman rank correlation coefficient of two samples.

    ``stats.spearmanr`` yields a (correlation, p-value) pair; only the
    correlation is handed back so the function can serve as a scalar score.
    """
    rho, _p_value = stats.spearmanr(output_data, test_data)
    return rho

In [None]:
# This function reports the best scores after the hyperparameter tuning process
def report_best_scores(results, n_top=3):
    """Print score statistics and parameters of the best-ranked candidates.

    Args:
        results: Mapping with ``'rank_test_score'``, ``'mean_test_score'``,
            ``'std_test_score'`` and ``'params'`` entries, indexable by
            candidate position.
        n_top: Number of ranks to report, starting at rank 1. Every candidate
            tied at a reported rank is printed.
    """
    for rank in range(1, n_top + 1):
        # Positions of all candidates sharing this rank
        tied_positions = np.flatnonzero(results['rank_test_score'] == rank)
        for position in tied_positions:
            print("Model with rank: {0}".format(rank))
            score_line = "Mean validation score: {0:.3f} (std: {1:.3f})".format(
                results['mean_test_score'][position],
                results['std_test_score'][position])
            print(score_line)
            print("Parameters: {0}".format(results['params'][position]))
            print("")

In [None]:
# RandomizedSearchCV for hyperparameter tuning
"""
BEST MODEL:
SpearmanrResult(correlation=0.6425295489498927)
Parameters: {'colsample_bytree': 0.7992694074557947, 
             'gamma': 0.03177917514301182, 
             'learning_rate': 0.12329469651469865, 
             'max_depth': 5, 
             'n_estimators': 136, 
             'subsample': 0.8918424713352255}
"""

# Base estimator; every candidate in the search is a clone with sampled settings
xgb_model = xgboost.XGBRegressor()

# Sampling distributions. scipy's uniform(loc, scale) draws from
# [loc, loc + scale]; randint(low, high) draws integers in [low, high).
params = {
    "colsample_bytree": uniform(0.7, 0.3),
    "gamma": uniform(0, 0.5),
    "learning_rate": uniform(0.03, 0.3),
    "max_depth": randint(2, 6),
    "n_estimators": randint(100, 150),
    "subsample": uniform(0.6, 0.4),
}

# 200 sampled candidates, 3-fold cross-validation, scored by Spearman correlation
xgb_grid = RandomizedSearchCV(
    estimator=xgb_model,
    param_distributions=params,
    n_iter=200,
    scoring=make_scorer(spearmanCorrelationWrapper),
    n_jobs=1,
    cv=3,
    verbose=10,
    random_state=42,
    return_train_score=True,
)

xgb_grid.fit(train_features_extracted, train_labels, verbose=True)

# Show only the rank-1 candidate(s)
report_best_scores(xgb_grid.cv_results_, n_top=1)

In [8]:
# Train the best XGBRegressor model and predict on validation
# Hyperparameters of the final model, gathered in one place
best_params = {
    'colsample_bytree': 0.79,
    'gamma': 0.031,
    'learning_rate': 0.12,
    'max_depth': 5,
    'n_estimators': 136,
    'subsample': 0.89,
}

xgb_model = xgboost.XGBRegressor(**best_params)
xgb_model.fit(train_features_extracted, train_labels, verbose=True)

# Predictions for the validation features
out = xgb_model.predict(validation_features_extracted)

In [14]:
# Print the results
# Rank correlation between the predictions and the validation labels
result = spearmanCorrelationWrapper(output_data=out,
                                    test_data=validation_labels)
print(f'Spearman correlation: {result}')

Spearman correlation: 0.6393922713881623


In [15]:
# In the end, save the best model
# The context manager flushes and closes the file even if pickling fails
with open("model.dat", "wb") as model_file:
    pickle.dump(xgb_model, model_file)

In [17]:
# Load the best model and features for future predictions
# NOTE: pickle.load can execute arbitrary code -- only load a model.dat that
# you created yourself or otherwise trust.
# The context manager closes the file as soon as the model is loaded.
with open("model.dat", "rb") as model_file:
    xgb_model = pickle.load(model_file)
out = xgb_model.predict(validation_features_extracted)

# result = spearmanCorrelationWrapper(out, validation_labels)
# print(f'Spearman correlation: {result}')

Spearman correlation: 0.6393922713881623
