## Basic modules

In [1]:
import numpy as np
import os
import scipy

# misc utility functions
import utils.misc as misc

# load model definitions
from utils.loader import ModelLoader

# sklearn
from sklearn.svm import LinearSVC

# ignore convergence warnings from sklearn
import warnings
import sklearn.exceptions
warnings.filterwarnings("ignore", category=sklearn.exceptions.ConvergenceWarning)

# pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms

# pandas
import pandas as pd

# matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# seaborn
import seaborn as sns
sns.set()

# pickle
import pickle

# cv2
import cv2
from PIL import Image

# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

## Reading data

The neural-activation (*in pickle format*) data consists of an organized dictionary with the following entries:

* `image_paths`: numpy array containing the paths to all 1960 images
* `image_ctg`: numpy array containing class labels from 0 -> 6
* `image_splits`: 1960 x 10 numpy array containing the 10 80:20 train:val splits used in the paper. Note that this notebook generates its own validation splits for computing the sit scores.
* `features`: 1960 x 168 numpy array of neural features, i.e. a 168-dimensional (multi-unit) feature vector for each image
* `categ_name_map`: dictionary mapping from numeric class label to class name e.g. face, animal etc.

The dataset consists of images belonging to 7 classes and 49 object types. The image paths are arranged in an order such that the images belonging to a particular object type are together. There are 40 images per object in the dataset, so images [1 - 40] belong to object 1, images [41 - 80] belong to object 2 and so on.

In [2]:
# root directory of the dataset; also passed to the image reader further down
data_path = 'data/PLoSCB2014_data_20141216'

# build the pickle path from data_path so the dataset location is defined in exactly one place
neural_data_file = os.path.join(data_path,'NeuralData_IT_multiunits.pkl')

# NOTE: pickle.load can execute arbitrary code -- only load this file from a trusted source
with open(neural_data_file,'rb') as f:
    data = pickle.load(f)

In [3]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

## Preparing Input Images

To feed the Cadieu dataset images to the pretrained CNNs, we need to preprocess the images with appropriate reshaping, normalization and other data augmentation steps. We also need to convert the images to tensors in order to use PyTorch.

In [4]:
# Load the images listed in data['image_paths'], passing data_path through to the reader
X = misc.read_images(
    data['image_paths'],
    data_path=data_path,
)

# report the number of images read and the shape of the preprocessed input
print("read {} images ... preprocessed input shape: {}".format(len(X), X.shape))

read 1960 images ... preprocessed input shape: torch.Size([1960, 3, 224, 224])


## Read neural features

In [5]:
# neural features for every image, taken from the loaded dataset dictionary
neural_features = data['features']

# report how many images have features and the overall array shape
print("read neural features for {} images with shape: {}".format(len(neural_features), neural_features.shape))

read neural features for 1960 images with shape: (1960, 168)


## Datagenerator

In [6]:
class Datagenerator:
    """Splits images and their features into train/validation parts and samples batches.

    Args:
        imgs: indexable collection of images whose first dimension is the image count.
        neural_features: per-image features, indexed with the same masks as ``imgs``.
        val_ratio: fraction passed to ``misc.get_train_val_split_indices`` for the
            validation split.
        num_obj: number of objects in the dataset; every batch size must be a
            multiple of it. Defaults to 49, the value that used to be hard-coded.
    """
    def __init__(self,imgs,neural_features,val_ratio=0.2,num_obj=49):

        # get train/val split using val_ratio
        self.train_mask,self.val_mask = misc.get_train_val_split_indices(total_num_imgs=imgs.shape[0],val_ratio=val_ratio)

        # get training and validation data
        self.imgs_train,self.neural_feat_train = imgs[self.train_mask],neural_features[self.train_mask]
        self.imgs_val,self.neural_feat_val = imgs[self.val_mask],neural_features[self.val_mask]

        # number of objects (configurable instead of a hard-coded constant)
        self.num_obj = num_obj

    def get_next(self,batch_size=49*2,mode='train'):
        """Sample one batch of images and the matching features.

        Args:
            batch_size: number of samples requested; must be a multiple of ``self.num_obj``.
                The default (49*2) is only valid when ``num_obj`` divides it.
            mode: ``'train'`` samples from the training split; any other value
                samples from the validation split.

        Returns:
            Tuple ``(img_batch, neural_feat_batch)`` selected with the same mask.

        Raises:
            AssertionError: if ``batch_size`` is not a multiple of ``self.num_obj``.
        """
        # check if batch size is multiple of self.num_obj
        assert batch_size%self.num_obj == 0, \
            "batch_size ({}) must be a multiple of num_obj ({})".format(batch_size,self.num_obj)

        if mode == 'train':
            img_split,neural_feat_split = self.imgs_train,self.neural_feat_train
        else:
            img_split,neural_feat_split = self.imgs_val,self.neural_feat_val

        # fraction of the chosen split that one batch represents
        batch_size_ratio = batch_size/img_split.shape[0]

        # reuse the split helper to draw the batch: its second return value is used as the batch mask
        # NOTE(review): presumably this selects batch_size_ratio of the split -- confirm in utils.misc
        _,batch_mask = misc.get_train_val_split_indices(total_num_imgs=img_split.shape[0],val_ratio=batch_size_ratio)

        # get batch imgs and neural features
        img_batch,neural_feat_batch = img_split[batch_mask],neural_feat_split[batch_mask]

        return img_batch,neural_feat_batch

## Finetune the pretrained model using RDM loss function

In [7]:
class Agent:
    """Fine-tunes a pretrained model with an RDM-matching loss and reports progress.

    Args:
        model_name: name handed to ``ModelLoader.load`` to build the model.
        rdm_gt_feat: per-image features from which the target RDM is computed.
        device: torch device the model and batches are moved to.
        learning_rate: learning rate for the Adam optimizer.
        imgs: images to train/validate on. Defaults to the notebook-level ``X``
            (the previous implicit behaviour).
        labels: per-image class labels used for the linear-SVM evaluation. Defaults
            to the notebook-level ``data['image_ctg']`` (the previous implicit behaviour).
    """
    def __init__(self,model_name,rdm_gt_feat,device,learning_rate,imgs=None,labels=None):
        # keep the device on the instance so train() does not rely on a notebook global
        self.device = device

        # model loader instance
        model_loader = ModelLoader()

        # define model and transfer to device
        self.model = model_loader.load(model_name).to(device)

        # define optimizer
        self.opt = optim.Adam(self.model.parameters(),lr=learning_rate)

        # fall back to the notebook-level variables when the caller does not pass data explicitly
        if imgs is None:
            imgs = X
        if labels is None:
            labels = data['image_ctg']
        self.labels = labels

        # define datagenerator for sampling batches
        self.datagen = Datagenerator(imgs,rdm_gt_feat,val_ratio=0.2)


    def train(self,batch_size=49*8,max_train_steps=1000,print_every=10):
        """Run the RDM fine-tuning loop.

        Args:
            batch_size: number of images per training batch (forwarded to the datagenerator).
            max_train_steps: number of optimization steps to perform.
            print_every: evaluate and print metrics every this many steps.
        """
        # set model to train mode
        self.model.train()

        for train_step in range(max_train_steps):

            # sample training batch
            img_batch,neural_feat_batch = self.datagen.get_next(batch_size=batch_size)
            img_batch = img_batch.to(self.device)

            # compute rdm from the ground-truth features (numpy) and move it to the device
            neural_rdm = misc.get_rdm(neural_feat_batch)
            neural_rdm = torch.from_numpy(neural_rdm).float().to(self.device)

            # get model_features and compute model rdm
            model_features = self.model(img_batch).squeeze()
            model_rdm = misc.get_rdm_tensor(model_features)

            # rdm loss: mean squared difference between model rdm and target rdm
            loss = torch.mean((model_rdm-neural_rdm)**2)

            # perform optimization step
            self.opt.zero_grad()
            loss.backward()
            self.opt.step()

            # print progress after every 'print_every' steps
            if train_step%print_every == 0:

                # set model to eval mode
                self.model.eval()

                # compute sit for training batch
                train_model_features = misc.extract_features(img_batch.detach().cpu(),self.model,batch_size=4)
                train_sit_mean,train_sit_std = misc.sit_score(train_model_features,neural_feat_batch,
                                                                    num_val_splits=1,val_ratio=1)

                # compute sit for validation dataset
                val_model_features = misc.extract_features(self.datagen.imgs_val,self.model)
                val_sit_mean,val_sit_std = misc.sit_score(val_model_features,self.datagen.neural_feat_val,
                                                                     num_val_splits=1,val_ratio=1)

                # compute linear svm accuracy for validation dataset (on this instance, not a global agent)
                # NOTE(review): this repeats the extraction above with identical arguments; it could
                # probably reuse val_model_features if misc.sit_score does not modify its input -- confirm
                svm_features = misc.extract_features(self.datagen.imgs_val,self.model)
                acc_mean,acc_std = misc.linear_svm_score_v2(svm_features,self.labels[self.datagen.val_mask],
                                                    num_val_splits=20,num_subsampled_feat=-1)

                # set model to train mode
                self.model.train()

                # print evaluation metric values
                print ("Step: {}\t loss: {:.3f}\t train_sit: {:.4f} \t val_sit: {:.4f} \t lsvm_acc: {:.4f}".format(train_step,
                                                                loss.item(),train_sit_mean,val_sit_mean,acc_mean))

## Model features for RDM ground truth computation

In [8]:
# model to be used for rdm training ground truth
## CHANGE THIS
rdm_model_name = 'resnet34'

# dir storing model features
model_feat_path = 'models/model_features/'

# join with os.path so the path is correct whether or not model_feat_path ends with a separator
# NOTE: pickle.load can execute arbitrary code -- only load feature files from a trusted source
with open(os.path.join(model_feat_path,rdm_model_name+'_feat.pkl'),'rb') as f:
    rdm_model_feat = pickle.load(f)

In [9]:
## DECIDE which features provide the rdm ground truth:
## the neural features (active) or the stored model features (commented alternative)
rdm_gt_feat = neural_features
#rdm_gt_feat = rdm_model_feat

# build the agent that will be fine-tuned against the chosen ground truth
agent = Agent(
    model_name='squeezenet_v0',
    rdm_gt_feat=rdm_gt_feat,
    device=device,
    learning_rate=1e-6,
)

In [10]:
# Baseline: linear svm accuracy on the validation images before any training

# features the agent's model produces for the validation images
model_features = misc.extract_features(agent.datagen.imgs_val, agent.model)

# score those features against the class labels of the same validation images
acc_mean, acc_std = misc.linear_svm_score_v2(
    model_features,
    data['image_ctg'][agent.datagen.val_mask],
    num_val_splits=100,
    num_subsampled_feat=-1,
)
print("Pre-training: validation model features\t lsvm_acc_mean: {:.4f}\t lsvm_acc_std: {:.4f}".format(acc_mean, acc_std))

Pre-training: validation model features	 lsvm_acc_mean: 0.7476	 lsvm_acc_std: 0.0602


In [11]:
# Fine-tune the agent: 1000 steps with batches of 49*4 images, reporting metrics every 100 steps
agent.train(
    batch_size=49*4,
    max_train_steps=1000,
    print_every=100,
)

Step: 0	 loss: 0.632	 train_sit: 0.4429 	 val_sit: 0.5780 	 lsvm_acc: 0.7439
Step: 100	 loss: 0.461	 train_sit: 0.4229 	 val_sit: 0.6114 	 lsvm_acc: 0.7510
Step: 200	 loss: 0.316	 train_sit: 0.4546 	 val_sit: 0.6302 	 lsvm_acc: 0.7510
Step: 300	 loss: 0.192	 train_sit: 0.5308 	 val_sit: 0.6703 	 lsvm_acc: 0.7724
Step: 400	 loss: 0.156	 train_sit: 0.5226 	 val_sit: 0.6949 	 lsvm_acc: 0.7582
Step: 500	 loss: 0.119	 train_sit: 0.4862 	 val_sit: 0.7217 	 lsvm_acc: 0.7816
Step: 600	 loss: 0.080	 train_sit: 0.5707 	 val_sit: 0.7241 	 lsvm_acc: 0.7735
Step: 700	 loss: 0.075	 train_sit: 0.5830 	 val_sit: 0.7280 	 lsvm_acc: 0.7816
Step: 800	 loss: 0.066	 train_sit: 0.5530 	 val_sit: 0.7329 	 lsvm_acc: 0.7898
Step: 900	 loss: 0.056	 train_sit: 0.6019 	 val_sit: 0.7350 	 lsvm_acc: 0.8041


In [13]:
# Same evaluation as before training, now on the fine-tuned model

# features the agent's model produces for the validation images
model_features = misc.extract_features(agent.datagen.imgs_val, agent.model)

# score those features against the class labels of the same validation images
acc_mean, acc_std = misc.linear_svm_score_v2(
    model_features,
    data['image_ctg'][agent.datagen.val_mask],
    num_val_splits=100,
    num_subsampled_feat=-1,
)
print("Post-training: validation model features\t lsvm_acc_mean: {:.4f}\t lsvm_acc_std: {:.4f}".format(acc_mean, acc_std))

Post-training: validation model features	 lsvm_acc_mean: 0.7637	 lsvm_acc_std: 0.0614
