## Basic modules

In [1]:
import numpy as np
import os
import scipy

# sklearn
from sklearn.svm import LinearSVC

# ignore convergence warnings from sklearn
import warnings
import sklearn.exceptions
warnings.filterwarnings("ignore", category=sklearn.exceptions.ConvergenceWarning)

# pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms

# pandas
import pandas as pd

# matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# seaborn
import seaborn as sns
sns.set()

# pickle
import pickle

# cv2
import cv2
from PIL import Image

# utility functions
import utils.misc as misc
from utils.loader import ModelLoader

# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

## Reading data

The neural-activation (*in pickle format*) data consists of an organized dictionary with the following entries:

* `image_paths`: numpy array containing paths to all the 1960 images
* `image_ctg`: numpy array containing class labels from 0 -> 6
* `image_splits`: 1960 x 10 numpy array containing the 10 80:20 train:val splits used in the paper (though I generate my own validation splits for computing the SIT scores)
* `features`: 168-dimensional (for multi-unit) neural features for all the images, i.e. a 1960 x 168 numpy array
* `categ_name_map`: dictionary mapping from numeric class label to class name e.g. face, animal etc.

The dataset consists of images belonging to 7 classes and 49 object types. The image paths are arranged in an order such that the images belonging to a particular object type are together. There are 40 images per object in the dataset, so images [1 - 40] belong to object 1, images [41 - 80] belong to object 2 and so on.

In [2]:
# Root directory of the PLoSCB2014 data release; also passed to misc.read_images further down
data_path = 'data/PLoSCB2014_data_20141216'

# Derive the pickle location from data_path so the dataset root is defined in exactly one place
neural_data_file = os.path.join(data_path, 'NeuralData_IT_multiunits.pkl')

# NOTE(security): pickle.load can execute arbitrary code — only load this file from a trusted source
with open(neural_data_file, 'rb') as f:
    data = pickle.load(f)

In [3]:
# Choose the compute device: the first CUDA GPU when PyTorch reports one, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

## Preparing Input Images

To feed the Cadieu dataset images to the pretrained CNNs, we need to preprocess them with appropriate reshaping, normalization and other data augmentation steps. We also need to convert the images to tensors in order to use PyTorch.

In [4]:
# Load the images listed under data['image_paths'], handing the dataset root to the reader
X = misc.read_images(
    data['image_paths'],
    data_path=data_path,
)

# Report how many images were read and the shape of the resulting batch
print(
    "read {} images ... preprocessed input shape: {}".format(
        X.shape[0],
        X.shape,
    )
)

read 1960 images ... preprocessed input shape: torch.Size([1960, 3, 224, 224])


## Load the pretrained model

There are 2 steps to be done here:

* Load the pretrained model, e.g. alexnet, vgg16, resnet50, etc.
* Modify it so that the appropriate features can be extracted

In [5]:
# name of the pretrained architecture to load
model_name = 'alexnet'

# initialize the model loader
model_loader = ModelLoader()

# load the requested model and move it onto the selected device in one step
model = model_loader.load(model_name).to(device)

# switch to eval mode; as the last expression of the cell this also displays the model
model.eval()

alexnet_partial(
  (model): AlexNet(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
      (1): ReLU(inplace)
      (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (4): ReLU(inplace)
      (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (7): ReLU(inplace)
      (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (9): ReLU(inplace)
      (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace)
      (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
    (classifier): Sequential(
      (0): Dropout(p=0.5)
      (1): Linear(in_features=9216, out_f

## Extract features

In [6]:
# Pass the preprocessed images and the loaded model to the feature-extraction helper
model_features = misc.extract_features(X, model)

# Report the number of images processed and the shape of the feature array
print(
    "extracting features for {} images ... model features shape: {}".format(
        model_features.shape[0],
        model_features.shape,
    )
)

extracting features for 1960 images ... model features shape: (1960, 4096)


## Read neural features

In [7]:
# Pull the recorded neural features out of the loaded data dictionary
neural_features = data['features']

# Report the number of images covered and the shape of the array
print(
    "read neural features for {} images with shape: {}".format(
        neural_features.shape[0],
        neural_features.shape,
    )
)

read neural features for 1960 images with shape: (1960, 168)


## Compute Similarity to IT Dissimilarity Matrix (SIT)

In [9]:
# SIT score of the model features against the neural features,
# requested with 100 validation splits and val_ratio=0.2.
# NOTE(review): no random seed is set in the visible notebook; if sit_score draws
# random splits the numbers will vary between runs — confirm against utils.misc.
sit_mean, sit_std = misc.sit_score(
    model_features,
    neural_features,
    num_val_splits=100,
    val_ratio=0.2,
)

# Show the mean and standard deviation of the SIT scores
print("sit_mean: {:.4f}\t sit_std: {:.4f}".format(sit_mean, sit_std))

sit_mean: 0.5310	 sit_std: 0.0372


## Linear SVM accuracy score

In order to see how good any set of features is, we compute the Linear SVM accuracy obtained for the classification task on the cadieu dataset (7 classes).

In [10]:
# Linear SVM accuracy on the class labels using the model features.
# NOTE(review): neural_features is passed as a third positional argument here but not in
# the call that scores the IT features; its role inside linear_svm_score_v2 is not
# visible from this notebook — confirm against utils.misc.
acc_mean, acc_std = misc.linear_svm_score_v2(
    model_features,
    data['image_ctg'],
    neural_features,
    num_val_splits=10,
    val_ratio=0.2,
)
print(
    "Model features: linear svm accuracy mean: {:.4f} \t std: {:.4f}".format(
        acc_mean,
        acc_std,
    )
)

Model features: linear svm accuracy mean: 0.6222 	 std: 0.0139


In [11]:
# Linear SVM accuracy on the class labels using the IT neural features,
# relying on the helper's default split settings
acc_mean, acc_std = misc.linear_svm_score_v2(
    neural_features,
    data['image_ctg'],
)
print(
    "Neural features: linear svm accuracy mean: {:.4f} \t std: {:.4f}".format(
        acc_mean,
        acc_std,
    )
)

Neural features: linear svm accuracy mean: 0.6635 	 std: 0.0309
