In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torchvision
from torch.autograd import Variable
import torch.nn as nn
import pickle
from random import randint
import sys
from tqdm import tqdm_notebook as tqdm
import cv2

In [None]:
# Project-local dataset loaders from the util.data_utils module (not shown in this notebook)
from util.data_utils import get_SALICON_datasets
from util.data_utils import get_raw_SALICON_datasets

# Load the train/val/test splits together with the mean image; mean_image is subtracted from
# custom inputs further down in this notebook. Per the original note these are the 128x96 data.
train_data, val_data, test_data, mean_image = get_SALICON_datasets('Dataset/Transformed') # 128x96
# Alternative loader for the raw 640x480 data (NOTE(review): the /tmp path looks machine-specific):
#train_data, val_data, test_data, mean_image = get_raw_SALICON_datasets(dataset_folder='/tmp/pbqk24_tmp') # 640x480


In [None]:
# Define the Normalized Scanpath Saliency Loss function, which is used in training the network
# from https://github.com/dariozanca/FixaTons/blob/master/_visual_attention_metrics.py
def NSS(saliencyMap, fixationMap):
    """Compute the Normalized Scanpath Saliency (NSS) score of a saliency map.

    The saliency map is rescaled to [0, 1], resized to the shape of the fixation
    map if necessary, standardised to zero mean and unit sample standard deviation,
    and then averaged over the fixated locations.

    :param saliencyMap: array-like saliency map
    :param fixationMap: array-like human fixation map; non-zero entries mark fixations
    :return: mean standardised saliency at the fixated locations, or NaN when
             fixationMap contains no fixations
    """
    fixationMap = np.asarray(fixationMap)

    # If there are no fixations to predict, return NaN
    if not fixationMap.any():
        print('Error: no fixationMap')
        return np.nan

    map1 = np.asarray(saliencyMap, dtype=float)

    # Rescale to [0, 1]; a constant map carries no information and becomes all zeros
    lowest, highest = map1.min(), map1.max()
    if highest > lowest:
        map1 = (map1 - lowest) / (highest - lowest)
    else:
        map1 = np.zeros_like(map1)

    # make sure maps have the same shape (linear interpolation; scipy.misc.imresize no longer exists)
    if map1.shape != fixationMap.shape:
        from scipy.ndimage import zoom
        factors = [target / float(current) for target, current in zip(fixationMap.shape, map1.shape)]
        map1 = zoom(map1, factors, order=1)

    # normalize saliency map (skipped for a flat map to avoid dividing by zero)
    spread = map1.std(ddof=1)
    if not spread == 0:
        map1 = (map1 - map1.mean()) / spread

    # mean value at fixation locations
    return map1[fixationMap.astype(bool)].mean()

def PCCLoss_torch(x, y, eps=1e-12):
    """Pearson correlation-coefficient loss, computed entirely with torch ops.

    The loss is 1 - |cc|: a correlation close to -1 or 1 counts as 'good' (loss near 0)
    and a correlation of 0 counts as 'bad' (loss 1).

    :param x: prediction tensor
    :param y: label tensor, broadcastable against x
    :param eps: lower bound for each sum of squared deviations; it keeps the loss and
                its gradient finite when x or y is constant (zero variance)
    :return: scalar tensor holding 1 - |cc|
    """
    vx = x - torch.mean(x)
    vy = y - torch.mean(y)

    # Flooring the sums of squares BEFORE the square root avoids both 0/0 in the forward
    # pass and the infinite derivative of sqrt at 0 in the backward pass.
    sxx = torch.sum(vx ** 2).clamp(min=eps)
    syy = torch.sum(vy ** 2).clamp(min=eps)

    # Using the centred label makes the numerator exactly zero for a constant input
    cc = torch.sum(vx * vy) / (torch.sqrt(sxx) * torch.sqrt(syy))

    # cc is in [-1, 1]; return 1 - |cc| so that a strong correlation gives a small loss
    return 1 - torch.abs(cc)

In [None]:
# Project-local building blocks: sampler, network architecture and training loop.
# OverfitSampler is only referenced by the commented-out sampler argument below.
from util.data_utils import OverfitSampler
from models.DSCLRCN_PyTorch2 import DSCLRCN #DSCLRCN_PyTorch, DSCLRCN_PyTorch2 or DSCLRCN_PyTorch3
from util.solver import Solver

# --- Training configuration ---
# batchsize, epoch_number, net_type and optim_str are reused by the saving and model-loading
# cells to build file names, so those cells depend on this cell having been run.
batchsize = 20 # Recommended: 20
epoch_number = 10 # Recommended: 10 (epoch_number =~ batchsize/2)
net_type = 'Seg' # 'Seg' or 'CNN' Recommended: Seg
optim_str = 'Adam' # 'SGD' or 'Adam' Recommended: Adam
optim_args = {'lr': 1e-4} # 1e-2 if SGD, 1e-4 if Adam
loss_func = torch.nn.KLDivLoss() # PCCLoss_torch or torch.nn.KLDivLoss() Recommended: torch.nn.KLDivLoss()

# Map the optimiser name to its class; anything other than 'SGD' selects Adam
optim = torch.optim.SGD if optim_str == 'SGD' else torch.optim.Adam

# Data loaders; the commented-out sampler restricts training to num_train samples (overfitting test)
#num_train = 100
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batchsize, shuffle=True, num_workers=4)#,
                                           #sampler=OverfitSampler(num_train))
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batchsize, shuffle=True, num_workers=4)

# Build the network for the transformed dataset
# NOTE(review): input_dim is presumably (height, width) of the 128x96 images - confirm against DSCLRCN
model = DSCLRCN(input_dim=(96, 128), local_feats_net=net_type)
# Set solver as torch.optim.SGD and lr as 1e-2, or torch.optim.Adam and lr 1e-4
solver = Solver(optim=optim, optim_args=optim_args, loss_func=loss_func)
# Train; filename_args passes the run settings on to the solver
# NOTE(review): presumably used to name checkpoint files - confirm in util.solver
solver.train(model, train_loader, val_loader, num_epochs=epoch_number, log_nth=50, filename_args={
    'batchsize' : batchsize,'epoch_number' : epoch_number,
    'net_type' : net_type, 'optim' : optim_str}
)

In [None]:
# Persist the trained network and the solver (which holds the loss histories plotted below).
# Both file names are derived from the same run settings.
run_settings = (net_type, optim_str, batchsize, epoch_number)
model_path = 'pretrained/model_{}_{}_lr4_batch{}_epoch{}'.format(*run_settings)
solver_path = 'pretrained/solver_{}_{}_lr4_batch{}_epoch{}.pkl'.format(*run_settings)

model.save(model_path)
with open(solver_path, 'wb') as solver_file:
    pickle.dump(solver, solver_file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Loss curves recorded by the solver: training loss in the top panel, validation loss below.
loss_panels = (
    (solver.train_loss_history, 'o', 'Train Loss'),
    (solver.val_loss_history, '-o', 'Val Loss'),
)
for panel_index, (history, line_format, heading) in enumerate(loss_panels, start=1):
    plt.subplot(2, 1, panel_index)
    plt.plot(history, line_format)
    plt.title(heading)
plt.show()

In [None]:
# Loading a model from the saved state that produced the lowest validation loss during training:

from models.DSCLRCN_PyTorch2 import DSCLRCN # Requires the model class be loaded

# Assumes the model uses models.DSCLRCN_PyTorch2 architecture. If not, this method will fail
def load_model_from_checkpoint(model_name):
    """Rebuild a DSCLRCN network from the checkpoint file pretrained/<model_name>.pth.

    The checkpoint must be a dict providing 'state_dict' (the network weights) and
    'epoch' (only used for the log message). The local feature network is chosen
    from the name: 'Seg' if model_name contains 'Seg', otherwise 'CNN'.

    :param model_name: checkpoint path relative to pretrained/, without the '.pth' suffix
    :return: the restored model, moved to the GPU when one is available
    """
    filename = "pretrained/" + model_name + ".pth"
    use_cuda = torch.cuda.is_available()

    # NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
    # Without a GPU, tensors that were saved from CUDA are remapped onto the CPU.
    checkpoint = torch.load(filename, map_location=None if use_cuda else 'cpu')

    # Find which local_feats_net the model used
    local_feats_net = 'Seg' if 'Seg' in model_name else 'CNN'
    model = DSCLRCN(input_dim=(96, 128), local_feats_net=local_feats_net)
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (trained for {} epochs)".format(model_name, checkpoint['epoch']))

    if use_cuda:
        model = model.cuda()
    return model

def load_model(model_name):
    """Load a fully serialized model object from pretrained/<model_name>.

    :param model_name: file path relative to the pretrained/ folder
    :return: the loaded model, on the GPU if one is available, otherwise on the CPU
    """
    # Always deserialize onto the CPU first; the move to the GPU happens on return
    restored = torch.load("pretrained/" + model_name, map_location='cpu')
    print("=> loaded model_1 '{}'".format(model_name))

    return restored.cuda() if torch.cuda.is_available() else restored

In [None]:
#Loading some pretrained models to test them on the images:
# Invariant for the cells below: models[i] is the network described by model_names[i].
models = []
model_names = []
# Run the below two lines if a model has been trained already in this execution cycle.
# The in-memory model is registered together with its name, so it is NOT loaded from disk again.
model_names.append('model_{}_{}_lr4_batch{}_epoch{}'.format(net_type, optim_str, batchsize, epoch_number))
models.append(model)

# model_1: Best model thusfar
# Load the batchsize = 20, epoch = 10 model - best produced so far
model_names.append("BEST_SO_FAR b20 e10 KLDiv Adam/model_Seg_100_lr4_batch20_epoch10")

# model_2: Best contender for model_0
# Uses SGD and Pearson's CC loss function (proper, loss taken as 1 - abs(cc))
model_names.append("b20 e20 PCC abs/best_model_Seg_SGD_lr4_batch20_epoch10")

# other models

# Load the models specified above. Names that already have a model in `models` are skipped;
# loading them again would shift every later model by one position relative to its name.
for name in model_names[len(models):]:
    if "best_model" in name:
        # 'best_model' files are checkpoints (state dict + metadata)
        models.append(load_model_from_checkpoint(name))
    else:
        # everything else is a fully serialized model object
        models.append(load_model(name))

assert len(models) == len(model_names), "models and model_names must stay index-aligned"

# Load the batchsize = 20, epoch = 10 model
model_1_name = "model_Seg_100_lr4_batch20_epoch10"
model_1 = torch.load("pretrained/" + model_1_name, map_location='cpu')
print("=> loaded model_1 '{}'".format(model_1_name))

# Load the batchsize = 20, epoch = 10, checkpointed model
model_2_name = "best_model_Seg_100_lr4_batch20_epoch10"
model_2 = load_model_from_checkpoint(model_2_name)

# Move the models to the GPU if one is available
if torch.cuda.is_available():
    model_1 = model_1.cuda()
    model_2 = model_2.cuda()
    print("Moved all loaded models to GPU")

In [None]:
# Original images of the test set ('test_datadict.pickle': dictionary of images and fixation maps).
# NOTE: This does NOT contain any fixation maps, as these are not provided with SALICON test images.
# NOTE: pickle.load executes code stored in the file - only open pickles from a trusted source.
test_pickle_path = 'Dataset/Transformed/test_datadict.pickle'
with open(test_pickle_path, 'rb') as test_file:
    test_data_original = pickle.load(test_file)
print("Test data loaded")

# Original images of the validation set ('val_datadict.pickle': dictionary of images and fixation maps)
val_pickle_path = 'Dataset/Transformed/val_datadict.pickle'
with open(val_pickle_path, 'rb') as val_file:
    val_data_original = pickle.load(val_file)
print("Validation data loaded")

In [None]:
# Testing the first two loaded models on a custom image and on a random image from the val set:

def _gaussian_ksize(sigma):
    """Return int(4*sigma) as a kernel side length that cv2.GaussianBlur accepts."""
    ksize = int(4 * sigma)
    # OpenCV needs odd, positive kernel sizes (0 means "derive from sigma"), so bump even values
    if ksize > 0 and ksize % 2 == 0:
        ksize += 1
    return ksize


def _raw_saliency(net, batch):
    """Run `net` on `batch` and return its squeezed output as a numpy array on the CPU."""
    prediction = net(Variable(batch))
    if torch.cuda.is_available():
        prediction = prediction.cpu()
    return prediction.squeeze().data.numpy()


def _blur_saliency(saliency_map, sigma):
    """Apply the Gaussian blur used for all saliency maps in this cell."""
    ksize = _gaussian_ksize(sigma)
    return cv2.GaussianBlur(saliency_map, (ksize, ksize), sigma)


def _panel_title(model_name):
    """Plot title for a model: its name from the substring 'batch' onwards."""
    return model_name[model_name.find('batch'):]


# Pick a random test image and validation image
test_image_id = randint(0, len(test_data_original['images'])-1) # only referenced by the commented-out savefig below
val_image_id  = randint(0, len(val_data_original['images'])-1)

# Load the pre-processed validation sample (network input and ground truth)
#x,y = test_data.__getitem__(test_image_id)
x_val, y_val = val_data.__getitem__(val_image_id)

# Get the original (before pre-processing) images to be displayed
#original = test_data_original['images'][test_image_id]
original_val = val_data_original['images'][val_image_id]

# Loading an individual/specific image and its ground truth stored in /Dataset
x = cv2.imread('Dataset/test.jpg')
y = cv2.imread('Dataset/test_GT.jpg')
# cv2.imread reports a missing or unreadable file by returning None instead of raising
if x is None or y is None:
    raise FileNotFoundError("Could not read 'Dataset/test.jpg' and/or 'Dataset/test_GT.jpg'")
x = x.astype(np.float32)/255.
y = y.astype(np.float32)/255.
# convert from BGR (cv2) to RGB (matplotlib)
x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)
y = cv2.cvtColor(y, cv2.COLOR_BGR2RGB)
# Resize images to 128x96 (cv2.resize takes the size as (width, height))
x = cv2.resize(x, (128, 96), interpolation=cv2.INTER_AREA)
y = cv2.resize(y, (128, 96), interpolation=cv2.INTER_AREA)
original = x.copy() # keep the un-normalized image for display

x -= mean_image # normalize x
x = x.transpose(2,0,1) # Convert from H, W, C to C, H, W (ordering used by PyTorch tensors)
x = torch.from_numpy(x) # Convert np array to tensor; y stays a numpy array as it is only plotted

# Add the batch dimension and create real copies of the inputs to pass through each model
# (slicing with [:] would only create a view sharing the same memory)
x = x.contiguous().view(1, *x.size())
x_2 = x.clone()
x_val = x_val.contiguous().view(1, *x_val.size())
x_2_val = x_val.clone()
if torch.cuda.is_available():
    x = x.cuda()
    x_val = x_val.cuda()
    x_2 = x_2.cuda()
    x_2_val = x_2_val.cuda()
y_val = y_val.numpy()


##### First model #####
x_sal_nmp = _raw_saliency(models[0], x)

# Sigma for Gaussian blurring; derived once from the first output and used for all maps,
# as all inputs are of the same shape
sigma = 0.035*min(x_sal_nmp.shape)

x_sal_nmp = _blur_saliency(x_sal_nmp, sigma)
x_val_sal_nmp = _blur_saliency(_raw_saliency(models[0], x_val), sigma)

##### Second model #####
x_2_sal_nmp = _blur_saliency(_raw_saliency(models[1], x_2), sigma)
x_2_val_sal_nmp = _blur_saliency(_raw_saliency(models[1], x_2_val), sigma)


# Plot the output: top row = custom image, bottom row = validation image
plt.figure(figsize=(20,8))

title_first = _panel_title(model_names[0])
title_second = _panel_title(model_names[1])
panels = [
    ('Original', original, None),
    ('Ground Truth', y, 'gray'),
    (title_first, x_sal_nmp, 'gray'),
    (title_second, x_2_sal_nmp, 'gray'),
    ('Original Val', original_val, None),
    ('Ground Truth Val', y_val, 'gray'),
    (title_first, x_val_sal_nmp, 'gray'),
    (title_second, x_2_val_sal_nmp, 'gray'),
]
for panel_index, (panel_heading, panel_image, panel_cmap) in enumerate(panels, start=1):
    plt.subplot(2, 4, panel_index)
    plt.imshow(panel_image, cmap=panel_cmap)
    plt.title(panel_heading)

# plt.savefig('ResExamples/example_test_'+str(test_image_id)+'_val_'+str(val_image_id)+'.png')
plt.show()

In [None]:
# Define the Pearson Cross Correlation loss functions:
# The first uses torch and can be executed fully on the GPU;
# the other uses numpy and must be executed on the CPU.
# def PCCLoss_torch(x, y):
#     """Computes Pearson Cross Correlation loss
#     :param x: prediction
#     :param y: label
#     """
#     vx = x - torch.mean(x)
#     vy = y - torch.mean(y)
    
#     loss = torch.sum(vx*y) / (torch.sqrt(torch.sum(vx ** 2)) * torch.sqrt(torch.sum(vy ** 2)))
    
#     return loss

def PCCLoss_numpy(x, y, eps=1e-12):
    """Computes the Pearson Cross Correlation between two numpy arrays.

    Despite the name, the returned value is the signed correlation coefficient itself
    (1 = perfectly correlated, -1 = perfectly anti-correlated), not 1 - |cc|.

    :param x: prediction (numpy array)
    :param y: label (numpy array, broadcastable against x)
    :param eps: lower bound for each sum of squared deviations; with it a constant
                (zero-variance) input yields a correlation of ~0 instead of NaN
    :return: numpy scalar holding the correlation coefficient
    """
    vx = x - np.mean(x)
    vy = y - np.mean(y)

    # Floor the sums of squares so that a flat map cannot produce 0/0
    sxx = np.maximum(np.sum(vx ** 2), eps)
    syy = np.maximum(np.sum(vy ** 2), eps)

    # The centred label keeps the numerator at (numerically) zero for constant inputs
    loss = np.sum(vx * vy) / (np.sqrt(sxx) * np.sqrt(syy))

    return loss

In [None]:
# Define a function for testing a model.
# The pipeline differs depending on whether the loss fn is numpy or torch based (the check is naively implemented so far).
# This is because the proper method of testing, applying a Gaussian blur to the output map, has only been implemented using numpy.
# Thus, the torch pipeline will not yield correct results, as it does not correctly follow the method of the original paper.
def test_model(model, data_source, loss_fn=PCCLoss_numpy, input_size=(640, 480), show=False):
    """Evaluate `model` on `data_source` and return one loss value per batch.

    Two pipelines exist. If loss_fn is PCCLoss_torch, the raw network output is compared
    against the fixation maps divided by their per-map sum. For any other loss_fn the
    output is moved to numpy, resized to input_size and Gaussian-blurred before it is
    compared against the unmodified labels.

    :param model: network to evaluate; called as model(inputs)
    :param data_source: dataset yielding (inputs, labels) pairs, wrapped in a DataLoader
                        with batch size 5
    :param loss_fn: callable(outputs, labels) whose result provides .item()
    :param input_size: (width, height) handed to cv2.resize; its smaller side sets the blur sigma
    :param show: if True, display debug windows for the first sample of every batch
                 (numpy pipeline only). Each window blocks until a key is pressed and
                 needs a GUI, hence the default of False.
    :return: list with the loss of every batch
    """
    test_loader = torch.utils.data.DataLoader(data_source, batch_size=5, shuffle=True, num_workers=4)
    testLosses = []

    # Blur parameters depend only on input_size, so they are computed once
    sigma = 0.035*min(input_size[0], input_size[1])
    ksize = int(4*sigma)
    # cv2.GaussianBlur needs odd, positive kernel sizes (0 means "derive from sigma")
    if ksize > 0 and ksize % 2 == 0:
        ksize += 1

    for data in tqdm(test_loader):
        inputs, labels = data
        if torch.cuda.is_available():
            inputs = Variable(inputs.cuda())
            labels = Variable(labels.cuda())
        else:
            inputs = Variable(inputs)
            labels = Variable(labels)

        # Produce the output; no gradients are needed for evaluation
        with torch.no_grad():
            outputs = model(inputs).squeeze()

        ### TORCH PIPELINE ###
        if loss_fn == PCCLoss_torch:
            # Normalize the fixation maps by dividing each value in each fixation map by the
            # sum of all values in that fixation map
            labels_sum = torch.sum(labels.contiguous().view(labels.size(0),-1), dim=1)
            labels /= labels_sum.contiguous().view(*labels_sum.size(), 1, 1).expand_as(labels)

        ### NUMPY PIPELINE ###
        else:
            # Move the output to the CPU so we can process it using numpy
            outputs = outputs.cpu().data.numpy()
            # squeeze() also drops the batch axis of a single-sample batch; restore it so the
            # per-image loops below iterate over images rather than over image rows
            if outputs.ndim == 2:
                outputs = outputs[np.newaxis]

            if show:
                # Show the input and output, side to side
                cv2.imshow("Input & Output", cv2.cvtColor(np.hstack(((inputs.cpu().data.numpy()[0].transpose(1, 2, 0) + mean_image), np.repeat(outputs[0, :, :, np.newaxis], 3, axis=2))), cv2.COLOR_RGB2BGR))
                cv2.waitKey(0)

            # Resize the images to input size
            outputs = np.array([cv2.resize(output, input_size) for output in outputs])

            # Apply a Gaussian filter to blur the saliency maps
            outputs = np.array([cv2.GaussianBlur(output, (ksize, ksize), sigma) for output in outputs])

            labels = labels.cpu().numpy()
            if show:
                # Show the blurred output and the GT, side to side
                cv2.imshow("Ground Truth & Blurred Output", np.hstack((labels[0], outputs[0])))
                cv2.waitKey(0)

        ### BOTH PIPELINES ###
        testLosses.append(loss_fn(outputs, labels).item())

    return testLosses

In [None]:
# Obtaining Pearson Cross Correlation values on the validation set for the different models:

# loss_fn = nn.KLDivLoss()
# Use Pearson Cross Correlation loss
loss_fn = PCCLoss_numpy

# test on validation data as we don't have ground truths for the test data (this was also done in original DSCLRCN paper)

# The loop variable is deliberately NOT called `model`: that name holds the network trained
# earlier in this notebook and must not be overwritten by the evaluation loop.
test_losses = []
for candidate_model in tqdm(models):
    test_losses.append(test_model(candidate_model, val_data, loss_fn=loss_fn, input_size=(128, 96)))

# Print out the result: one line per model with the mean over all batch values
print('Pearson Cross Correlation Loss on Validation set (Seg):')
for i, batch_losses in enumerate(test_losses):
    print('[{}] {}: {}'.format(i, model_names[i][model_names[i].find('batch'):], np.mean(batch_losses)))
