In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torchvision
from torch.autograd import Variable
import torch.nn as nn
import pickle
from random import randint
import sys
from tqdm import tqdm_notebook as tqdm
import cv2

In [2]:
from util.data_utils import get_SALICON_datasets
from util.data_utils import get_raw_SALICON_datasets

# Load the train / validation / test splits of the pre-transformed SALICON data
# stored under 'Dataset/Transformed'. The trailing "128x96" note presumably gives
# the image width x height of this variant -- TODO confirm in util.data_utils.
train_data, val_data, test_data = get_SALICON_datasets('Dataset/Transformed') # 128x96
# Alternative: the raw 640x480 images, read from a machine-specific temporary folder.
#train_data, val_data, test_data = get_raw_SALICON_datasets(dataset_folder='/tmp/pbqk24_tmp') # 640x480


Progress: 100%

In [None]:
from util.data_utils import OverfitSampler
from models.DSCLRCN_PyTorch2 import DSCLRCN
from util.solver import Solver

# --- Training configuration ---
batchsize = 20 # Recommended: 20
epoch_number = 10 # Recommended: 10 (epoch_number =~ batchsize/2)
net_type = 'Seg' # 'Seg' or 'CNN' Recommended: Seg

# To overfit on a small subset while debugging, replace shuffle=True with
# sampler=OverfitSampler(num_train) (e.g. num_train = 100); DataLoader does not
# accept a sampler together with shuffle=True.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batchsize, shuffle=True, num_workers=4)
# The solver reports a single validation loss per epoch, so the batch order does
# not matter here: keep the validation pass deterministic instead of shuffling.
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batchsize, shuffle=False, num_workers=4)

# Train on the transformed (128x96) images loaded above.
# input_dim is presumably (height, width) -- TODO confirm against DSCLRCN.
model = DSCLRCN(input_dim=(96, 128), local_feats_net=net_type)
solver = Solver(optim_args={'lr': 1e-4})
solver.train(model, train_loader, val_loader, num_epochs=epoch_number, log_nth=50)

Loading weights for net_encoder
Loading weights for PlacesCNN_VGG16
START TRAIN.


  "See the documentation of nn.Upsample for details.".format(mode))


[Iteration 0/5000] TRAIN loss: 0.000104
[Iteration 50/5000] TRAIN loss: 0.000094
[Iteration 100/5000] TRAIN loss: 0.000076
[Iteration 150/5000] TRAIN loss: 0.000067
[Iteration 200/5000] TRAIN loss: 0.000067
[Iteration 250/5000] TRAIN loss: 0.000066
[Iteration 300/5000] TRAIN loss: 0.000064
[Iteration 350/5000] TRAIN loss: 0.000057
[Iteration 400/5000] TRAIN loss: 0.000068
[Iteration 450/5000] TRAIN loss: 0.000048
[Epoch 0/10] TRAIN KLD Loss: 0.000068
[Epoch 0/10] VAL KLD Loss: 0.000049
[Iteration 500/5000] TRAIN loss: 0.000048
[Iteration 550/5000] TRAIN loss: 0.000050
[Iteration 600/5000] TRAIN loss: 0.000043
[Iteration 650/5000] TRAIN loss: 0.000048
[Iteration 700/5000] TRAIN loss: 0.000048
[Iteration 750/5000] TRAIN loss: 0.000036
[Iteration 800/5000] TRAIN loss: 0.000032
[Iteration 850/5000] TRAIN loss: 0.000033
[Iteration 900/5000] TRAIN loss: 0.000032
[Iteration 950/5000] TRAIN loss: 0.000036
[Epoch 1/10] TRAIN KLD Loss: 0.000029
[Epoch 1/10] VAL KLD Loss: 0.000026
[Iteration 1000

In [None]:
# Persist the trained network together with its solver (the solver object holds
# the loss histories plotted later) under names derived from the run settings.
model_path = 'pretrained/model_{}_100_lr4_batch{}_epoch{}_2'.format(net_type, batchsize, epoch_number)
solver_path = 'pretrained/solver_{}_100_lr4_batch{}_epoch{}_2.pkl'.format(net_type, batchsize, epoch_number)

model.save(model_path)
with open(solver_path, 'wb') as solver_file:
    pickle.dump(solver, solver_file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Loss curves recorded by the solver: training loss in the upper panel
# (markers only), validation loss in the lower panel (markers joined by lines).
loss_fig, (train_ax, val_ax) = plt.subplots(2, 1)

train_ax.plot(solver.train_loss_history, 'o')
train_ax.set_title('Train Loss')

val_ax.plot(solver.val_loss_history, '-o')
val_ax.set_title('Val Loss')

plt.show()

In [None]:
# Loading some pretrained models to test them on the images.
#
# Variants that were compared previously (checkpoints under 'pretrained/'):
#   model_cnn_100_lr4, model_seg_100_lr4              (full model)
#   model_cnn_noCon_100_lr4, model_seg_noCon_100_lr4  (no context)
#   model_cnn_noLSTM_100_lr4, model_seg_noLSTM_100_lr4 (no LSTM)

def _load_for_inference(path):
    """Load a fully pickled model and prepare it for inference.

    NOTE: torch.load unpickles arbitrary Python objects, so only checkpoint
    files from a trusted source must be passed in.

    :param path: path of the checkpoint file (presumably one written by
                 model.save -- TODO confirm)
    :return: the model in eval mode, on the GPU when one is available
    """
    # Deserialise onto the CPU first so GPU-trained checkpoints also load on CPU-only machines
    net = torch.load(path, map_location='cpu')
    # These models are only used for prediction: eval mode makes dropout /
    # batch-norm layers (if the network contains any) behave deterministically.
    net.eval()
    # Move the model to the GPU if one is available
    if torch.cuda.is_available():
        net = net.cuda()
    return net

model = _load_for_inference("pretrained/model_Seg_100_lr4_batch20_epoch10")
# Load the same model twice for now as we only have one trained model
model_2 = _load_for_inference("pretrained/model_Seg_100_lr4_batch20_epoch10")
#model_2 = _load_for_inference("pretrained/model_Seg_100_lr4_batch10_epoch20")


In [None]:
# Dictionaries with the original images behind the test and validation datasets
# (their 'images' entry is what gets displayed). SALICON ships no fixation maps
# for its test images, so only the validation dictionary can be expected to
# contain ground truth.
# NOTE: pickle.load executes code embedded in the file -- trusted files only.
test_pickle_path = 'Dataset/Transformed/test_datadict.pickle'
val_pickle_path = 'Dataset/Transformed/val_datadict.pickle'

with open(test_pickle_path, 'rb') as test_file:
    test_data_original = pickle.load(test_file)
    print("Test data loaded")

with open(val_pickle_path, 'rb') as val_file:
    val_data_original = pickle.load(val_file)
    print("Validation data loaded")


In [None]:
# Qualitative check: run both models on one random test image and one random
# validation image and show the predictions next to the inputs.

def _predict_saliency(net, batch):
    """Run `net` on a single-image batch and post-process the prediction.

    :param net: saliency model, called as net(batch)
    :param batch: input tensor whose leading (batch) dimension has size 1
    :return: tuple (raw prediction moved to the CPU,
                    Gaussian-blurred 2-D numpy saliency map)
    """
    with torch.no_grad():  # inference only: no autograd graph is needed
        sal = net(batch).cpu()
    sal_map = sal.squeeze().detach().numpy()
    # Blur the saliency map; sigma scales with the smaller side of the map
    sigma = 0.035 * min(sal_map.shape)
    # cv2.GaussianBlur rejects even kernel sizes, so round an even size up to the next odd one
    ksize = int(4 * sigma) | 1
    return sal, cv2.GaussianBlur(sal_map, (ksize, ksize), sigma)


test_image_id = randint(0, len(test_data_original['images'])-1)
val_image_id  = randint(0, len(val_data_original['images'])-1)

# NOTE(review): the test split is said to have no fixation maps, so what `y`
# holds for a test image depends on the dataset class -- verify before trusting
# the 'Ground Truth' panel of the first row.
x, y = test_data[test_image_id]
x_val, y_val = val_data[val_image_id]

original = test_data_original['images'][test_image_id]
original_val = val_data_original['images'][val_image_id]

# Add the leading batch dimension
x = x.contiguous().view(1, *x.size())
x_val = x_val.contiguous().view(1, *x_val.size())
if torch.cuda.is_available():
    x = x.cuda()
    x_val = x_val.cuda()
# Both models are fed exactly the same inputs
x_2 = x[:]
x_2_val = x_val[:]
y = y.numpy()
y_val = y_val.numpy()

# First model
x_sal, x_sal_nmp = _predict_saliency(model, x)
x_val_sal, x_val_sal_nmp = _predict_saliency(model, x_val)

# Second model
x_2_sal, x_2_sal_nmp = _predict_saliency(model_2, x_2)
# Fixed: this prediction used to be computed with `model`, so the last panel
# never actually showed the second model.
x_2_val_sal, x_2_val_sal_nmp = _predict_saliency(model_2, x_2_val)

# Plot the output: (title, image, colormap) for each panel, row by row
panels = [
    ('Original', original, None),
    ('Ground Truth', y, 'gray'),
    ('Model b20 e10', x_sal_nmp, 'gray'),
    ('Model b10 e20', x_2_sal_nmp, 'gray'),
    ('Original Val', original_val, None),
    ('Ground Truth Val', y_val, 'gray'),
    ('Model b20 e10', x_val_sal_nmp, 'gray'),
    ('Model b10 e20', x_2_val_sal_nmp, 'gray'),
]

plt.figure(figsize=(20,8))
for position, (panel_title, panel_image, panel_cmap) in enumerate(panels, start=1):
    plt.subplot(2, 4, position)
    plt.title(panel_title)
    plt.imshow(panel_image, cmap=panel_cmap)
# plt.savefig('ResExamples/example_'+str(test_image_id)+'.png')
plt.show()

In [None]:
# Define the Pearson Cross Correlation loss function
def PCCLoss_torch(x, y):
    """Computes Pearson Cross Correlation loss

    All statistics are taken over every element of the inputs, so passing a
    whole batch yields one pooled coefficient rather than one per sample.

    :param x: prediction (torch tensor)
    :param y: label (torch tensor, broadcastable against x)
    :return: 0-dim tensor holding the correlation coefficient; 0 when either
             input is constant (the coefficient is undefined there and would
             otherwise come out as NaN)
    """
    vx = x - torch.mean(x)
    vy = y - torch.mean(y)

    # Covariance of the centred signals over the product of their norms
    numerator = torch.sum(vx * vy)
    denominator = torch.sqrt(torch.sum(vx ** 2)) * torch.sqrt(torch.sum(vy ** 2))
    if denominator.item() == 0:
        # A constant map has no variance: report "no correlation" instead of 0/0
        return torch.zeros_like(denominator)
    return numerator / denominator

def PCCLoss_numpy(x, y):
    """Computes Pearson Cross Correlation loss

    All statistics are taken over every element of the inputs, so passing a
    whole batch yields one pooled coefficient rather than one per sample.

    :param x: prediction (numpy array)
    :param y: label (numpy array, broadcastable against x)
    :return: numpy scalar holding the correlation coefficient; 0 when either
             input is constant (the coefficient is undefined there and would
             otherwise come out as NaN)
    """
    vx = x - np.mean(x)
    vy = y - np.mean(y)

    # Covariance of the centred signals over the product of their norms
    numerator = np.sum(vx * vy)
    denominator = np.sqrt(np.sum(vx ** 2)) * np.sqrt(np.sum(vy ** 2))
    if denominator == 0:
        # A constant map has no variance: report "no correlation" instead of 0/0.
        # A numpy scalar is returned so that callers can still use .item().
        return np.float64(0.0)
    return numerator / denominator

In [None]:
#TODO keep working here, strange divergence of results between using torch pipeline and numpy pipeline
# One visible difference that may explain part of it: the numpy pipeline resizes
# and Gaussian-blurs the predictions before scoring them, while the torch
# pipeline scores the raw network output.
# mean_image is not used by test_model itself; it is kept available for
# experiments that add the dataset mean back onto the predictions.
mean_image = np.load('Dataset/Transformed/mean_image.npy').astype(np.float32)/255.

# Define a function for testing a model
def test_model(model, data_source, loss_fn=PCCLoss_numpy, input_size=(640, 480), batch_size=5):
    """Score a model on every batch of a dataset.

    :param model: network called as model(inputs)
    :param data_source: dataset yielding (input, label) pairs; labels are
                        handled as a batch of 2-D maps, i.e. (batch, H, W)
    :param loss_fn: PCCLoss_numpy (predictions are resized to input_size and
                    blurred before scoring) or PCCLoss_torch (raw predictions
                    are scored)
    :param input_size: target size handed to cv2.resize, which expects
                       (width, height); only used by the numpy pipeline
    :param batch_size: number of samples per evaluated batch
    :return: list with one score (Python float) per batch
    :raises ValueError: if loss_fn is neither of the two supported functions
    """
    use_numpy = loss_fn is PCCLoss_numpy
    if not use_numpy and loss_fn is not PCCLoss_torch:
        raise ValueError('loss_fn must be PCCLoss_numpy or PCCLoss_torch')

    test_loader = torch.utils.data.DataLoader(data_source, batch_size=batch_size, shuffle=True, num_workers=4)
    testLosses = []

    # Blur parameters of the numpy pipeline (do not change between batches)
    sigma = 0.035*min(input_size[0], input_size[1])
    # cv2.GaussianBlur rejects even kernel sizes, so round an even size up to the next odd one
    ksize = int(4*sigma) | 1

    for inputs, labels in tqdm(test_loader):
        if torch.cuda.is_available():
            inputs = inputs.cuda()
            labels = labels.cuda()

        with torch.no_grad():  # evaluation only: no autograd graph is needed
            outputs = model(inputs).squeeze()
        if outputs.dim() == 2:
            # squeeze() also dropped the batch axis of a single-image batch; restore it
            outputs = outputs.unsqueeze(0)

        # Divides each value in each fixation map by the sum of all values in that fixation map
        labels_sum = torch.sum(labels.contiguous().view(labels.size(0),-1), dim=1)
        labels = labels / labels_sum.contiguous().view(*labels_sum.size(), 1, 1).expand_as(labels)

        ### NUMPY PIPELINE ###
        if use_numpy:
            outputs = outputs.cpu().detach().numpy()

            # Resize the images to input size
            outputs = np.array([cv2.resize(output, input_size) for output in outputs])

            # Apply a Gaussian filter to blur the saliency maps
            outputs = np.array([cv2.GaussianBlur(output, (ksize, ksize), sigma) for output in outputs])

            labels = labels.cpu().numpy()

        ### BOTH PIPELINES ###
        testLosses.append(loss_fn(outputs, labels).item())

    return testLosses

In [None]:
# Pearson Cross Correlation scores of the loaded models.
# The scores are computed on the validation data, as we don't have ground truths
# for the test data (this was also done in the original DSCLRCN paper).
# Model 1 goes through the numpy pipeline, model 2 through the torch pipeline.
loss_fn = PCCLoss_numpy

# Per-batch scores for each model variant; only the two 'Seg' entries filled in
# below are evaluated in this cell, the others stay empty.
TestLosses_CNN1, TestLosses_Seg1 = [], []
TestLosses_CNN2, TestLosses_Seg2 = [], []
TestLosses_CNN3, TestLosses_Seg3 = [], []

eval_size = (128, 96)
TestLosses_Seg1 = test_model(model, val_data, loss_fn=loss_fn, input_size=eval_size)
TestLosses_Seg2 = test_model(model_2, val_data, loss_fn=PCCLoss_torch, input_size=eval_size)

print()
print('Pearson Cross Correlation Loss on Validation set (Seg):\nModel 1:', np.mean(TestLosses_Seg1), "\nModel 2:" , np.mean(TestLosses_Seg2))