In [None]:
from __future__ import print_function
import numpy as np
from torch.utils.data import Dataset, DataLoader, TensorDataset
import os
import imageio
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch
import timeit
import matplotlib.pyplot as plt
import torchvision
from torch.optim.lr_scheduler import StepLR

In [None]:
# Mount Google Drive inside the Colab runtime so the dataset under
# /content/drive/ becomes reachable; the call prompts for authorization.
from google.colab import drive
drive.mount('/content/drive/')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive/


In [None]:
# Root folder on the mounted Drive; the dataset directories and the saved
# model weights are all addressed relative to it.
DATA_FOLDER = "/content/drive/My Drive/Eye_fixation"

In [None]:
# Training split: root directory plus the text files that list the image and
# fixation-map paths (one path per line, resolved relative to the root).
root_dir_train = DATA_FOLDER + '/cv2_training_data/'
image_train = root_dir_train + 'train_images.txt'
fix_train = root_dir_train + 'train_fixations.txt'
# Validation split, laid out the same way as the training split.
root_dir_valid = DATA_FOLDER + '/cv2_validation_data/'
image_valid = root_dir_valid + 'val_images.txt'
fix_valid = root_dir_valid + 'val_fixations.txt'

# Test split: only an image listing is referenced here, no fixation listing.
root_dir_test = DATA_FOLDER + '/cv2_testing_data/'
test_images = root_dir_test + 'test_images.txt'

In [None]:
def read_file(filename):
    """Read a text file and return its lines with surrounding whitespace removed.

    Args:
        filename: path of the text file to read.

    Returns:
        A list with one string per line of the file, each passed through
        ``str.strip`` (blank lines are kept as empty strings).
    """
    with open(filename, 'r') as handle:
        return [raw_line.strip() for raw_line in handle]


class FixationDataset(Dataset):
    """Dataset that pairs each image with its fixation map.

    The two listing files are read once at construction time; the pixel data
    itself is loaded from disk on every ``__getitem__`` call.
    """

    def __init__(self, root_dir, image_file, fixation_file, transform=None):
        """Load the image / fixation listings.

        Args:
            root_dir: directory that the listed paths are joined onto.
            image_file: text file with one image path per line.
            fixation_file: text file with one fixation-map path per line.
            transform: optional callable applied to every sample dict.
        """
        self.root_dir = root_dir
        self.image_files = read_file(image_file)
        self.fixation_files = read_file(fixation_file)
        self.transform = transform
        # Images and fixation maps are matched by position, so the two
        # listings must have the same length.
        assert len(self.image_files) == len(self.fixation_files)

    def __len__(self):
        """Return the number of image / fixation pairs."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``{'image': ..., 'fixation': ...}`` for position ``idx``.

        The transform, when set, receives the sample dict and its return
        value is handed back unchanged.
        """
        # Tensor indices are converted to plain Python values before
        # indexing the path lists.
        index = idx.tolist() if torch.is_tensor(idx) else idx

        image_path = os.path.join(self.root_dir, self.image_files[index])
        fixation_path = os.path.join(self.root_dir, self.fixation_files[index])

        sample = {
            'image': imageio.imread(image_path),
            'fixation': imageio.imread(fixation_path),
        }
        if not self.transform:
            return sample
        return self.transform(sample)

In [None]:
class Rescale():
    """Convert image and fixation arrays to float32 and divide by a maximum value.

    The transform is stateless apart from its configuration: nothing from the
    processed sample is kept on the instance, so one instance can be shared
    freely and does not keep the last sample alive.
    """
    def __init__(self, max_value=255.0):
        """
        Args:
            max_value: divisor applied to both arrays. The default of 255.0
                matches 8-bit image data.

        Raises:
            ValueError: if ``max_value`` is not strictly positive.
        """
        max_value = float(max_value)
        if max_value <= 0:
            raise ValueError("max_value must be positive, got {}".format(max_value))
        self.max_value = max_value

    def __call__(self, sample):
        """Return a new sample dict with rescaled float32 arrays.

        Args:
            sample: dict with NumPy arrays under ``'image'`` and ``'fixation'``.

        Returns:
            dict with the same two keys; the input arrays are not modified.
        """
        image = sample['image'].astype(np.float32) / self.max_value
        fixation = sample['fixation'].astype(np.float32) / self.max_value
        return {'image': image, 'fixation': fixation}
    
class ToTensor():
    """Convert a sample of NumPy arrays into torch tensors in channel-first layout.

    The image goes from (H, W, C) to (C, H, W) and the fixation map from
    (H, W) to (1, H, W), so both keep the same spatial orientation.
    """
    def __call__(self, sample):
        """
        Args:
            sample: dict with an (H, W, C) array under ``'image'`` and an
                (H, W) array under ``'fixation'``.

        Returns:
            dict with tensors under the same two keys.

        Raises:
            ValueError: if the image is not 3-dimensional.
        """
        image = sample['image']
        if image.ndim != 3:
            raise ValueError(
                "expected an (H, W, C) image, got shape {}".format(image.shape))
        # A bare `.T` would reverse *all* axes and yield (C, W, H), i.e. an
        # image that is spatially transposed relative to its fixation map.
        # Move only the channel axis to the front instead.
        image = np.ascontiguousarray(np.transpose(image, (2, 0, 1)))
        # Add the leading channel axis expected for a single-channel map.
        fixation = np.expand_dims(sample['fixation'], axis=0)
        return {'image': torch.from_numpy(image),
                'fixation': torch.from_numpy(fixation)}
    
class Normalize():
    """Standardise the first three channels of a channel-first image.

    Each channel ``c`` is mapped to ``(x - mean[c]) / std[c]`` using the
    per-channel constants stored on the instance. Any spatial size is
    accepted, and floating point inputs keep their dtype.
    """
    def __init__(self):
        self.mean = np.array([0.485, 0.456, 0.406])
        self.std = np.array([0.229, 0.224, 0.225])

    def __call__(self, sample):
        """
        Args:
            sample: dict with a (C, H, W) image (NumPy array or torch tensor,
                C >= 3) under ``'image'`` and an arbitrary object under
                ``'fixation'``.

        Returns:
            dict with the normalised (3, H, W) image as a torch tensor and the
            fixation entry passed through untouched.

        Raises:
            ValueError: if the image is not 3-dimensional or has fewer
                channels than there are mean / std entries.
        """
        image = sample['image']
        if torch.is_tensor(image):
            image = image.detach().cpu().numpy()
        else:
            image = np.asarray(image)

        channels = self.mean.shape[0]
        if image.ndim != 3 or image.shape[0] < channels:
            raise ValueError(
                "expected a (C, H, W) image with C >= {}, got shape {}".format(
                    channels, image.shape))

        # Cast the statistics to the image dtype so a float32 image stays
        # float32 regardless of NumPy's scalar promotion rules; non-float
        # inputs fall back to the float64 statistics.
        if np.issubdtype(image.dtype, np.floating):
            stats_dtype = image.dtype
        else:
            stats_dtype = self.mean.dtype
        mean = self.mean.astype(stats_dtype).reshape(-1, 1, 1)
        std = self.std.astype(stats_dtype).reshape(-1, 1, 1)

        # Broadcasting over (C, 1, 1) replaces the fixed 224x224 reshape,
        # so the transform works for any spatial size.
        normalized = (image[:channels] - mean) / std
        return {'image': torch.from_numpy(normalized),
                'fixation': sample['fixation']}

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch

class Encoder(nn.Module):
    """Feature extractor built from the ``features`` part of torchvision's VGG-16-BN.

    The network is created with ``pretrained=False``, i.e. no pretrained
    weights are requested.
    """
    def __init__(self):
        super(Encoder, self).__init__()

        # Local import keeps this cell usable on its own.
        from torchvision import models
        # NOTE(review): newer torchvision releases deprecate `pretrained=` in
        # favour of `weights=`; kept as-is for compatibility with the version
        # this notebook was written against.
        self.model = models.vgg16_bn(pretrained=False).features

    def forward(self, xb):
        """Run the batch ``xb`` through the VGG feature stack and return the result."""
        return self.model(xb)
        

class Decoder(nn.Module):
    """Stack of conv + batch-norm + ReLU stages with two 2x bilinear upsamplings.

    Takes a 512-channel feature map and produces a single-channel map whose
    spatial size is four times that of the input. The output is returned
    without any final activation.
    """

    # (name suffix, input channels, output channels) of every conv/bn stage,
    # listed in the order they are applied.
    _STAGES = (
        ('7_3', 512, 512),
        ('8_1', 512, 256),
        ('8_2', 256, 256),
        ('9_1', 256, 128),
        ('9_2', 128, 128),
        ('10_1', 128, 64),
        ('10_2', 64, 64),
    )

    def __init__(self):
        super(Decoder, self).__init__()
        # Registration order is deliberate: all batch norms, then dropout and
        # upsample, then all convolutions, then the output conv. It fixes the
        # order of state_dict keys and of model.parameters().
        for suffix, _, width in self._STAGES:
            setattr(self, 'bn' + suffix, nn.BatchNorm2d(num_features=width))
        # Registered but not applied in forward().
        self.drop_layer = nn.Dropout2d(p=0.5)
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear',
                                    align_corners=False)
        for suffix, fan_in, width in self._STAGES:
            setattr(self, 'conv' + suffix,
                    nn.Conv2d(fan_in, width, kernel_size=3, padding=1))
        self.output = nn.Conv2d(64, 1, kernel_size=1, padding=0)

    def _stage(self, suffix, xb):
        """Apply conv -> batch norm -> ReLU for the stage named by ``suffix``."""
        conv = getattr(self, 'conv' + suffix)
        norm = getattr(self, 'bn' + suffix)
        return F.relu(norm(conv(xb)))

    def forward(self, xb):
        """Decode the feature map ``xb`` into a single-channel map."""
        for suffix in ('7_3', '8_1', '8_2'):
            xb = self._stage(suffix, xb)
        xb = self.upsample(xb)
        for suffix in ('9_1', '9_2'):
            xb = self._stage(suffix, xb)
        xb = self.upsample(xb)
        for suffix in ('10_1', '10_2'):
            xb = self._stage(suffix, xb)
        return self.output(xb)
        
        
class Generator(nn.Module):
    """Full network: an ``Encoder`` whose output is fed into a ``Decoder``."""

    def __init__(self):
        super(Generator, self).__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()

    def forward(self, inp):
        """Encode the batch ``inp`` and return the decoder's output for it."""
        features = self.encoder(inp)
        return self.decoder(features)

In [None]:
class BCELossWithDownsampling():
    """Binary cross-entropy on logits against a bilinearly resized target.

    The target is resized by ``scale_factor`` before being compared with the
    prediction, so the prediction must already have the resized spatial size.
    """
    def __init__(self, scale_factor=1/8):
        """
        Args:
            scale_factor: spatial scaling applied to the target map. The
                default of 1/8 is the value this notebook was written with.
        """
        # nn.Upsample is used for *down*-sampling here simply by passing a
        # factor below 1. Alternative previously considered:
        # nn.AvgPool2d(4, stride=4, count_include_pad=False)
        self.downsample = nn.Upsample(scale_factor=scale_factor, mode='bilinear',
                                      align_corners=False)
        self.loss_fcn = nn.BCEWithLogitsLoss()

    def __call__(self, pred, y):
        """Return the loss between logits ``pred`` and the resized target ``y``.

        Args:
            pred: raw (pre-sigmoid) predictions.
            y: target map at full resolution; resized by ``scale_factor``.
        """
        return self.loss_fcn(pred, self.downsample(y))

In [None]:
# Per-sample preprocessing pipeline: Rescale first, then ToTensor.
# Normalize is defined in this notebook but is not part of this pipeline.
composed = torchvision.transforms.Compose([Rescale(), ToTensor()])

In [None]:
# Batch sizes for the training and validation loaders.
bs = 32
bs_valid = 32

# Training batches are reshuffled every epoch.
train_dl = DataLoader(
    FixationDataset(root_dir_train, image_train, fix_train, transform=composed),
    batch_size=bs,
    shuffle=True,
)

# Validation batches keep the order of the listing files.
valid_dl = DataLoader(
    FixationDataset(root_dir_valid, image_valid, fix_valid, transform=composed),
    batch_size=bs_valid,
)


In [None]:
# Build the network and the loss, and decide where checkpoints are written.
model = Generator()
loss_func = BCELossWithDownsampling()
MODEL_PATH = DATA_FOLDER + '/eye_fixation_weights.pth'

# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Using ", device)
model.to(device)

# new_epochs: epochs to run in this session.
# old_epochs: epochs already completed by a restored checkpoint (0 = fresh run).
new_epochs = 100
old_epochs = 0
lr = 0.0001

# The optimizer is created after model.to(device) so it holds the moved parameters.
opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=0.00001)
# scheduler = StepLR(opt, step_size=30, gamma=0.1)

# Disabled checkpoint restore.
# NOTE(review): if re-enabled, this rebuilds Adam without the weight_decay used above.
# if os.path.exists(MODEL_PATH):
#     checkpoint = torch.load(MODEL_PATH)
#     model.load_state_dict(checkpoint['model_state_dict'])
#     opt = torch.optim.Adam(model.parameters(), lr=lr)
#     opt.load_state_dict(checkpoint['optimizer_state_dict'])
#     old_epochs = checkpoint['epoch']
#     loss = checkpoint['loss']
#     print("old epochs:", old_epochs)

# Total shown in the progress line "epoch: [current/total]".
# NOTE(review): the recorded training output shows a total of 110, which does not
# match the values set in this cell; presumably it was produced with different settings.
tot_epochs = new_epochs + old_epochs

Using  cuda:0


In [None]:
# Full per-batch loss history across all epochs.
tot_train_loss = []
tot_valid_loss = []
# One entry per epoch / per validation run: the mean loss of THAT pass only.
train_losses = []
valid_losses = []

for epoch in range(new_epochs):

    curr_epoch = epoch + old_epochs + 1
    model.train()
    start = timeit.default_timer()
    print('epoch: [{}/{}]'.format(curr_epoch, tot_epochs))

    # Collected separately per epoch so the reported value is the mean of this
    # epoch's batches, not a running average over every batch since epoch 1.
    epoch_train_loss = []
    for xb in train_dl:
        # Use the configured device (not a hard-coded .cuda()) so the loop
        # also runs on CPU and matches the validation branch below.
        inp = xb['image'].to(device)
        fix_maps = xb['fixation'].to(device)

        predictions = model(inp)
        loss = loss_func(predictions, fix_maps)

        loss.backward()
        opt.step()
        opt.zero_grad()

        batch_loss = loss.item()
        epoch_train_loss.append(batch_loss)
        tot_train_loss.append(batch_loss)

    train_loss = np.mean(epoch_train_loss)
    train_losses.append(train_loss)
    stop = timeit.default_timer()

    print('TRAIN - loss:{:.4f}, time: {:.2f}s \n'.format(train_loss, (stop-start)))

    # Write a checkpoint every 20th epoch; 'loss' is the last training batch's loss.
    if curr_epoch % 20 == 0:
        torch.save({'epoch': curr_epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': opt.state_dict(),
                    'loss': loss}, MODEL_PATH)

    # if curr_epoch == 50:
    #   for param_group in opt.param_groups:
    #     param_group['lr'] = 0.00001
    #     print("---------------------L.R CHANGED-------------------")

    # Evaluate on the validation set every 10th epoch.
    if curr_epoch % 10 == 0:
        model.eval()
        run_valid_loss = []
        with torch.no_grad():
            for xb in valid_dl:

                X = xb['image'].to(device)
                y = xb['fixation'].to(device)

                pred = model(X)
                loss = loss_func(pred, y)

                batch_loss = loss.item()
                run_valid_loss.append(batch_loss)
                tot_valid_loss.append(batch_loss)

        # Mean over this validation run only.
        valid_loss = np.mean(run_valid_loss)
        valid_losses.append(valid_loss)
        print('VALIDATION - loss: {:.6f}\n'.format(valid_loss))

    # scheduler.step()

epoch: [1/110]




TRAIN - loss:0.4545, time: 4996.54s 

epoch: [2/110]
TRAIN - loss:0.4109, time: 65.72s 

epoch: [3/110]
TRAIN - loss:0.3833, time: 65.33s 

epoch: [4/110]
TRAIN - loss:0.3631, time: 65.54s 

epoch: [5/110]
TRAIN - loss:0.3468, time: 65.88s 

epoch: [6/110]
TRAIN - loss:0.3332, time: 65.27s 

epoch: [7/110]
TRAIN - loss:0.3219, time: 65.45s 

epoch: [8/110]
TRAIN - loss:0.3121, time: 65.60s 

epoch: [9/110]
TRAIN - loss:0.3033, time: 65.53s 

epoch: [10/110]
TRAIN - loss:0.2954, time: 65.96s 

VALIDATION - loss: 0.228496

epoch: [11/110]
TRAIN - loss:0.2883, time: 65.70s 

epoch: [12/110]
TRAIN - loss:0.2823, time: 65.36s 

epoch: [13/110]
TRAIN - loss:0.2765, time: 65.79s 

epoch: [14/110]
TRAIN - loss:0.2710, time: 65.47s 

epoch: [15/110]
TRAIN - loss:0.2657, time: 65.93s 

epoch: [16/110]
TRAIN - loss:0.2608, time: 65.40s 

epoch: [17/110]
TRAIN - loss:0.2562, time: 65.59s 

epoch: [18/110]
TRAIN - loss:0.2519, time: 65.32s 

epoch: [19/110]
TRAIN - loss:0.2479, time: 65.82s 

epoch