In [1]:
import torch
from torchvision import transforms, models
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import argparse
import os
from datetime import datetime
import shutil
import numpy as np

from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
import cv2

from utils_logging import setup_logger

In [2]:
from models.shashimal import Shashimal
from models.__init__ import save_checkpoint, resume_checkpoint
from dataloader.chong import GazeDataset, GooDataset
from dataloader import chong_imutils
from training.train_shashimal import train, test, GazeOptimizer

  from .collection import imread_collection_wrapper


In [3]:
# Logger will save the training and test errors to a .log file 
logger = setup_logger(name='first_logger',
                      log_dir ='./logs/',
                      log_file='train_chong_gooreal.log',
                      log_format = '%(asctime)s %(levelname)s %(message)s',
                      verbose=True)

In [4]:
batch_size=4
workers=4

images_dir = '/home/shashimal/Desktop/gooreal/finalrealdatasetImgsV2/'
pickle_path = '/home/shashimal/Desktop/gooreal/oneshotrealhumansNew.pickle'
test_images_dir = '/home/shashimal/Desktop/gooreal/finalrealdatasetImgsV2/'
test_pickle_path = '/home/shashimal/Desktop/gooreal/testrealhumansNew.pickle'
train_set = GooDataset(images_dir, pickle_path, 'train')
train_data_loader = DataLoader(dataset=train_set,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=4)

test_set = GooDataset(test_images_dir, test_pickle_path, 'test')
test_data_loader = DataLoader(test_set, batch_size=batch_size//2,
                            shuffle=False, num_workers=4)

Number of Images: 2450
Number of Images: 2146


In [5]:
img, face, head_channel, gaze_heatmap, image_path, gaze_inside,shifted_grids,final = next(iter(train_data_loader))

In [6]:
import gc

gc.collect()

torch.cuda.empty_cache()

In [7]:
import time
import copy
from tqdm import tqdm
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def train_model(model, criterion, optimizer, logger ,num_epochs=5,):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)



        model.train()  # Set model to training mode

        running_loss = []
        running_corrects = 0

        # Iterate over data.
        for i, (img, face, head_channel, gaze_heatmap, image_path, gaze_inside,shifted_targets) in tqdm(enumerate(train_data_loader), total=len(train_data_loader)) :
            images = img.cuda()
            head_channel =  head_channel.cuda()
            face = face.cuda()
            shifted_targets = shifted_targets.cuda().squeeze()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward
            # track history if only in train
            outputs = model(images,head_channel,face)
            total_loss = criterion(outputs[0], shifted_targets[:, 0, :].max(1)[1])
            for j in range(1, len(outputs)):
                total_loss += criterion(outputs[j], shifted_targets[:, j, :].max(1)[1])

            total_loss = total_loss / (len(outputs) * 1.0)

            total_loss.backward()
            optimizer.step()

            inputs_size = images.size(0)
            
            running_loss.append(total_loss.item())
            if i % 100 == 99:
                logger.info('%s'%(str(np.mean(running_loss))))
                running_loss = [] 


    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [8]:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

# model_ft = models.resnet18(pretrained=True)
# num_ftrs = model_ft.fc.in_features
# # Here the size of each output sample is set to 2.
# # Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
# model_ft.fc = nn.Linear(num_ftrs, 6)

model_ft = Shashimal()


model_ft = model_ft.to(device)

criterion = nn.NLLLoss().cuda()

# Observe that all parameters are being optimized
start_epoch = 0
max_epoch = 5
learning_rate = 1e-4

# Initializes Optimizer
gaze_opt = GazeOptimizer(model_ft, learning_rate)
optimizer = gaze_opt.getOptimizer(start_epoch)

In [9]:
# print(model_ft)

In [10]:
model_ft = train(model_ft,train_data_loader, criterion, optimizer, logger,
                       num_epochs=5)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


torch.Size([4, 256, 7, 7])
torch.Size([4, 169])


  0%|          | 0/613 [00:04<?, ?it/s]


KeyboardInterrupt: 

In [None]:
torch.save(model_ft.state_dict(), 'shashimal.pth')

In [None]:
shashimal_model = Shashimal()
shashimal_model.load_state_dict(torch.load('shashimal.pth'))
shashimal_model.cuda()

Shashimal(
  (relu): ReLU(inplace=True)
  (sigmoid): Sigmoid()
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (avgpool): AvgPool2d(kernel_size=7, stride=1, padding=0)
  (conv1_scene): Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1_scene): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1_scene): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inp

In [None]:
test(shashimal_model, test_data_loader, logger, save_output=True)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
  1%|          | 10/1073 [00:06<11:28,  1.54it/s]


KeyboardInterrupt: 

In [None]:

img, face, head_channel, eye, gaze_heatmap, gaze, gaze_inside, image_path,grid = next(iter(test_data_loader))
print(image_path)
with torch.no_grad():
    images = img.cuda()
    head_channel =  head_channel.cuda()
    face = face.cuda()
    outputs = shashimal_model(images,head_channel,face)
    _, preds = torch.max(outputs, 1)
    print(preds)
    print(grid)


ValueError: too many values to unpack (expected 9)

In [None]:
def visualize_model(model, num_images=20):
    was_training = model.training
    model.eval()
    images_so_far = 0

    with torch.no_grad():
        for i, (img, face, head_channel, eye, gaze_heatmap, gaze, gaze_inside, image_path) in enumerate(test_data_loader):
            inputs = img.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            print(preds)
            if i == num_images:
                break

In [None]:
visualize_model(model_ft)

tensor([0, 1], device='cuda:0')
tensor([0, 0], device='cuda:0')
tensor([1, 0], device='cuda:0')
tensor([1, 1], device='cuda:0')
tensor([1, 1], device='cuda:0')
tensor([1, 1], device='cuda:0')
tensor([1, 0], device='cuda:0')
tensor([1, 0], device='cuda:0')
tensor([0, 1], device='cuda:0')
tensor([1, 0], device='cuda:0')
tensor([0, 1], device='cuda:0')
tensor([5, 0], device='cuda:0')
tensor([1, 0], device='cuda:0')
tensor([0, 0], device='cuda:0')
tensor([1, 1], device='cuda:0')
tensor([0, 0], device='cuda:0')
tensor([0, 1], device='cuda:0')
tensor([0, 1], device='cuda:0')
tensor([0, 1], device='cuda:0')
tensor([1, 1], device='cuda:0')
tensor([1, 1], device='cuda:0')
