In [40]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt 
import pandas as pd

import torch

import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import torchvision.transforms as transforms
from torch.utils.data import  Dataset, TensorDataset, DataLoader

import lib.pytorch_trainer as ptt

from src.imgnet_utils import denormalize

from src.data_loader import _create_dataLoader

from src.Dataset import KaggleSafeDriverDataset

from src.plot_utils import (plot_classes, plot_distribution,
                            statistical_analysis_image, classDistribution,
                            plot_metrics,visualize_predictions,
                            plot_cm_train_valid,plot_layers_weight)

In [41]:
print("{} GPU's available:".format(torch.cuda.device_count()) )
cpu_count = torch.multiprocessing.cpu_count()
print("\ncpu_count: {}".format(cpu_count))

4 GPU's available:

cpu_count: 24


In [42]:
use_gpu = False
use_DataParalel= True 
use_CPU= False         # http://pytorch.org/tutorials/beginner/former_torchies/parallelism_tutorial.html 

if use_gpu:
    if use_DataParalel:
        print("Using DataParalel in all {} GPUS".format(torch.cuda.device_count()))
    else:
        print('Using only one GPU') #{} '.format(device_id))
else:
    print("Using {} CPU's".format(cpu_count))

Using 24 CPU's



# Choose dataset 

In [43]:
path2train = "/mnt/home/e209440/data/train" 

# Images transformation

In [44]:
imagenet_mean = np.array([0.485, 0.456, 0.406])
imagenet_std  = np.array([0.229, 0.224, 0.225])

img_width = img_height=300 #to use InceptionV3 it must img_width and img_height be changed to 300

# Data augmentation and normalization for training 
data_transforms = {
    'train': transforms.Compose([
        transforms.Scale((img_width, img_height)),
        transforms.ToTensor(),
        transforms.Normalize(imagenet_mean, imagenet_std),
    ])
}        

# Creating  Dataset & Dataloader

In [45]:
batch_size = 32
use_only = 1.0 # Use only is the percentage of the full dataset

In [46]:
dsets = {
    'train': KaggleSafeDriverDataset(path2train, transforms=data_transforms['train'],use_only=use_only)
}

In [47]:
dset_loaders = _create_dataLoader(dsets, batch_size, pin_memory=False, use_shuffle= False)

In [9]:
path2results = "/mnt/home/r120084/project/state-farm-distracted-driver-detection/results"

In [35]:
print("Loaded features with shapes: \n")
npzfile = np.load(path2results+"/" + "results_valid_Resnet18_2017-11-26-14-41.npz")

Loaded features with shapes: 



In [37]:
npzfile['pred']

array([[  7.1797967 ,  -8.97665405, -25.40347862, ...,  -2.87480593,
          6.42260313,  22.02296066],
       [ -8.17194271,  -0.84254789,  -5.31919765, ...,  -4.72954273,
         -2.83368874,  -2.24675798],
       [ -2.64065576,   3.79761171,  16.34770203, ...,  -4.54322481,
         -6.01621675, -18.60191727],
       ..., 
       [ -0.41439867, -31.58727837, -15.49133015, ...,  -5.05358171,
        -10.47488117,  -9.14722633],
       [  0.76069754,   3.82887578,  15.27601433, ...,  -4.48759747,
         -1.33089745, -11.6917448 ],
       [ -2.36654735, -23.29958916, -10.22987747, ...,  -9.92538166,
         -6.81051111, -10.76055813]], dtype=float32)

In [38]:
x=torch.from_numpy(npzfile['pred'])

In [39]:
x


 7.1798e+00 -8.9767e+00 -2.5403e+01  ...  -2.8748e+00  6.4226e+00  2.2023e+01
-8.1719e+00 -8.4255e-01 -5.3192e+00  ...  -4.7295e+00 -2.8337e+00 -2.2468e+00
-2.6407e+00  3.7976e+00  1.6348e+01  ...  -4.5432e+00 -6.0162e+00 -1.8602e+01
                ...                   ⋱                   ...                
-4.1440e-01 -3.1587e+01 -1.5491e+01  ...  -5.0536e+00 -1.0475e+01 -9.1472e+00
 7.6070e-01  3.8289e+00  1.5276e+01  ...  -4.4876e+00 -1.3309e+00 -1.1692e+01
-2.3665e+00 -2.3300e+01 -1.0230e+01  ...  -9.9254e+00 -6.8105e+00 -1.0761e+01
[torch.FloatTensor of size 4484x10]

In [None]:
result_train = getPrediction(result_train)
result_valid = getPrediction(result_valid)
# result_test['pred'] must be an array of probabilities to make the submission

In [None]:
correct_train = (result_train['true'] == result_train['pred']).sum()
correct_valid = (result_valid['true'] == result_valid['pred']).sum()

In [None]:
visualize_predictions(dsets['valid'], result_valid, correct_pred = True)

In [None]:
#move this to Data Visualization
#plot_layers_weight(dsets,img_width=img_width, img_height=img_height,conv_model = model.mrnc,use_gpu=use_gpu)

In [None]:
# this cell it will be used to show what kind of transformations we have tried to do, do not move to a .py file because we will work on it yet.

composed = transforms.Compose([transforms.Scale((img_width, img_height)),
        transforms.ToTensor(),
        transforms.Normalize(imagenet_mean, imagenet_std)])

composed1 = transforms.Compose([
                                transforms.CenterCrop((220,320)),
                                transforms.Scale((img_width, img_height)),
                                transforms.ToTensor(),
                                transforms.Normalize(imagenet_mean, imagenet_std)])

# Apply each of the above transforms on sample.
fig = plt.figure(figsize = (20,5))
for i, tsfrm in enumerate([composed, composed1]):
    dset_aux = KaggleSafeDriverDataset(path2train, transforms=tsfrm, use_only=use_only)
    (inputs, cls) = dset_aux[60]
    print(inputs.numpy().shape)
    img = denormalize(inputs.numpy())
    img = np.clip(img, 0, 1.0)
    plt.subplot(1,3,i+1)
    plt.imshow(img)
    plt.axis('off')

# Plotting some examples

In [None]:
plot_classes(dset_loaders['train'])

# Data Analysis

In [None]:
statistical_analysis_image(dset_loaders['train'])

In [None]:
classDistribution(dsets['train'])

In [None]:
# load the metrics of the train model (could be a csv file or something else) an plot them

In [None]:
#load model and plot weights

In [None]:
path2metrics = '/mnt/home/r120084/project/state-farm-distracted-driver-detection/metrics/metrics_Resnet18_2017-11-25-12-38.csv'
metrics = pd.read_csv(path2metrics).to_dict()

In [None]:
#renaming dictionary keys
metrics['train']['acc'] = metrics['train'].pop(0)
metrics['train']['losses'] = metrics['train'].pop(1)

metrics['valid']['acc'] = metrics['valid'].pop(0)
metrics['valid']['losses'] = metrics['valid'].pop(1)

In [None]:
plot_metrics(metrics)

In [None]:
path2results = '/mnt/home/r120084/project/state-farm-distracted-driver-detection/results/results_Resnet18_2017-11-25-15-49.csv'
results = pd.read_csv(path2results).to_dict()

In [None]:
import ast

#renaming dictionary keys
results['train']['pred'] = results['train'].pop(0)
results['train']['true'] = results['train'].pop(1) 

results['valid']['pred'] = results['valid'].pop(0)
results['valid']['true'] = results['valid'].pop(1)

In [None]:
results

In [None]:
#visualize_predictions(dsets['valid'], result_valid, correct_pred = False)

In [None]:
visualize_predictions(dsets['valid'], result_valid, correct_pred = True)

In [None]:
use_resnet = True
use_inception = False
use_denseNet = False

if use_resnet:
    print('Using ResNet model')
    model_name = "ResNet"
    model = MyResNet()
if use_inception:
    print('Using Inception model')
    model_name = "Inception"
    model = MyInception()
if use_denseNet:
    print('Using DenseNet model')
    model_name = "DenseNet"    
    model = MyDenseNet()

In [None]:
if use_gpu:
    if use_DataParalel:
        print("Using all GPU's ")
        model.mrnc = torch.nn.DataParallel(model.mrnc) #device_ids=[1,3]
        model.mrnc = model.mrnc.cuda()
        model.mrnd = torch.nn.DataParallel(model.mrnd) #device_ids=[1,3]
        model.mrnd = model.mrnd.cuda()
    else:
        print('Using GPU')# {}'.format(device_id))
        model.cuda()
else:
    print("Using CPU's")

In [None]:
#move this to Data Visualization
#plot_layers_weight(dsets,img_width=img_width, img_height=img_height,conv_model = model.mrnc,use_gpu=use_gpu)