In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt 

import torch

import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import torchvision.transforms as transforms
from torch.utils.data import  Dataset, TensorDataset, DataLoader


import lib.pytorch_trainer as ptt

from src.imgnet_utils import denormalize

from src.data_loader import _create_dataLoader

from src.Dataset import KaggleSafeDriverDataset

from src.plot_utils import (plot_classes, plot_distribution,
                            statistical_analysis_image, classDistribution,
                            plot_metrics,visualize_predictions,
                            plot_cm_train_valid,plot_layers_weight)
      
from src.convnet_models import (MyResNet, MyInception, MyDenseNet)

from src.extractor_utils import (predict, getPrediction,features_saving,features_loading)

from utils.utils import (create_submission ,metrics2csv, save_results)

In [2]:
print("{} GPU's available:".format(torch.cuda.device_count()) )
cpu_count = torch.multiprocessing.cpu_count()
print("\ncpu_count: {}".format(cpu_count))

4 GPU's available:

cpu_count: 24


In [3]:
use_gpu = torch.cuda.is_available()
use_DataParalel= True 
use_CPU= False        

if use_gpu:
    if use_DataParalel: 
        print("Using DataParalel in all {} GPUS".format(torch.cuda.device_count()))
    else:
        print('Using only one GPU') 
if use_CPU:         # http://pytorch.org/tutorials/beginner/former_torchies/parallelism_tutorial.html 
    print("Using {} CPU's".format(cpu_count))

Using DataParalel in all 4 GPUS


In [4]:
if use_gpu:
    !free -h
    !nvidia-smi

              total        used        free      shared  buff/cache   available
Mem:           220G         99G         50G         10G         71G        108G
Swap:            0B          0B          0B
Sun Nov 26 14:51:26 2017       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 384.90                 Driver Version: 384.90                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00003609:00:00.0 Off |                    0 |
| N/A   62C    P0   138W / 149W |   9440MiB / 11439MiB |     99%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla K80           Off  | 00004BDC:00:00.0 Off |                    0 |
| N/A   67C    P0    59W / 149W |    485MiB 


# Choose dataset 

In [5]:
path2train = "/mnt/home/e209440/data/train" 
path2test = "/mnt/home/e209440/data/test" 

path2features= "/mnt/home/e209440/state-farm-distracted-driver-detection/features/" 
#path2features= "/mnt/home/r120084/state-farm-distracted-driver-detection/features/" 

load_features = True

# Images transformation

In [6]:
imagenet_mean = np.array([0.485, 0.456, 0.406])
imagenet_std  = np.array([0.229, 0.224, 0.225])

img_width = img_height=300 #to use InceptionV3 it must img_width and img_height be changed to 300

# Data augmentation and normalization for training 
data_transforms = {
    'train': transforms.Compose([
        transforms.Scale((img_width, img_height)),
        transforms.ToTensor(),
        transforms.Normalize(imagenet_mean, imagenet_std),
    ]),
    'valid': transforms.Compose([
        transforms.Scale((img_width, img_height)),
        transforms.ToTensor(),
        transforms.Normalize(imagenet_mean, imagenet_std),
    ]),
}        

# Creating  Dataset & Dataloader

In [7]:
batch_size = 32
use_only = 1.0 # Use only is the percentage of the full dataset

In [8]:
dsets = {
    'train': KaggleSafeDriverDataset(path2train, transforms=data_transforms['train'],use_only=use_only),
    'valid': KaggleSafeDriverDataset(path2train, transforms=data_transforms['valid'],use_only=use_only, is_val=True, val_size=0.2),
    'test':  KaggleSafeDriverDataset(path2test, transforms=data_transforms['valid'],use_only=use_only, is_test=True),
}

In [9]:
dset_loaders = _create_dataLoader(dsets, batch_size, pin_memory=False, use_shuffle= True)

In [10]:
dset_sizes = {x: len(dsets[x]) for x in ['train','valid', 'test']} 
dset_classes = len(dsets['train'].y)
dset_classes, dset_sizes

# Dataset have much more samples than datatrain ***It comes from the test.zip****

(17940, {'test': 79726, 'train': 17940, 'valid': 4484})

# Checking GPU usage

In [11]:
!free -h
!nvidia-smi

              total        used        free      shared  buff/cache   available
Mem:           220G         99G         50G         10G         71G        108G
Swap:            0B          0B          0B
Sun Nov 26 14:51:39 2017       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 384.90                 Driver Version: 384.90                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00003609:00:00.0 Off |                    0 |
| N/A   63C    P0   133W / 149W |   9440MiB / 11439MiB |     99%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla K80           Off  | 00004BDC:00:00.0 Off |                    0 |
| N/A   67C    P0    59W / 149W |    485MiB 

# Choosing Model

In [12]:
use_resnet = True
use_inception = False
use_denseNet = False

if use_resnet:
    print('Using ResNet model')
    model_name = "ResNet"
    model = MyResNet()
elif use_inception:
    print('Using Inception model')
    model_name = "Inception"
    model = MyInception()
elif use_denseNet:
    print('Using DenseNet model')
    model_name = "DenseNet"    
    model = MyDenseNet()

Using ResNet model


In [13]:
if use_gpu:
    if use_DataParalel:
        print("Using all GPU's ")
        model.mrnc = torch.nn.DataParallel(model.mrnc) #device_ids=[1,3]
        model.mrnc = model.mrnc.cuda()
        model.mrnd = torch.nn.DataParallel(model.mrnd) #device_ids=[1,3]
        model.mrnd = model.mrnd.cuda()
    else:
        print('Using GPU')# {}'.format(device_id))
        model.cuda()
else:
    print("Using CPU's")

Using all GPU's 


In [14]:
#print(model)

In [None]:
#extract features from images
convOutput_train = predict(dset_loaders['train'], model.mrnc,use_gpu=use_gpu)
convOutput_valid = predict(dset_loaders['valid'], model.mrnc,use_gpu=use_gpu)
convOutput_test = predict(dset_loaders['test'], model.mrnc,use_gpu=use_gpu)

In [None]:
print(convOutput_train['true'].size(), convOutput_train['pred'].size())
print(convOutput_valid['true'].size(), convOutput_valid['pred'].size())
print(convOutput_test['true'].size(), convOutput_test['pred'].size())

In [None]:
print(convOutput_train['true'].type(), convOutput_train['pred'].type())

# Saving features

In [None]:
sav_feats= {
    'train': (convOutput_train['pred'], convOutput_train['true'],model_name),
    'valid': (convOutput_valid['pred'], convOutput_valid['true'],model_name),
    'test': (convOutput_test['pred'], convOutput_test['true'],model_name)

}

In [None]:
sav_feats['train'][2]

In [None]:
features_saving(path2features,sav_feats)

# Loading features

In [15]:
model_name='ResNet'

In [16]:
load_feat= features_loading(path2features,model_name)

Loaded features with shapes: 


train:
pred torch.Size([17940, 6400]), true torch.Size([17940])

valid:
pred torch.Size([4484, 6400]), true torch.Size([4484])

test:
pred torch.Size([79726, 6400]), true torch.Size([79726])


In [17]:
load_feat['train'][0].shape,load_feat['train'][1].shape

(torch.Size([17940, 6400]), torch.Size([17940]))

In [18]:
if load_features:
    conv_dset ={
    'train': TensorDataset(load_feat['train'][0], load_feat['train'][1]),
    'valid': TensorDataset(load_feat['valid'][0], load_feat['valid'][1]),
    'test': TensorDataset(load_feat['test'][0], load_feat['test'][1])
    }
else:
    conv_dset ={
    'train': TensorDataset(convOutput_train['train'][0], convOutput_train['train'][1]),
    'valid': TensorDataset(convOutput_valid['valid'][0], convOutput_valid['valid'][1]),
    'test': TensorDataset(convOutput_test['test'][0], convOutput_test['test'][1])
    }

In [19]:
dset_loaders_convnet = _create_dataLoader(conv_dset, batch_size, 
                        pin_memory=False, use_shuffle= True)

In [20]:
dset_convnet_sizes = {x: len(dset_loaders_convnet[x]) for x in ['train','valid', 'test']} 
dset_convnet_sizes

{'test': 2492, 'train': 561, 'valid': 141}

In [21]:
path2saveModel = '/mnt/home/r120084/project/models/distractdriver'
#path2saveModel = '/mnt/home/e209440/models/ResNetDistractDriver' 

savebest = ptt.ModelCheckpoint(path2saveModel,reset=True, verbose=1)

In [22]:
loss_fn = nn.CrossEntropyLoss()
num_epochs = 50

optimizer =  optim.Adam(model.mrnd.parameters(), lr=1e-3)
scheduler = StepLR(optimizer, step_size=5, gamma=0.75)

params = {'model' : model.mrnd, 
    'criterion': loss_fn,  
    'optimizer': optimizer, 
    'callbacks': [savebest, ptt.AccuracyMetric(), ptt.PrintCallback()] #ptt.PlotCallback(),
}

In [23]:
trainer = ptt.DeepNetTrainer(use_gpu=use_gpu,**params)

In [24]:
trainer.fit_loader(num_epochs, dset_loaders_convnet['train'], dset_loaders_convnet['valid'])

Start training for 50 epochs
  1: 159.7s   T: 1.83189 0.33584   V: 0.85789 0.66815 best
  2:  21.5s   T: 0.55467 0.80808   V: 0.20269 0.94759 best
  3:  22.0s   T: 0.24458 0.91996   V: 0.18238 0.94246 best
  4:  21.7s   T: 0.18586 0.94091   V: 0.16942 0.94759 best
  5:  21.5s   T: 0.16022 0.94939   V: 0.07498 0.97837 best
  6:  21.4s   T: 0.12702 0.95909   V: 0.06736 0.98171 best
  7:  21.6s   T: 0.14697 0.95524   V: 0.19906 0.93599 
  8:  21.5s   T: 0.08993 0.97074   V: 0.06396 0.98439 best
  9:  20.7s   T: 0.12194 0.96087   V: 0.05370 0.98506 best
 10:  19.9s   T: 0.08849 0.97179   V: 0.16790 0.94938 
 11:  20.0s   T: 0.05961 0.98004   V: 0.09074 0.97614 
 12:  20.2s   T: 0.08535 0.97269   V: 0.04911 0.98729 best
 13:  20.0s   T: 0.07320 0.97659   V: 0.18934 0.93756 
 14:  20.6s   T: 0.07635 0.97520   V: 0.09040 0.97770 
 15:  21.6s   T: 0.08656 0.97447   V: 0.04169 0.98930 best
 16:  21.6s   T: 0.06439 0.97965   V: 0.09317 0.97190 
 17:  21.2s   T: 0.04648 0.98495   V: 0.06627 0.982

# Loading best epoch

In [25]:
trainer.load_state(path2saveModel)
model.mrnd = trainer.model

In [26]:
!free -h
!nvidia-smi

              total        used        free      shared  buff/cache   available
Mem:           220G         92G         58G        8.4G         68G        117G
Swap:            0B          0B          0B
Sun Nov 26 12:59:46 2017       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 384.90                 Driver Version: 384.90                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00003609:00:00.0 Off |                    0 |
| N/A   64C    P0    78W / 149W |  11106MiB / 11439MiB |     44%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla K80           Off  | 00004BDC:00:00.0 Off |                    0 |
| N/A   74C    P0    71W / 149W |    251MiB 

In [35]:
metrics2csv(trainer, 'ResNet18')

done!


In [28]:
train_eval = trainer.evaluate_loader(dset_loaders_convnet['train'])
valid_eval = trainer.evaluate_loader(dset_loaders_convnet['valid'])

evaluate: 560/560 ok
evaluate: 140/140 ok


In [29]:
train_eval, valid_eval

({'losses': 0.0009273802822247073}, {'losses': 0.01736979211791087})

In [30]:
# need to create a dataloader without shuffle to compare the mismatch
dset_loaders_wshuffle = _create_dataLoader(dsets, batch_size, pin_memory=False, use_shuffle= False)

In [None]:
#we need to use all dloader, because this one has use_shuffle false
result_train = predict(dset_loaders_wshuffle['train'], model, use_gpu=use_gpu)
result_valid = predict(dset_loaders_wshuffle['valid'], model, use_gpu=use_gpu)
result_test = predict(dset_loaders_wshuffle['test'], model, use_gpu=use_gpu)

In [49]:
predictions_out = {'train': result_train, 'valid': result_valid}

In [73]:
save_results(predictions_out, 'Resnet18', use_gpu = use_gpu)

train set result saved!
valid set result saved!


In [None]:
result_train = getPrediction(result_train)
result_valid = getPrediction(result_valid)
# result_test['pred'] must be an array of probabilities to make the submission

In [None]:
correct_train = (result_train['true'] == result_train['pred']).sum()
correct_valid = (result_valid['true'] == result_valid['pred']).sum()

In [76]:
print('Train: ', correct_train, '/', len(dsets['train']) )
print('Valid: ', correct_valid, '/', len(dsets['valid']) )

Train:  17934 / 17940
Valid:  4467 / 4484


# Make submission of the Test set

In [None]:
create_submission(result_test, 'distracted_driver')