# Import Modules and Data

In [1]:
# Load modules
import sys
import os
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPool2D, Dropout, BatchNormalization, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()

In [2]:
# Set up push notifications so the long-running fits below can report completion
# (later cells call notify.send(...)).
# NOTE(review): register() runs every time this cell is executed — presumably it creates a
# new notification channel each time; confirm against the notify_run documentation.
from notify_run import Notify
notify = Notify()
notify.register()

In [3]:
# Load custom functions
# The helper module lives outside the notebook directory, so its folder has to be on sys.path.
# The folder can be overridden with the DDFUNCS_DIR environment variable; when it is not set,
# the original author's local path is used.
ddfuncs_dir = os.environ.get(
    'DDFUNCS_DIR',
    'C:\\Users\\Dylan\\Desktop\\Data Science\\Projects\\DistractedDrivers\\functions')
# Re-running the cell must not keep appending the same entry.
if ddfuncs_dir not in sys.path:
    sys.path.append(ddfuncs_dir)
from ddfuncs import trainsampling, cvrand

In [4]:
# Cap TensorFlow's GPU memory so the card does not freeze up during the model fits below
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # A single virtual device on the first GPU, limited to 3 * 1024 (memory_limit units)
        memory_cap = tf.config.experimental.VirtualDeviceConfiguration(memory_limit=3 * 1024)
        tf.config.experimental.set_virtual_device_configuration(gpus[0], [memory_cap])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Virtual devices have to be configured before the GPUs are initialised
        print(err)

1 Physical GPUs, 1 Logical GPUs


In [5]:
# Load processed training data
# NOTE: os.chdir changes the kernel's working directory for every later cell, and the path is
# relative — re-running this cell without restarting the kernel resolves it from the new directory.
os.chdir('../data/processed')
df = pd.read_csv('driver_image_list_processed.csv')

In [6]:
# Draw a seeded sample from the image list with the project's trainsampling helper.
# NOTE(review): this rebinds `df`, so the unsampled frame is no longer available and re-running
# the cell samples from the already-sampled frame. `samples=80` is presumably a per-subject,
# per-class count — confirm in ddfuncs.
df = trainsampling(df, samples=80, random_state=42)

In [7]:
df

Unnamed: 0,subject,classname,img,imgpath
51,p002,c0,img_13073.jpg,imgs/train/c0/img_13073.jpg
14,p002,c0,img_5585.jpg,imgs/train/c0/img_5585.jpg
71,p002,c0,img_48187.jpg,imgs/train/c0/img_48187.jpg
60,p002,c0,img_98115.jpg,imgs/train/c0/img_98115.jpg
20,p002,c0,img_66355.jpg,imgs/train/c0/img_66355.jpg
...,...,...,...,...
22364,p081,c9,img_95966.jpg,imgs/train/c9/img_95966.jpg
22404,p081,c9,img_18412.jpg,imgs/train/c9/img_18412.jpg
22415,p081,c9,img_23818.jpg,imgs/train/c9/img_23818.jpg
22358,p081,c9,img_54961.jpg,imgs/train/c9/img_54961.jpg


In [8]:
# Change to appropriate directory for data generation
# The path is relative to the directory entered by the earlier os.chdir('../data/processed'),
# so this cell only works when run after it. The `imgpath` values in `df` are relative
# (imgs/train/...) — presumably relative to this directory.
os.chdir('../raw')

# Model 1 - 1 Conv, 1 MaxPool

In [9]:
# Baseline CNN: one 3x3 convolution, one 10x10 max-pool, then a 10-way softmax classifier
model1 = Sequential([
    Conv2D(10, kernel_size=3, activation='relu', input_shape=(256, 256, 3)),
    MaxPool2D(10),
    Flatten(),
    Dense(10, activation='softmax'),
])
opt = Adam(learning_rate=0.0001)
model1.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
model1.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 254, 254, 10)      280       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 25, 25, 10)        0         
_________________________________________________________________
flatten (Flatten)            (None, 6250)              0         
_________________________________________________________________
dense (Dense)                (None, 10)                62510     
Total params: 62,790
Trainable params: 62,790
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Cross-validate model 1 with the project's cvrand helper; the result is kept for display/export.
# target_size matches the (256, 256, 3) input_shape model1 was built with.
# NOTE(review): this call uses n_iterations=13 and steps_per_epoch=115, whereas the cvrand calls
# for models 2-4 use 30 and 125 — confirm the difference is intentional before comparing metrics.
model1data = cvrand(model1, 
                    df,
                    n_iterations=13,
                    batch_size=16,
                    epochs=50,
                    steps_per_epoch=115,
                    target_size=(256,256),
                    random_state=42,
                    min_delta=0.05,
                    patience=3)

CV iteration 1 of 13
Validation subjects are ['p026' 'p050' 'p002']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 validated image filenames belonging to 10 classes.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 115 steps, validate for 75 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50

In [None]:
model1data

In [None]:
# Persist the model 1 CV metrics. The path is relative to the working directory set by the
# earlier os.chdir calls; index=False leaves the DataFrame index out of the file.
model1data.to_csv('../metrics/model1metrics.csv', index=False)

# Model 2 - Add Dropout layer

In [None]:
# Same layout as model 1, with a 25% dropout layer inserted between the convolution and the pool
model2 = Sequential([
    Conv2D(10, kernel_size=3, activation='relu', input_shape=(256, 256, 3)),
    Dropout(0.25),
    MaxPool2D(10),
    Flatten(),
    Dense(10, activation='softmax'),
])
opt = Adam(learning_rate=0.0001)
model2.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
model2.summary()

In [None]:
# Cross-validate model 2 with the project's cvrand helper (30 iterations, seeded).
# target_size matches the (256, 256, 3) input_shape model2 was built with.
model2data = cvrand(model2, 
                    df,
                    n_iterations=30,
                    batch_size=16,
                    epochs=50,
                    steps_per_epoch=125,
                    target_size=(256, 256),
                    random_state=42,
                    min_delta=0.05,
                    patience=3)

In [None]:
model2data

In [None]:
# Persist the model 2 CV metrics. The path is relative to the working directory set by the
# earlier os.chdir calls; index=False leaves the DataFrame index out of the file.
model2data.to_csv('../metrics/model2metrics.csv', index=False)

# Model 3 - Architecture Modeled off AlexNet

In [None]:
# First AlexNet-style attempt: five convolutions with two max-pools, then two 100-unit dense
# layers and a 10-way softmax. Unlike model4 further down, there is no batch normalisation,
# no max-pool after the last convolution, and no dropout.
# NOTE(review): the first convolution uses 99 filters; the AlexNet paper describes 96 —
# confirm whether 99 is intentional.
model3 = Sequential()
# Stage 1: large 11x11 kernels with stride 4, followed by overlapping 3x3/stride-2 pooling
model3.add(Conv2D(99,
                 kernel_size=11,
                 strides=4,
                 padding='valid',
                 activation='relu',
                 input_shape=(227, 227, 3)))
model3.add(MaxPool2D(3,
                    strides=2,
                    padding='valid'))
# Stage 2: 5x5 convolution ('same' padding keeps the spatial size), then pooling
model3.add(Conv2D(256,
                 kernel_size=5,
                 strides=1,
                 padding='same',
                 activation='relu'))
model3.add(MaxPool2D(3,
                    strides=2,
                    padding='valid'))
# Stages 3-5: three stacked 3x3 convolutions with no pooling in between
model3.add(Conv2D(384,
                 kernel_size=3,
                 strides=1,
                 padding='same',
                 activation='relu'))
model3.add(Conv2D(384,
                 kernel_size=3,
                 strides=1,
                 padding='same',
                 activation='relu'))
model3.add(Conv2D(256,
                 kernel_size=3,
                 strides=1,
                 padding='same',
                 activation='relu'))
# Classifier head
model3.add(Flatten())
model3.add(Dense(100, activation='relu'))
model3.add(Dense(100, activation='relu'))
model3.add(Dense(10, activation='softmax'))
opt = Adam(learning_rate=0.00001)
model3.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
model3.summary()

In [None]:
# Cross-validate model 3 with the project's cvrand helper (30 iterations, seeded).
# target_size matches the (227, 227, 3) input_shape model3 was built with.
model3data = cvrand(model3, 
                    df,
                    n_iterations=30,
                    batch_size=16,
                    epochs=50,
                    steps_per_epoch=125,
                    target_size=(227,227),
                    random_state=42,
                    min_delta=0.05,
                    patience=3)

In [None]:
model3data

In [None]:
# Persist the model 3 CV metrics. The path is relative to the working directory set by the
# earlier os.chdir calls; index=False leaves the DataFrame index out of the file.
model3data.to_csv('../metrics/model3metrics.csv', index=False)

In [13]:
notify.send('model 3 cv complete')

# AlexNet Corrected

In [16]:
# AlexNet-style network: five convolution stages, each followed by batch normalisation and ReLU,
# with 3x3/stride-2 max-pooling after stages 1, 2 and 5, then two 4096-unit dense layers with
# 50% dropout and a 10-way softmax.
# NOTE(review): the first convolution has 99 filters; the AlexNet paper describes 96 —
# confirm whether 99 is intentional.
model4 = Sequential([
    # Stage 1
    Conv2D(99, kernel_size=11, strides=4, padding='valid', input_shape=(227, 227, 3)),
    BatchNormalization(),
    Activation('relu'),
    MaxPool2D(3, strides=2, padding='valid'),
    # Stage 2
    Conv2D(256, kernel_size=5, strides=1, padding='same'),
    BatchNormalization(),
    Activation('relu'),
    MaxPool2D(3, strides=2, padding='valid'),
    # Stage 3
    Conv2D(384, kernel_size=3, strides=1, padding='same'),
    BatchNormalization(),
    Activation('relu'),
    # Stage 4
    Conv2D(384, kernel_size=3, strides=1, padding='same'),
    BatchNormalization(),
    Activation('relu'),
    # Stage 5
    Conv2D(256, kernel_size=3, strides=1, padding='same'),
    BatchNormalization(),
    Activation('relu'),
    MaxPool2D(3, strides=2, padding='valid'),
    # Classifier head
    Flatten(),
    Dense(4096, activation='relu'),
    Dropout(0.5),
    Dense(4096, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])
opt = Adam(learning_rate=0.00005)
model4.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
model4.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_10 (Conv2D)           (None, 55, 55, 99)        36036     
_________________________________________________________________
batch_normalization_5 (Batch (None, 55, 55, 99)        396       
_________________________________________________________________
activation_5 (Activation)    (None, 55, 55, 99)        0         
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 27, 27, 99)        0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 27, 27, 256)       633856    
_________________________________________________________________
batch_normalization_6 (Batch (None, 27, 27, 256)       1024      
_________________________________________________________________
activation_6 (Activation)    (None, 27, 27, 256)      

With a batch size of 16 and a training set of roughly 20,000 images, running 50 epochs of 125 steps means the training data is passed over about 5 times (16 × 125 × 50 = 100,000 images). The early-stopping patience passed to `cvrand` below is 3, so training ceases once the validation accuracy has failed to improve for 3 epochs in a row.

In [17]:
# Cross-validate model 4 with the project's cvrand helper (30 iterations, seeded).
# target_size matches the (227, 227, 3) input_shape model4 was built with.
# min_delta / patience are presumably forwarded to an early-stopping callback inside cvrand —
# confirm in ddfuncs.
model4data = cvrand(model4, 
                    df,
                    n_iterations=30,
                    batch_size=16,
                    epochs=50,
                    steps_per_epoch=125,
                    target_size=(227,227),
                    random_state=42,
                    min_delta=0.05,
                    patience=3)

CV iteration 1 of 30
Validation subjects are ['p026' 'p050' 'p002']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 validated image filenames belonging to 10 classes.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 125 steps, validate for 75 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
CV iteration 2 of 30
Validation subjects are ['p049' 'p039' 'p035']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 validated image filenames belonging to 10 classes.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 125 steps, validate for 75 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
CV iteration 3 of 30
Validation subjects are ['p066' 'p050' 'p075']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 validated image filenames be

Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
CV iteration 6 of 30
Validation subjects are ['p045' 'p035' 'p049']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 validated image filenames belonging to 10 classes.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 125 steps, validate for 75 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
CV iteration 7 of 30
Validation subjects are ['p064' 'p021' 'p022']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 validated image filenames belonging to 10 classes.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 125 steps, validate for 75 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
CV iteration 8 of 30
Validation subjects are ['p045' 'p035' 'p049']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 validated image filenames belonging to 10 cl

Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
CV iteration 11 of 30
Validation subjects are ['p042' 'p075' 'p041']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 validated image filenames belonging to 10 classes.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 125 steps, validate for 75 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
CV iteration 12 of 30
Validation subjects are ['p041' 'p016' 'p026']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 validated image filenames belonging to 10 classes.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 125 steps, validate for 75 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
CV iteration 13 of 30
Validation subjects are ['p066' 'p050' 'p075']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 validated image filenames belonging to 10 clas

Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
CV iteration 16 of 30
Validation subjects are ['p064' 'p021' 'p022']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 validated image filenames belonging to 10 classes.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 125 steps, validate for 75 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
CV iteration 17 of 30
Validation subjects are ['p045' 'p012' 'p042']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 validated image filenames belonging to 10 classes.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 125 steps, validate for 75 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
CV iteration 18 of 30
Validation subjects are ['p045' 'p012' 'p042']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 valida

Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
CV iteration 21 of 30
Validation subjects are ['p066' 'p050' 'p075']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 validated image filenames belonging to 10 classes.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 125 steps, validate for 75 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
CV iteration 22 of 30
Validation subjects are ['p045' 'p035' 'p049']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 validated image filenames belonging to 10 classes.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 125 steps, validate for 75 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
CV iteration 23 of 30
Validation subjects are ['p064' 'p021' 'p022']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 valida

Epoch 9/50
Epoch 10/50
CV iteration 25 of 30
Validation subjects are ['p045' 'p012' 'p042']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 validated image filenames belonging to 10 classes.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 125 steps, validate for 75 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
CV iteration 26 of 30
Validation subjects are ['p045' 'p012' 'p042']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 validated image filenames belonging to 10 classes.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 125 steps, validate for 75 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
CV iteration 27 of 30
Validation subjects are ['p045' 'p012' 'p042']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 validated image filenames belonging to

Epoch 5/50
Epoch 6/50
Epoch 7/50
CV iteration 30 of 30
Validation subjects are ['p064' 'p021' 'p022']
Found 18400 validated image filenames belonging to 10 classes.
Found 2400 validated image filenames belonging to 10 classes.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 125 steps, validate for 75 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50


In [20]:
model4data

Unnamed: 0,iteration,sampledvalues,validation_accuracy
0,1,"[p026, p050, p002]",0.462
1,2,"[p049, p039, p035]",0.587
2,3,"[p066, p050, p075]",0.371
3,4,"[p045, p012, p042]",0.389
4,5,"[p041, p016, p026]",0.581
5,6,"[p045, p035, p049]",0.496
6,7,"[p064, p021, p022]",0.542
7,8,"[p045, p035, p049]",0.485
8,9,"[p041, p016, p026]",0.472
9,10,"[p045, p056, p022]",0.657


In [None]:
# Persist the model 4 CV metrics. The path is relative to the working directory set by the
# earlier os.chdir calls; index=False leaves the DataFrame index out of the file.
model4data.to_csv('../metrics/model4metrics.csv', index=False)

In [None]:
notify.send('all fitting complete')

In [10]:
# Sample again with trainsampling's default arguments, for a single manual train/validation split.
# NOTE(review): `df` was rebound to a sampled frame earlier in the notebook, and the execution
# counts from this cell onward are out of order — verify this section on a fresh kernel.
df1 = trainsampling(df)

In [12]:
df1.subject.unique()

array(['p002', 'p012', 'p014', 'p015', 'p016', 'p021', 'p022', 'p024',
       'p026', 'p035', 'p039', 'p041', 'p042', 'p045', 'p047', 'p049',
       'p050', 'p051', 'p052', 'p056', 'p061', 'p064', 'p066', 'p072',
       'p075', 'p081'], dtype=object)

In [32]:
# Hold out three subjects as the validation set
val_mask = df1['subject'].isin(['p002', 'p042', 'p081'])
dval = df1.loc[val_mask]

In [33]:
# Every subject other than the three held-out ones goes into the training set
held_out = df1['subject'].isin(['p002', 'p042', 'p081'])
dtrain = df1.loc[~held_out]

In [38]:
# Generator constructed with no arguments, i.e. all ImageDataGenerator defaults
datagen = keras.preprocessing.image.ImageDataGenerator()

# Callbacks: checkpoint the weights of the best val_accuracy epoch, and stop once
# val_accuracy has gone 5 epochs without improving
checkpoint = ModelCheckpoint('weights.hdf5',
                             monitor='val_accuracy',
                             mode='max',
                             save_best_only=True)
earlystop = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=5)
callbacks_list = [checkpoint, earlystop]

# Settings shared by the training and validation generators
flow_kwargs = dict(x_col='imgpath',
                   y_col='classname',
                   target_size=(227, 227),
                   seed=42)
train = datagen.flow_from_dataframe(dtrain, batch_size=16, **flow_kwargs)
# No batch_size here, so the validation generator uses flow_from_dataframe's default
val = datagen.flow_from_dataframe(dval, **flow_kwargs)

model4.fit(train, epochs=50, steps_per_epoch=125, callbacks=callbacks_list, validation_data=val)


Found 18400 validated image filenames belonging to 10 classes.
Found 2400 validated image filenames belonging to 10 classes.
  ...
    to  
  ['...']


  ...
    to  
  ['...']


  ...
    to  
  ['...']


  ...
    to  
  ['...']


Train for 125 steps, validate for 75 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50


<tensorflow.python.keras.callbacks.History at 0x2e9ea06c088>

In [39]:
# Labelled test images read from a directory tree (the output reports 200 images in 10 classes).
# target_size matches model4's 227x227 input; batch_size and shuffle are left at
# flow_from_directory's defaults.
test = datagen.flow_from_directory('imgs/testlabeled',
                                   seed=42,
                                   target_size=(227,227))

Found 200 images belonging to 10 classes.


In [41]:
# Restore the weights checkpointed by the fit above (ModelCheckpoint wrote 'weights.hdf5' with
# save_best_only=True on val_accuracy) and score the model on the test generator.
# The result is [loss, accuracy], following the loss/metrics model4 was compiled with.
model4.load_weights('weights.hdf5')
model4.evaluate(test)

  ...
    to  
  ['...']


  ...
    to  
  ['...']




[1.8353876556668962, 0.54]

###### It seems that I need to revamp my CV functions. They should randomly pick 3–5 subjects to isolate for the validation set, rather than cycling through each one individually. This way we can set the number of cross-validations to perform, and we get a better sense of how well the model is generalizing to all people.

# Let's Try DenseNet121

In [8]:
from tensorflow.keras.applications import DenseNet121

In [10]:
# DenseNet121 trained from scratch (weights=None) with its stock classification head sized for
# 10 classes. No input_shape is passed; the model summary reports a (None, 224, 224, 3) input layer.
model5 = DenseNet121(include_top=True, weights=None, classes=10)
opt = Adam(learning_rate=0.0001)
model5.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

NameError: name 'DenseNet121' is not defined

In [10]:
model5.summary()

Model: "densenet121"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
zero_padding2d (ZeroPadding2D)  (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 112, 112, 64) 9408        zero_padding2d[0][0]             
__________________________________________________________________________________________________
conv1/bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1/conv[0][0]                 
________________________________________________________________________________________

In [12]:
# Cross-validate DenseNet121 with the project's samplecv helper.
# target_size has to match the 224x224x3 input layer model5 was built with (include_top=True
# with no input_shape fixes the input size), in the same way the cvrand calls for the other
# models pass a target_size equal to each model's input_shape.
# NOTE(review): samplecv is not imported anywhere in this notebook (only trainsampling and
# cvrand are) — import it from the project's functions module before running on a fresh kernel.
model5data = samplecv(model5, 
                      df,
                      samples=80, 
                      batch_size=16,
                      epochs=50, 
                      steps_per_epoch=50, 
                      validation_steps=None, 
                      target_size=(224, 224),
                      patience=25,
                      random_state=42)

Resample iteration 1
CV iteration 1
Substep 1 of 26
Found 20000 validated image filenames belonging to 10 classes.
Found 800 validated image filenames belonging to 10 classes.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 50 steps, validate for 25 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50


KeyboardInterrupt: 