# Import Modules and Data

In [1]:
# Load modules
import sys
import os
os.chdir('..')

import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import backend as K
from tensorflow.keras import Sequential, layers
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPool2D, Dropout, BatchNormalization, Activation
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns
sns.set()

from functions import cvmodeleval,samplecv, trainsampling

In [2]:
# Set memory limit on GPU to keep it from freezing up when fitting TensorFlow models later
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Restrict TensorFlow to only allocate 1GB * 3 of memory on the first GPU
    try:
        tf.config.experimental.set_virtual_device_configuration(gpus[0], \
                                                                [tf.config.experimental.\
                                                                 VirtualDeviceConfiguration\
                                                                 (memory_limit=1024 * 4)])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized
        print(e)

1 Physical GPUs, 1 Logical GPUs


In [3]:
# Load training filename dataframe
df = pd.read_csv('data/processed/driver_image_list_processed.csv')

In [4]:
# Labeled and unlabeled test filename dataframes
df_test_labeled = pd.read_csv('D:/Users/Dylan/Documents/Data Science/Projects/DistractedDrivers/data/processed/labeled_test_df.csv')
df_test = pd.read_csv('D:/Users/Dylan/Documents/Data Science/Projects/DistractedDrivers/data/raw/test_filenames.csv')

In [5]:
# Call custom function to over/undersample classes occurance by subject so dataset is completely balanced.
df = trainsampling(df, samples=80, random_state=42)

In [6]:
# Define path variables for data
train_path = 'D:/Users/Dylan/Documents/Data Science/Projects/DistractedDrivers/data/raw/imgs/train'
labeled_test_path = 'D:/Users/Dylan/Documents/Data Science/Projects/DistractedDrivers/data/raw/imgs/testlabeled'
unlabeled_test_path = 'D:/Users/Dylan/Documents/Data Science/Projects/DistractedDrivers/data/raw/imgs/test'

4 subjects were chosen from the training data to be used for validation during model training. These subjects represent one woman and man with dark skin and one woman and man with light skin. This is to help balance any potential racial bias in the model.

In [7]:
# Define list of validation subjects
val_subjects = ['p056', 'p050', 'p041', 'p016']

In [8]:
# Define training and validation datasets
df_train = df[~df['subject'].isin(val_subjects)]
df_val = df[df['subject'].isin(val_subjects)]

In [9]:
# Shuffle the dataframes
df_train = df_train.sample(frac=1, random_state=42)
df_val = df_val.sample(frac=1, random_state=42)

## Pre-Processing and Data-Loading

In [10]:
# Define ImageDataGenerators for training and test/validation data. Generators include randomized preprocessing for 
# called out parameters. Test_dgen will be used for both validation data and test data.

train_dgen = ImageDataGenerator(samplewise_center=True,
                                rescale=1./255,
                                rotation_range=40,
                                width_shift_range=0.2,
                                height_shift_range=0.2,
                                channel_shift_range=0.2,
                                shear_range=0.2,
                                zoom_range=0.2,
                                brightness_range=[0.5, 1.5])

test_dgen = ImageDataGenerator(samplewise_center=True,
                               rescale=1./255)

In [11]:
# Load training, validation, and test data
train = train_dgen.flow_from_dataframe(df_train,
                                       x_col='imgpath',
                                       y_col='classname',
                                       batch_size=16, 
                                       target_size=(227,227),
                                       shuffle=True)

val = test_dgen.flow_from_dataframe(df_val,
                                    x_col='imgpath',
                                    y_col='classname',
                                    target_size=(227,227),
                                    shuffle=False)

test_labeled = test_dgen.flow_from_dataframe(df_test_labeled,
                                             x_col='filename',
                                             y_col='classname',
                                             target_size=(227,227),
                                             shuffle=False)

test = test_dgen.flow_from_dataframe(df_test,
                                     x_col='filename',
                                     y_col='class',
                                     target_size=(227,227),
                                     shuffle=False)


Found 17600 validated image filenames belonging to 10 classes.
Found 3200 validated image filenames belonging to 10 classes.
Found 200 validated image filenames belonging to 10 classes.
Found 79726 validated image filenames belonging to 1 classes.


# Model1 - AlexNet Architecture

Architecture below is based off of AlexNet

In [12]:
# Define function for initializing model
def alexNet_arch(opt):
    model = Sequential()
    model.add(Conv2D(99,
                  kernel_size=11,
                  strides=4,
                  padding='valid',
                  input_shape=(227, 227, 3)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPool2D(3,
                         strides=2,
                         padding='valid'))
    model.add(Conv2D(256,
                      kernel_size=5,
                      strides=1,
                      padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPool2D(3,
                        strides=2,
                        padding='valid'))
    model.add(Conv2D(384,
                      kernel_size=3,
                      strides=1,
                      padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(384,
                      kernel_size=3,
                      strides=1,
                      padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(256,
                      kernel_size=3,
                      strides=1,
                      padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPool2D(3,
                         strides=2,
                         padding='valid'))
    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model

In [13]:
# Define optimizer variable
opt = RMSprop(learning_rate=0.0001)

In [14]:
# Compile model_1
model_1 = alexNet_arch(opt)

In [15]:
# Designate ModelCheckpoint and EarlyStopping callbacks_list
checkpoint = ModelCheckpoint('D:/Users/Dylan/Documents/Data Science/Projects/DistractedDrivers/data/weights/model1_weights.hdf5',
                              mode='min',
                              monitor='val_loss',
                              save_best_only=True)

earlystop = EarlyStopping(monitor='val_loss', min_delta=0.025, patience=20, restore_best_weights=True)

callbacks_list = [checkpoint, earlystop]

In [16]:
%%time
# Train model_1
model_1.fit(train,
            epochs=100,
            steps_per_epoch=110,
            validation_data=val,
            callbacks=callbacks_list)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100


Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Wall time: 44min 20s


<tensorflow.python.keras.callbacks.History at 0x25ed51c4400>

In [17]:
# Save model predictions of validation data
validation_predictions = model_1.predict(val)

In [18]:
# Convert to a dataframe with original labeles
validation_predictions = pd.DataFrame(validation_predictions, 
                                         columns=['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9'])
validation_predictions = df_val.reset_index(drop=True).join(validation_predictions)

In [19]:
# Save validation predictions
validation_predictions.to_csv('D:/Users/Dylan/Documents/Data Science/Projects/DistractedDrivers/data/validation_predictions/model_1_validation_predictions.csv')

In [20]:
# Evaluate validation data with best coefficients
model_1_val_metrics = model_1.evaluate(val)



In [21]:
model_1_val_metrics = pd.DataFrame({'Cross Entropy Loss':[model_1_val_metrics[0]], 
                                    'Accuracy':[model_1_val_metrics[1]]})
model_1_val_metrics.to_csv('D:/Users/Dylan/Documents/Data Science/Projects/DistractedDrivers/data/metrics/model_1_val_metrics.csv', index=False)

In [22]:
model_1_val_metrics

Unnamed: 0,Cross Entropy Loss,Accuracy
0,0.971922,0.692813


0.9719 cross-entropy loss and 0.6928 accuracy are not a bad start

### Model 2 - Xception Transfer Learning

Using a method desribed at this url (https://www.analyticsvidhya.com/blog/2020/08/top-4-pre-trained-models-for-image-classification-with-python-code/), I loaded a pre-trained Xception model through keras with weights optimized for imagenet. I made all of the existing layers non-trainable and then added a few trainable layers which will be fit to the data.

In [12]:
base_model_2 = tf.keras.applications.Xception(include_top=False,
                                        weights='imagenet',
                                        input_shape=(227,227,3))
for layer in base_model_2.layers:
    layer.trainable = False

# Flatten the output layer to 1 dimension
x = layers.Flatten()(base_model_2.output)

# Add a fully connected layer with 512 hidden units and ReLU activation
x = layers.Dense(512, activation='relu')(x)

# Add a dropout rate of 0.5
x = layers.Dropout(0.5)(x)

# Add a final sigmoid layer for classification
x = layers.Dense(10, activation='softmax')(x)

model_2 = tf.keras.models.Model(base_model_2.input, x)
model_2.compile(optimizer = RMSprop(0.0001), loss = 'categorical_crossentropy',metrics = ['accuracy'])
model_2.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 227, 227, 3) 0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 113, 113, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 113, 113, 32) 128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 113, 113, 32) 0           block1_conv1_bn[0][0]            
______________________________________________________________________________________________

In [13]:
# Designate ModelCheckpoint and EarlyStopping callbacks_list
checkpoint = ModelCheckpoint('D:/Users/Dylan/Documents/Data Science/Projects/DistractedDrivers/data/weights/model2_weights.hdf5',
                              mode='min',
                              monitor='val_loss',
                              save_best_only=True)

earlystop = EarlyStopping(monitor='val_loss', min_delta=0.025, patience=20, restore_best_weights=True)

callbacks_list = [checkpoint, earlystop]

In [14]:
model_2.fit(train,
            epochs=100,
            steps_per_epoch=110,
            validation_data=val,
            callbacks=callbacks_list)

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 110 steps, validate for 100 steps
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100


<tensorflow.python.keras.callbacks.History at 0x18ba027de48>

In [15]:
# Save model predictions of validation data
validation_predictions = model_2.predict(val)

# Convert to a dataframe with original labels
validation_predictions = pd.DataFrame(validation_predictions, 
                                         columns=['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9'])
validation_predictions = df_val.reset_index(drop=True).join(validation_predictions)

In [16]:
# Save validation predictions
validation_predictions.to_csv('D:/Users/Dylan/Documents/Data Science/Projects/DistractedDrivers/data/validation_predictions/model_2_validation_predictions.csv')

In [17]:
# Evaluate validation data with best coefficients
model_2_val_metrics = model_2.evaluate(val)

  ...
    to  
  ['...']


In [21]:
model_2_val_metrics = pd.DataFrame({'Cross Entropy Loss':[model_2_val_metrics[0]], 
                                    'Accuracy':[model_2_val_metrics[1]]})
model_2_val_metrics.to_csv('D:/Users/Dylan/Documents/Data Science/Projects/DistractedDrivers/data/metrics/model_2_val_metrics.csv', index=False)

In [22]:
model_2_val_metrics

Unnamed: 0,Cross Entropy Loss,Accuracy
0,2.15828,0.216875


### Model 3 - VGG16 Transfer-Learning

In [12]:
base_model_3 = tf.keras.applications.VGG16(include_top=False,
                                        weights='imagenet',
                                        input_shape=(227,227,3))
for layer in base_model_3.layers:
    layer.trainable = False

# Flatten the output layer to 1 dimension
x = layers.Flatten()(base_model_3.output)

# Add a fully connected layer with 512 hidden units and ReLU activation
x = layers.Dense(512, activation='relu')(x)

# Add a dropout rate of 0.5
x = layers.Dropout(0.5)(x)

# Add a final sigmoid layer for classification
x = layers.Dense(10, activation='softmax')(x)

model_3 = tf.keras.models.Model(base_model_3.input, x)
model_3.compile(optimizer = RMSprop(0.0001), loss = 'categorical_crossentropy',metrics = ['accuracy'])
model_3.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 227, 227, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 227, 227, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 227, 227, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 113, 113, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 113, 113, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 113, 113, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [21]:
# Designate ModelCheckpoint and EarlyStopping callbacks_list
checkpoint = ModelCheckpoint('D:/Users/Dylan/Documents/Data Science/Projects/DistractedDrivers/data/weights/model3_weights.hdf5',
                              mode='min',
                              monitor='val_loss',
                              save_best_only=True)

earlystop = EarlyStopping(monitor='val_loss', min_delta=0.025, patience=50, restore_best_weights=True)

callbacks_list = [checkpoint, earlystop]

In [14]:
model_3.fit(train,
            epochs=100,
            steps_per_epoch=110,
            validation_data=val,
            callbacks=callbacks_list)

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 110 steps, validate for 100 steps
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100


<tensorflow.python.keras.callbacks.History at 0x204e7f42d08>

I renamed the weight file for this first run to model3_weights_first_run.hdf5

In [22]:
# Fit the model further and see if it improves
model_3.fit(train,
            epochs=200,
            steps_per_epoch=110,
            validation_data=val,
            callbacks=callbacks_list)

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 110 steps, validate for 100 steps
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch

<tensorflow.python.keras.callbacks.History at 0x205a9fdb4c8>

I renamed the weight file for this second run to model3_weights_second_run.hdf5

Change the learning rate from 0.0001 to 0.00001, reload the weights from the first run, and then see if it improves.

In [39]:
# Change learning rate
K.set_value(model_3.optimizer.learning_rate, 0.00001)

In [40]:
# Load weights from first run
model_3.load_weights('D:/Users/Dylan/Documents/Data Science/Projects/DistractedDrivers/data/weights/model3_weights_first_run.hdf5')

In [41]:
# Update callbacks_list. Remove earlystop. Have run for 600 epochs overnight.
checkpoint = ModelCheckpoint('D:/Users/Dylan/Documents/Data Science/Projects/DistractedDrivers/data/weights/model3_weights_third_run.hdf5',
                              mode='min',
                              monitor='val_loss',
                              save_best_only=True)

callbacks_list = [checkpoint]

In [42]:
# Try improving with learning rate=0.00001
model_3.fit(train,
            epochs=600,
            steps_per_epoch=110,
            validation_data=val,
            callbacks=callbacks_list)

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 110 steps, validate for 100 steps
Epoch 1/600
Epoch 2/600
Epoch 3/600
Epoch 4/600
Epoch 5/600
Epoch 6/600
Epoch 7/600
Epoch 8/600
Epoch 9/600
Epoch 10/600
Epoch 11/600
Epoch 12/600
Epoch 13/600
Epoch 14/600
Epoch 15/600
Epoch 16/600
Epoch 17/600
Epoch 18/600
Epoch 19/600
Epoch 20/600
Epoch 21/600
Epoch 22/600
Epoch 23/600
Epoch 24/600
Epoch 25/600
Epoch 26/600
Epoch 27/600
Epoch 28/600
Epoch 29/600
Epoch 30/600
Epoch 31/600
Epoch 32/600
Epoch 33/600
Epoch 34/600
Epoch 35/600
Epoch 36/600
Epoch 37/600
Epoch 38/600
Epoch 39/600
Epoch 40/600
Epoch 41/600
Epoch 42/600
Epoch 43/600
Epoch 44/600
Epoch 45/600
Epoch 46/600
Epoch 47/600
Epoch 48/600
Epoch 49/600
Epoch 50/600
Epoch 51/600
Epoch 52/600
Epoch 53/600
Epoch 54/600
Epoch 55/600
Epoch 56/600
Epoch 57/600
Epoch 58/600
Epoch 59/600
Epoch 60/600
Epoch 61/600
Epoch 62/600
Epoch 63/600
Epoch 64/600
Epoch 65/600
Epoch 66/600
Epoch 67/600
Epoch 68/600
Epoch 69/600
Epoch 70/600
Epoch

Epoch 111/600
Epoch 112/600
Epoch 113/600
Epoch 114/600
Epoch 115/600
Epoch 116/600
Epoch 117/600
Epoch 118/600
Epoch 119/600
Epoch 120/600
Epoch 121/600
Epoch 122/600
Epoch 123/600
Epoch 124/600
Epoch 125/600
Epoch 126/600
Epoch 127/600
Epoch 128/600
Epoch 129/600
Epoch 130/600
Epoch 131/600
Epoch 132/600
Epoch 133/600
Epoch 134/600
Epoch 135/600
Epoch 136/600
Epoch 137/600
Epoch 138/600
Epoch 139/600
Epoch 140/600
Epoch 141/600
Epoch 142/600
Epoch 143/600
Epoch 144/600
Epoch 145/600
Epoch 146/600
Epoch 147/600
Epoch 148/600
Epoch 149/600
Epoch 150/600
Epoch 151/600
Epoch 152/600
Epoch 153/600
Epoch 154/600
Epoch 155/600
Epoch 156/600
Epoch 157/600
Epoch 158/600
Epoch 159/600
Epoch 160/600
Epoch 161/600
Epoch 162/600
Epoch 163/600
Epoch 164/600
Epoch 165/600
Epoch 166/600
Epoch 167/600
Epoch 168/600
Epoch 169/600
Epoch 170/600
Epoch 171/600
Epoch 172/600
Epoch 173/600
Epoch 174/600
Epoch 175/600
Epoch 176/600
Epoch 177/600
Epoch 178/600
Epoch 179/600
Epoch 180/600
Epoch 181/600
Epoch 

Epoch 222/600
Epoch 223/600
Epoch 224/600
Epoch 225/600
Epoch 226/600
Epoch 227/600
Epoch 228/600
Epoch 229/600
Epoch 230/600
Epoch 231/600
Epoch 232/600
Epoch 233/600
Epoch 234/600
Epoch 235/600
Epoch 236/600
Epoch 237/600
Epoch 238/600
Epoch 239/600
Epoch 240/600
Epoch 241/600
Epoch 242/600
Epoch 243/600
Epoch 244/600
Epoch 245/600
Epoch 246/600
Epoch 247/600
Epoch 248/600
Epoch 249/600
Epoch 250/600
Epoch 251/600
Epoch 252/600
Epoch 253/600
Epoch 254/600
Epoch 255/600
Epoch 256/600
Epoch 257/600
Epoch 258/600
Epoch 259/600
Epoch 260/600
Epoch 261/600
Epoch 262/600
Epoch 263/600
Epoch 264/600
Epoch 265/600
Epoch 266/600
Epoch 267/600
Epoch 268/600
Epoch 269/600
Epoch 270/600
Epoch 271/600
Epoch 272/600
Epoch 273/600
Epoch 274/600
Epoch 275/600
Epoch 276/600
Epoch 277/600
Epoch 278/600
Epoch 279/600
Epoch 280/600
Epoch 281/600
Epoch 282/600
Epoch 283/600
Epoch 284/600
Epoch 285/600
Epoch 286/600
Epoch 287/600
Epoch 288/600
Epoch 289/600
Epoch 290/600
Epoch 291/600
Epoch 292/600
Epoch 

Epoch 332/600
Epoch 333/600
Epoch 334/600
Epoch 335/600
Epoch 336/600
Epoch 337/600
Epoch 338/600
Epoch 339/600
Epoch 340/600
Epoch 341/600
Epoch 342/600
Epoch 343/600
Epoch 344/600
Epoch 345/600
Epoch 346/600
Epoch 347/600
Epoch 348/600
Epoch 349/600
Epoch 350/600
Epoch 351/600
Epoch 352/600
Epoch 353/600
Epoch 354/600
Epoch 355/600
Epoch 356/600
Epoch 357/600
Epoch 358/600
Epoch 359/600
Epoch 360/600
Epoch 361/600
Epoch 362/600
Epoch 363/600
Epoch 364/600
Epoch 365/600
Epoch 366/600
Epoch 367/600
Epoch 368/600
Epoch 369/600
Epoch 370/600
Epoch 371/600
Epoch 372/600
Epoch 373/600
Epoch 374/600
Epoch 375/600
Epoch 376/600
Epoch 377/600
Epoch 378/600
Epoch 379/600
Epoch 380/600
Epoch 381/600
Epoch 382/600
Epoch 383/600
Epoch 384/600
Epoch 385/600
Epoch 386/600
Epoch 387/600
Epoch 388/600
Epoch 389/600
Epoch 390/600
Epoch 391/600
Epoch 392/600
Epoch 393/600
Epoch 394/600
Epoch 395/600
Epoch 396/600
Epoch 397/600
Epoch 398/600
Epoch 399/600
Epoch 400/600
Epoch 401/600
Epoch 402/600
Epoch 

Epoch 442/600
Epoch 443/600
Epoch 444/600
Epoch 445/600
Epoch 446/600
Epoch 447/600
Epoch 448/600
Epoch 449/600
Epoch 450/600
Epoch 451/600
Epoch 452/600
Epoch 453/600
Epoch 454/600
Epoch 455/600
Epoch 456/600
Epoch 457/600
Epoch 458/600
Epoch 459/600
Epoch 460/600
Epoch 461/600
Epoch 462/600
Epoch 463/600
Epoch 464/600
Epoch 465/600
Epoch 466/600
Epoch 467/600
Epoch 468/600
Epoch 469/600
Epoch 470/600
Epoch 471/600
Epoch 472/600
Epoch 473/600
Epoch 474/600
Epoch 475/600
Epoch 476/600
Epoch 477/600
Epoch 478/600
Epoch 479/600
Epoch 480/600
Epoch 481/600
Epoch 482/600
Epoch 483/600
Epoch 484/600
Epoch 485/600
Epoch 486/600
Epoch 487/600
Epoch 488/600
Epoch 489/600
Epoch 490/600
Epoch 491/600
Epoch 492/600
Epoch 493/600
Epoch 494/600
Epoch 495/600
Epoch 496/600
Epoch 497/600
Epoch 498/600
Epoch 499/600
Epoch 500/600
Epoch 501/600
Epoch 502/600
Epoch 503/600
Epoch 504/600
Epoch 505/600
Epoch 506/600
Epoch 507/600
Epoch 508/600
Epoch 509/600
Epoch 510/600
Epoch 511/600
Epoch 512/600
Epoch 

Epoch 552/600
Epoch 553/600
Epoch 554/600
Epoch 555/600
Epoch 556/600
Epoch 557/600
Epoch 558/600
Epoch 559/600
Epoch 560/600
Epoch 561/600
Epoch 562/600
Epoch 563/600
Epoch 564/600
Epoch 565/600
Epoch 566/600
Epoch 567/600
Epoch 568/600
Epoch 569/600
Epoch 570/600
Epoch 571/600
Epoch 572/600
Epoch 573/600
Epoch 574/600
Epoch 575/600
Epoch 576/600
Epoch 577/600
Epoch 578/600
Epoch 579/600
Epoch 580/600
Epoch 581/600
Epoch 582/600
Epoch 583/600
Epoch 584/600
Epoch 585/600
Epoch 586/600
Epoch 587/600
Epoch 588/600
Epoch 589/600
Epoch 590/600
Epoch 591/600
Epoch 592/600
Epoch 593/600
Epoch 594/600
Epoch 595/600
Epoch 596/600
Epoch 597/600
Epoch 598/600
Epoch 599/600
Epoch 600/600


<tensorflow.python.keras.callbacks.History at 0x205cb519348>

In [15]:
# Save model predictions of validation data
validation_predictions = model_3.predict(val)

# Convert to a dataframe with original labels
validation_predictions = pd.DataFrame(validation_predictions, 
                                         columns=['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9'])
validation_predictions = df_val.reset_index(drop=True).join(validation_predictions)

In [16]:
# Save validation predictions
validation_predictions.to_csv('D:/Users/Dylan/Documents/Data Science/Projects/DistractedDrivers/data/validation_predictions/model_3_validation_predictions.csv')

In [17]:
# Evaluate validation data with best coefficients
model_3_val_metrics = model_3.evaluate(val)

  ...
    to  
  ['...']


In [18]:
model_3_val_metrics = pd.DataFrame({'Cross Entropy Loss':[model_3_val_metrics[0]], 
                                    'Accuracy':[model_3_val_metrics[1]]})
model_3_val_metrics.to_csv('D:/Users/Dylan/Documents/Data Science/Projects/DistractedDrivers/data/metrics/model_3_val_metrics.csv', index=False)

In [19]:
model_3_val_metrics

Unnamed: 0,Cross Entropy Loss,Accuracy
0,0.731836,0.756875


Getting better!

In [62]:
base_model3 = tf.keras.applications.VGG16(include_top=False,
                                        weights='imagenet',
                                        input_shape=(227,227,3))

In [63]:
for layer in model3.layers:
    layer.trainable = False

In [64]:
from tensorflow.keras import layers

In [65]:
# Flatten the output layer to 1 dimension
x = layers.Flatten()(model3.output)

# Add a fully connected layer with 512 hidden units and ReLU activation
x = layers.Dense(512, activation='relu')(x)

# Add a dropout rate of 0.5
x = layers.Dropout(0.5)(x)

# Add a final sigmoid layer for classification
x = layers.Dense(10, activation='softmax')(x)

model3 = tf.keras.models.Model(model3.input, x)

model3.compile(optimizer = tf.keras.optimizers.RMSprop(lr=0.0001), loss = 'categorical_crossentropy',metrics = ['acc'])

In [66]:
model3.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 227, 227, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 227, 227, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 227, 227, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 113, 113, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 113, 113, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 113, 113, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0   

In [67]:
os.getcwd()

'C:\\Users\\Dylan\\Desktop\\Data Science\\Projects\\DistractedDrivers\\data\\raw\\imgs\\testlabeled\\unsorted'

In [70]:
model3.load_weights('D:\\Users\\Dylan\\Documents\\Data Science\\Projects\\DistractedDrivers\\data\\raw\\weights.hdf5')

In [33]:
model3.fit(train,
                epochs=50,
                steps_per_epoch=110,
                validation_data=test,
                callbacks=callbacks_list)

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 110 steps, validate for 100 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50


<tensorflow.python.keras.callbacks.History at 0x19ee1d5e888>

In [34]:
validation_predictions = model3.predict(test)

In [35]:
validation_predictions = pd.DataFrame(validation_predictions, 
                                         columns=['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9'])

In [36]:
classifier_train_data = df_test.reset_index(drop=True).join(validation_predictions)

In [37]:
classifier_train_data

Unnamed: 0,subject,classname,img,imgpath,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9
0,p050,c9,img_37057.jpg,imgs/train/c9/img_37057.jpg,0.038092,0.047378,0.055674,0.003527,0.011132,0.032342,0.036965,0.002194,0.729606,0.043091
1,p056,c1,img_70214.jpg,imgs/train/c1/img_70214.jpg,0.001814,0.665885,0.046201,0.011587,0.001652,0.002267,0.147198,0.049998,0.066739,0.006660
2,p050,c7,img_89317.jpg,imgs/train/c7/img_89317.jpg,0.004418,0.058346,0.321970,0.004580,0.001134,0.001784,0.017168,0.522277,0.043075,0.025248
3,p041,c1,img_9714.jpg,imgs/train/c1/img_9714.jpg,0.021931,0.579883,0.051502,0.050977,0.004998,0.064224,0.037028,0.003008,0.048320,0.138128
4,p016,c2,img_46158.jpg,imgs/train/c2/img_46158.jpg,0.003152,0.329363,0.136421,0.014852,0.012734,0.014436,0.121248,0.182954,0.082555,0.102285
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3195,p041,c3,img_78650.jpg,imgs/train/c3/img_78650.jpg,0.009461,0.001977,0.004556,0.822273,0.130280,0.014533,0.001842,0.000328,0.008226,0.006525
3196,p041,c4,img_83455.jpg,imgs/train/c4/img_83455.jpg,0.007229,0.000311,0.011610,0.164005,0.793564,0.010024,0.003079,0.000376,0.006493,0.003310
3197,p041,c6,img_58453.jpg,imgs/train/c6/img_58453.jpg,0.005851,0.024534,0.390130,0.028171,0.084743,0.035849,0.285895,0.004593,0.124124,0.016109
3198,p041,c0,img_51874.jpg,imgs/train/c0/img_51874.jpg,0.409633,0.021191,0.047486,0.014772,0.004590,0.097627,0.006294,0.039794,0.050137,0.308477


In [38]:
classifier_train_data.to_csv('../processed/VGG16_classifier_training_data.csv')

In [39]:
from sklearn.model_selection import train_test_split

In [40]:
X = classifier_train_data.drop(columns=['subject', 'classname', 'img', 'imgpath'])
y= classifier_train_data[['classname']]

In [41]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [42]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier

In [43]:
params = {'criterion':['gini', 'entropy'],
          'min_samples_split':np.arange(2, 20, 1),
          'min_samples_leaf':np.arange(1, 20, 1)}

In [71]:
clf = RandomForestClassifier(n_jobs=-2)

In [74]:
RandForest = RandomizedSearchCV(clf, params, scoring='accuracy', 
                           n_iter=30)

In [75]:
RandForest.fit(X_train, np.ravel(y_train))

RandomizedSearchCV(estimator=RandomForestClassifier(n_jobs=-2), n_iter=30,
                   param_distributions={'criterion': ['gini', 'entropy'],
                                        'min_samples_leaf': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19]),
                                        'min_samples_split': array([ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
       19])},
                   scoring='accuracy')

In [76]:
print(RandForest.best_params_)

{'min_samples_split': 5, 'min_samples_leaf': 2, 'criterion': 'entropy'}


In [77]:
from sklearn.metrics import classification_report

In [78]:
y_pred = RandForest.predict(X_test)

In [79]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

          c0       0.88      0.97      0.93        77
          c1       0.96      0.96      0.96        82
          c2       0.92      0.92      0.92        78
          c3       0.99      0.95      0.97        87
          c4       0.98      0.99      0.98        89
          c5       1.00      1.00      1.00        76
          c6       0.89      0.95      0.92        78
          c7       0.95      0.99      0.97        70
          c8       0.99      0.85      0.91        84
          c9       0.97      0.95      0.96        79

    accuracy                           0.95       800
   macro avg       0.95      0.95      0.95       800
weighted avg       0.95      0.95      0.95       800



Both models are created now. Let's evaluate on the small labeled test set.

In [55]:
os.getcwd()

'C:\\Users\\Dylan\\Desktop\\Data Science\\Projects\\DistractedDrivers\\data\\raw'

In [56]:
test_labeled_df = pd.read_csv('../processed/labeled_test_df.csv')

In [80]:
test_labeled_df

Unnamed: 0,filename,classname
0,img_101570.jpg,c0
1,img_102035.jpg,c0
2,img_22.jpg,c0
3,img_23.jpg,c0
4,img_24.jpg,c0
...,...,...
195,img_54733.jpg,c9
196,img_54928.jpg,c9
197,img_54990.jpg,c9
198,img_55171.jpg,c9


In [81]:
os.getcwd()

'C:\\Users\\Dylan\\Desktop\\Data Science\\Projects\\DistractedDrivers\\data\\raw\\imgs\\testlabeled\\unsorted'

In [82]:
os.chdir('imgs/testlabeled/unsorted')

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'imgs/testlabeled/unsorted'

In [83]:
test = test_dgen.flow_from_dataframe(test_labeled_df,
                                     x_col='filename',
                                     y_col='classname',
                                     target_size=(227,227),
                                     batch_size=16,
                                     shuffle=False)

Found 200 validated image filenames belonging to 10 classes.


In [84]:
test_nn_output = model3.predict(test)

In [85]:
model3.evaluate(test)

  ...
    to  
  ['...']


[0.8905925131761111, 0.73]

In [86]:
y_labeled_test=test_labeled_df[['classname']]

In [87]:
X_labeled_test = pd.DataFrame(test_nn_output, 
                              columns=['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9'])

In [88]:
X_labeled_test

Unnamed: 0,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9
0,0.047275,0.005121,0.027903,0.069816,0.096730,0.443145,0.157516,0.006203,0.106239,0.040052
1,0.757133,0.000159,0.000689,0.019632,0.008444,0.082580,0.000070,0.000231,0.001939,0.129122
2,0.393718,0.007405,0.000747,0.249553,0.039259,0.031297,0.001105,0.001983,0.012218,0.262715
3,0.469957,0.009404,0.002149,0.127384,0.066894,0.024054,0.001107,0.005694,0.015252,0.278104
4,0.102190,0.102216,0.010231,0.464914,0.107785,0.008216,0.053829,0.014488,0.033943,0.102189
...,...,...,...,...,...,...,...,...,...,...
195,0.004901,0.107628,0.026312,0.000329,0.000066,0.002584,0.002301,0.498552,0.013659,0.343667
196,0.001136,0.006047,0.019866,0.002046,0.004147,0.001085,0.005265,0.003656,0.944786,0.011967
197,0.486174,0.046821,0.000994,0.012781,0.000907,0.115279,0.001817,0.001643,0.005545,0.328040
198,0.106881,0.007360,0.058115,0.084603,0.080829,0.007823,0.006192,0.143699,0.163061,0.341437


In [89]:
y_labeled_test

Unnamed: 0,classname
0,c0
1,c0
2,c0
3,c0
4,c0
...,...
195,c9
196,c9
197,c9
198,c9


In [94]:
labeled_predictions = RandForest.predict(X_labeled_test)

In [95]:
labeled_predictions

array(['c6', 'c0', 'c0', 'c0', 'c3', 'c0', 'c0', 'c0', 'c0', 'c0', 'c5',
       'c0', 'c0', 'c0', 'c0', 'c4', 'c0', 'c0', 'c0', 'c0', 'c1', 'c1',
       'c1', 'c1', 'c1', 'c8', 'c2', 'c1', 'c1', 'c6', 'c1', 'c8', 'c1',
       'c1', 'c6', 'c1', 'c1', 'c2', 'c1', 'c1', 'c6', 'c6', 'c8', 'c2',
       'c1', 'c7', 'c1', 'c1', 'c2', 'c6', 'c7', 'c2', 'c2', 'c8', 'c1',
       'c7', 'c8', 'c2', 'c2', 'c2', 'c3', 'c5', 'c3', 'c4', 'c3', 'c3',
       'c3', 'c3', 'c4', 'c4', 'c3', 'c3', 'c3', 'c3', 'c3', 'c3', 'c3',
       'c3', 'c0', 'c3', 'c8', 'c3', 'c4', 'c4', 'c8', 'c4', 'c4', 'c4',
       'c4', 'c4', 'c3', 'c3', 'c4', 'c4', 'c4', 'c4', 'c4', 'c3', 'c4',
       'c4', 'c5', 'c5', 'c5', 'c5', 'c5', 'c5', 'c5', 'c5', 'c5', 'c5',
       'c5', 'c5', 'c5', 'c5', 'c8', 'c5', 'c5', 'c5', 'c5', 'c5', 'c6',
       'c6', 'c6', 'c6', 'c6', 'c1', 'c6', 'c6', 'c6', 'c6', 'c6', 'c1',
       'c8', 'c8', 'c6', 'c8', 'c6', 'c6', 'c6', 'c6', 'c7', 'c7', 'c7',
       'c8', 'c8', 'c8', 'c7', 'c8', 'c1', 'c7', 'c

In [96]:
print(classification_report(y_labeled_test, labeled_predictions))

              precision    recall  f1-score   support

          c0       0.70      0.80      0.74        20
          c1       0.61      0.70      0.65        20
          c2       0.78      0.35      0.48        20
          c3       0.71      0.75      0.73        20
          c4       0.78      0.70      0.74        20
          c5       0.86      0.95      0.90        20
          c6       0.65      0.75      0.70        20
          c7       0.73      0.55      0.63        20
          c8       0.42      0.80      0.55        20
          c9       1.00      0.40      0.57        20

    accuracy                           0.68       200
   macro avg       0.72      0.68      0.67       200
weighted avg       0.72      0.68      0.67       200



In [97]:
print(RandForest.predict_proba(X_labeled_test))

[[0.01       0.006      0.04404762 ... 0.002      0.1195     0.01      ]
 [0.90940476 0.         0.         ... 0.01       0.04083333 0.0225    ]
 [0.94166667 0.         0.         ... 0.         0.         0.05833333]
 ...
 [0.92666667 0.         0.         ... 0.         0.         0.06      ]
 [0.2202619  0.00666667 0.0205     ... 0.015      0.17980952 0.36592857]
 [0.11433333 0.00916667 0.0165     ... 0.         0.022      0.6155    ]]


Load datagen for actual test data

In [49]:
test_filenames = os.listdir('data/raw/imgs/test/test')

In [50]:
test_filenames = pd.DataFrame({'filename':test_filenames})

In [51]:
test_filenames

Unnamed: 0,filename
0,img_1.jpg
1,img_10.jpg
2,img_100.jpg
3,img_1000.jpg
4,img_100000.jpg
...,...
79721,img_99994.jpg
79722,img_99995.jpg
79723,img_99996.jpg
79724,img_99998.jpg


In [52]:
test_filenames.to_csv('data/raw/test_filenames.csv', index=False)

In [59]:
df_test

Unnamed: 0,filename
0,img_1.jpg
1,img_10.jpg
2,img_100.jpg
3,img_1000.jpg
4,img_100000.jpg
...,...
79721,img_99994.jpg
79722,img_99995.jpg
79723,img_99996.jpg
79724,img_99998.jpg


In [60]:
df_test['filename'] = df_test[['filename']].apply(lambda x: 'C:/Users/Dylan/Desktop/Data Science/Projects/DistractedDrivers/data/raw/imgs/test/test/' + x, axis=1)

In [61]:
df_test

Unnamed: 0,filename
0,C:/Users/Dylan/Desktop/Data Science/Projects/D...
1,C:/Users/Dylan/Desktop/Data Science/Projects/D...
2,C:/Users/Dylan/Desktop/Data Science/Projects/D...
3,C:/Users/Dylan/Desktop/Data Science/Projects/D...
4,C:/Users/Dylan/Desktop/Data Science/Projects/D...
...,...
79721,C:/Users/Dylan/Desktop/Data Science/Projects/D...
79722,C:/Users/Dylan/Desktop/Data Science/Projects/D...
79723,C:/Users/Dylan/Desktop/Data Science/Projects/D...
79724,C:/Users/Dylan/Desktop/Data Science/Projects/D...


In [105]:
os.getcwd()

'C:\\Users\\Dylan\\Desktop\\Data Science\\Projects\\DistractedDrivers\\data\\raw\\imgs\\test\\test'

In [62]:
df_test['class'] = 'unknown'

In [64]:
df_test.to_csv('C:/Users/Dylan/Desktop/Data Science/Projects/DistractedDrivers/data/raw/test_filenames.csv', index=False)

In [107]:
test = test_dgen.flow_from_dataframe(test_filenames,
                                     target_size=(227,227),
                                     batch_size=16,
                                     shuffle=False)

Found 79726 validated image filenames belonging to 1 classes.


In [108]:
test_predictions = model3.predict(test)

In [109]:
test_predictions = pd.DataFrame(test_predictions, columns=['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9'])

In [110]:
test_predictions

Unnamed: 0,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9
0,0.008604,0.085207,0.018095,0.113947,0.040185,0.600935,0.077657,0.006483,0.026404,0.022482
1,0.019094,0.000826,0.001663,0.006799,0.003642,0.941262,0.001846,0.000832,0.017235,0.006800
2,0.200818,0.249065,0.058669,0.010818,0.004579,0.048115,0.014242,0.062749,0.039698,0.311247
3,0.004387,0.002688,0.024336,0.000805,0.002280,0.009872,0.004561,0.000767,0.943617,0.006687
4,0.045187,0.001458,0.000840,0.701084,0.199329,0.022523,0.005878,0.001054,0.016436,0.006211
...,...,...,...,...,...,...,...,...,...,...
79721,0.004477,0.186477,0.133851,0.001749,0.000536,0.011587,0.029475,0.449997,0.096771,0.085079
79722,0.013295,0.035097,0.000265,0.931134,0.004813,0.001793,0.002391,0.000746,0.002524,0.007941
79723,0.000791,0.041994,0.000147,0.846906,0.099453,0.000325,0.007390,0.000138,0.002115,0.000740
79724,0.000449,0.085182,0.286448,0.000522,0.000433,0.000592,0.328923,0.205457,0.084118,0.007875


In [121]:
test_predictions_df = test_filenames.join(test_predictions)
test_predictions_df.drop(columns=['class'], inplace=True)
test_predictions_df.rename(columns={'filename':'img'}, inplace=True)
test_predictions_df

Unnamed: 0,img,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9
0,img_1.jpg,0.008604,0.085207,0.018095,0.113947,0.040185,0.600935,0.077657,0.006483,0.026404,0.022482
1,img_10.jpg,0.019094,0.000826,0.001663,0.006799,0.003642,0.941262,0.001846,0.000832,0.017235,0.006800
2,img_100.jpg,0.200818,0.249065,0.058669,0.010818,0.004579,0.048115,0.014242,0.062749,0.039698,0.311247
3,img_1000.jpg,0.004387,0.002688,0.024336,0.000805,0.002280,0.009872,0.004561,0.000767,0.943617,0.006687
4,img_100000.jpg,0.045187,0.001458,0.000840,0.701084,0.199329,0.022523,0.005878,0.001054,0.016436,0.006211
...,...,...,...,...,...,...,...,...,...,...,...
79721,img_99994.jpg,0.004477,0.186477,0.133851,0.001749,0.000536,0.011587,0.029475,0.449997,0.096771,0.085079
79722,img_99995.jpg,0.013295,0.035097,0.000265,0.931134,0.004813,0.001793,0.002391,0.000746,0.002524,0.007941
79723,img_99996.jpg,0.000791,0.041994,0.000147,0.846906,0.099453,0.000325,0.007390,0.000138,0.002115,0.000740
79724,img_99998.jpg,0.000449,0.085182,0.286448,0.000522,0.000433,0.000592,0.328923,0.205457,0.084118,0.007875


In [126]:
test_predictions_df.to_csv('C:\\Users\\Dylan\\Desktop\\Data Science\\Projects\\DistractedDrivers\\data\\test_predictions(VGG16_Transfer_Learning).csv', index=False)

In [111]:
rf_predictions = model.predict_proba(test_predictions)

In [112]:
rf_predictions

array([[0.01277273, 0.05188699, 0.06216909, ..., 0.0022028 , 0.07507054,
        0.02621848],
       [0.        , 0.        , 0.        , ..., 0.        , 0.0188447 ,
        0.00457516],
       [0.50327548, 0.19732263, 0.00961905, ..., 0.        , 0.02691071,
        0.22182527],
       ...,
       [0.00173611, 0.        , 0.        , ..., 0.        , 0.00291667,
        0.        ],
       [0.        , 0.        , 0.19470271, ..., 0.38539343, 0.19947546,
        0.03235011],
       [0.        , 0.        , 0.00482759, ..., 0.00206897, 0.0069697 ,
        0.01766667]])

In [113]:
rf_test_predictions = pd.DataFrame(rf_predictions, columns=['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9'])

In [114]:
predictions = test_filenames.join(rf_test_predictions)

In [115]:
predictions.drop(columns=['class'], inplace=True)

In [116]:
predictions.rename(columns={'filename': 'img'}, inplace=True)

In [117]:
predictions

Unnamed: 0,img,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9
0,img_1.jpg,0.012773,0.051887,0.062169,0.045604,0.075507,0.612750,0.035819,0.002203,0.075071,0.026218
1,img_10.jpg,0.000000,0.000000,0.000000,0.000000,0.011932,0.964648,0.000000,0.000000,0.018845,0.004575
2,img_100.jpg,0.503275,0.197323,0.009619,0.003040,0.000000,0.024722,0.013285,0.000000,0.026911,0.221825
3,img_1000.jpg,0.000526,0.001600,0.003887,0.003750,0.005671,0.037143,0.023430,0.010000,0.770966,0.143027
4,img_100000.jpg,0.006813,0.000000,0.000000,0.957205,0.029316,0.001250,0.000000,0.000000,0.005417,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
79721,img_99994.jpg,0.028851,0.258388,0.141071,0.000000,0.000000,0.009987,0.022551,0.240912,0.170321,0.127919
79722,img_99995.jpg,0.012290,0.010714,0.021952,0.854877,0.000000,0.039625,0.010625,0.022601,0.012248,0.015068
79723,img_99996.jpg,0.001736,0.000000,0.000000,0.991806,0.001667,0.001250,0.000625,0.000000,0.002917,0.000000
79724,img_99998.jpg,0.000000,0.000000,0.194703,0.000000,0.000000,0.001304,0.186774,0.385393,0.199475,0.032350


In [118]:
os.getcwd()

'C:\\Users\\Dylan\\Desktop\\Data Science\\Projects\\DistractedDrivers\\data\\raw\\imgs\\test\\test'

In [127]:
predictions.to_csv('C:\\Users\\Dylan\\Desktop\\Data Science\\Projects\\DistractedDrivers\\data\\test_predictions(VGG16_Transfer_Learning_with_RandFor).csv', index=False)

In [124]:
test_predictions_df

Unnamed: 0,img,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9
0,img_1.jpg,0.008604,0.085207,0.018095,0.113947,0.040185,0.600935,0.077657,0.006483,0.026404,0.022482
1,img_10.jpg,0.019094,0.000826,0.001663,0.006799,0.003642,0.941262,0.001846,0.000832,0.017235,0.006800
2,img_100.jpg,0.200818,0.249065,0.058669,0.010818,0.004579,0.048115,0.014242,0.062749,0.039698,0.311247
3,img_1000.jpg,0.004387,0.002688,0.024336,0.000805,0.002280,0.009872,0.004561,0.000767,0.943617,0.006687
4,img_100000.jpg,0.045187,0.001458,0.000840,0.701084,0.199329,0.022523,0.005878,0.001054,0.016436,0.006211
...,...,...,...,...,...,...,...,...,...,...,...
79721,img_99994.jpg,0.004477,0.186477,0.133851,0.001749,0.000536,0.011587,0.029475,0.449997,0.096771,0.085079
79722,img_99995.jpg,0.013295,0.035097,0.000265,0.931134,0.004813,0.001793,0.002391,0.000746,0.002524,0.007941
79723,img_99996.jpg,0.000791,0.041994,0.000147,0.846906,0.099453,0.000325,0.007390,0.000138,0.002115,0.000740
79724,img_99998.jpg,0.000449,0.085182,0.286448,0.000522,0.000433,0.000592,0.328923,0.205457,0.084118,0.007875


In [125]:
predictions

Unnamed: 0,img,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9
0,img_1.jpg,0.012773,0.051887,0.062169,0.045604,0.075507,0.612750,0.035819,0.002203,0.075071,0.026218
1,img_10.jpg,0.000000,0.000000,0.000000,0.000000,0.011932,0.964648,0.000000,0.000000,0.018845,0.004575
2,img_100.jpg,0.503275,0.197323,0.009619,0.003040,0.000000,0.024722,0.013285,0.000000,0.026911,0.221825
3,img_1000.jpg,0.000526,0.001600,0.003887,0.003750,0.005671,0.037143,0.023430,0.010000,0.770966,0.143027
4,img_100000.jpg,0.006813,0.000000,0.000000,0.957205,0.029316,0.001250,0.000000,0.000000,0.005417,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
79721,img_99994.jpg,0.028851,0.258388,0.141071,0.000000,0.000000,0.009987,0.022551,0.240912,0.170321,0.127919
79722,img_99995.jpg,0.012290,0.010714,0.021952,0.854877,0.000000,0.039625,0.010625,0.022601,0.012248,0.015068
79723,img_99996.jpg,0.001736,0.000000,0.000000,0.991806,0.001667,0.001250,0.000625,0.000000,0.002917,0.000000
79724,img_99998.jpg,0.000000,0.000000,0.194703,0.000000,0.000000,0.001304,0.186774,0.385393,0.199475,0.032350
