In [1]:
import os
import pandas as pd
from shutil import copyfile,rmtree
import random
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [8]:
# getting the list of all train images
data = pd.read_csv("C://Users//almuhyaru//Downloads//state-farm-distracted-driver-detection//driver_imgs_list.csv", usecols = [1,2])
data.nunique(),data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22424 entries, 0 to 22423
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   classname  22424 non-null  object
 1   img        22424 non-null  object
dtypes: object(2)
memory usage: 350.5+ KB


(classname       10
 img          22424
 dtype: int64,
 None)

In [9]:
# list of all classes
classes_list = data['classname'].unique()
classes_list

array(['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9'],
      dtype=object)

In [108]:
#dictionary containing all train data file names, class wise
train_data_files={}
for cls, image_name in data.values:
    key = cls
    if key in train_data_files:
        train_data_files[key].append(image_name)
    else:
        train_data_files[key] = [image_name]

# printing the size of dataset for each class
for key in train_data_files:
    print(key, ":", len(train_data_files[key]))

c0 : 2489
c1 : 2267
c2 : 2317
c3 : 2346
c4 : 2326
c5 : 2312
c6 : 2325
c7 : 2002
c8 : 1911
c9 : 2129


The 10 classes to predict are:

c0: normal driving
c1: texting - right
c2: talking on the phone - right
c3: texting - left
c4: talking on the phone - left
c5: operating the radio
c6: drinking
c7: reaching behind
c8: hair and makeup
c9: talking to passenger

### Splittin, transofrming and generating image data


In [10]:
BATCH_SIZE = 128
IMAGE_SIZE = 224

In [11]:
TRAIN_DIR='C://Users//almuhyaru//Downloads//state-farm-distracted-driver-detection/imgs/train'
datagen = ImageDataGenerator(
        rescale = 1./255,
        validation_split = 0.2
)

training_data = datagen.flow_from_directory(TRAIN_DIR,
                                        target_size=(IMAGE_SIZE,IMAGE_SIZE),
                                        batch_size=BATCH_SIZE,
                                        subset='training',shuffle=False)

evaluating_data = datagen.flow_from_directory(TRAIN_DIR,
                                        target_size=(IMAGE_SIZE,IMAGE_SIZE),
                                        batch_size=BATCH_SIZE,
                                        subset='validation',shuffle=False)

Found 17943 images belonging to 10 classes.
Found 4481 images belonging to 10 classes.


### Creating the model - CNN

In [12]:
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten

In [14]:
model = keras.models.Sequential([
      Conv2D(16, (3,3), activation='relu', input_shape = (IMAGE_SIZE, IMAGE_SIZE, 3)),
      MaxPooling2D(2, 2),
      Conv2D(32, (3,3), activation='relu'),
      MaxPooling2D(2, 2),
      Conv2D(64, (3,3), activation='relu'),
      MaxPooling2D(2, 2),
      Flatten(),
      Dense(1024, activation='relu'),
      Dense(10, activation='softmax')
])

In [15]:
# compile the model
model.compile(optimizer= Adam(learning_rate = 0.001), loss = 'categorical_crossentropy', metrics = ['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 16)      448       
                                                                 
 max_pooling2d (MaxPooling2  (None, 111, 111, 16)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 109, 109, 32)      4640      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 54, 54, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 52, 52, 64)        18496     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 26, 26, 64)        0

In [16]:
MIN_DELTA=0.005
EPOCHS=20
PATIENCE=2

In [17]:
# to stop training if no significant change in validation data accuracy
es = EarlyStopping(monitor = 'val_accuracy', patience = PATIENCE, min_delta = MIN_DELTA)

In [18]:
# fitting and generating the model
model.fit(
    training_data, 
    epochs = EPOCHS, 
    validation_data = evaluating_data,
    callbacks = [es]
         )

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20


<keras.src.callbacks.History at 0x2234ab48fa0>

### Predict and load test data

In [19]:
from tensorflow.keras.utils import image_dataset_from_directory

In [20]:
img_dir="C://Users//almuhyaru//Downloads//state-farm-distracted-driver-detection/imgs"

In [21]:
# transforming test data same as training data
test_data_gen = ImageDataGenerator(
    rescale = 1./225
)

test_data = test_data_gen.flow_from_directory(img_dir,
                          target_size = (IMAGE_SIZE, IMAGE_SIZE),
                          classes = ['test'],
                          shuffle = False,
                          batch_size = BATCH_SIZE)

Found 79726 images belonging to 1 classes.


In [22]:
predicted = model.predict(test_data)
predicted.shape



(79726, 10)

###  Loading the prediction in required format

In [104]:
test_data_files = image_dataset_from_directory(
    'C://Users//almuhyaru//Downloads//state-farm-distracted-driver-detection/imgs/test',
     labels = None,
    label_mode=None,
)

Found 79726 files belonging to 1 classes.


In [106]:
df = pd.DataFrame(predicted)
df.columns = ['c0','c1','c2','c3','c4','c5','c6','c7','c8','c9']
filepath = [i.split('/')[-1] for i in test_data_files.file_paths]
df1 = pd.DataFrame(filepath)
df1.columns = ['img']
df = df1.join(df)
df.to_csv('output.csv',index=False)

In [103]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79726 entries, 0 to 79725
Data columns (total 11 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   img     79726 non-null  object 
 1   c0      79726 non-null  float32
 2   c1      79726 non-null  float32
 3   c2      79726 non-null  float32
 4   c3      79726 non-null  float32
 5   c4      79726 non-null  float32
 6   c5      79726 non-null  float32
 7   c6      79726 non-null  float32
 8   c7      79726 non-null  float32
 9   c8      79726 non-null  float32
 10  c9      79726 non-null  float32
dtypes: float32(10), object(1)
memory usage: 3.6+ MB
