## ME 592 Homework 4 Engineering Image Analysis - Distracted Driving Classification
#### Jake Bergfeld, Mohammad Rashid Mohammad Shoaib, Melika Tajipour

#### <u>Gathering data from Kaggle - Link to data: https://www.kaggle.com/competitions/state-farm-distracted-driver-detection/data

In [1]:
# !pip install kaggle
# !mkdir .kaggle  #naming required by kaggle API, creates a hidden folder
# !cp /home/exouser/Downloads/kaggle.json /home/exouser/.kaggle/kaggle.json

#### <u>Confirming location of Kaggle API token was moved successfully

In [2]:
!cd .kaggle && ls

kaggle.json


#### <u>Downloading the specific dataset and confirming locations

In [3]:
# !kaggle datasets list -s 'State Farm Distracted Driver Detection'

In [4]:
# !kaggle datasets download -d 'rightway11/state-farm-distracted-driver-detection'

In [5]:
# !sudo apt-get install unzip
# !unzip state-farm-distracted-driver-detection.zip -d data/

In [6]:
# !cd data && ls
# !cd data/imgs && ls
# !cd data/imgs/train && ls
# !cd data/imgs/test && ls

In [26]:
#Load libraries
import os
import numpy as np
import torch
import torch.nn as nn
import torchvision
import glob
from torchvision.transforms import transforms
# from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable

import pathlib
import pandas as pd
import numpy as np
import cv2
import random

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from PIL import Image
import pandas as pd
import os

In [27]:
# Load driver details and image filenames
driver_imgs = pd.read_csv('/home/exouser/data/driver_imgs_list.csv')
driver_imgs

Unnamed: 0,subject,classname,img
0,p012,c0,img_10206.jpg
1,p012,c0,img_27079.jpg
2,p012,c0,img_50749.jpg
3,p012,c0,img_97089.jpg
4,p012,c0,img_37741.jpg
...,...,...,...
34919,p075,c9,img_15827.jpg
34920,p075,c9,img_16688.jpg
34921,p075,c9,img_64532.jpg
34922,p075,c9,img_7918.jpg


### <u>Data Information: 
    Default image size is 320x240
    
    The 10 classes to predict are:
        c0: normal driving
        c1: texting - right
        c2: talking on the phone - right
        c3: texting - left
        c4: talking on the phone - left
        c5: operating the radio
        c6: drinking
        c7: reaching behind
        c8: hair and makeup
        c9: talking to passenger

### <u>Step 1: Train a model with roughly 500,000 parameters

##### *Formula to calculate the number of parameters in a CNN:*
   -  Convolutional layer: (in_channels x out_channels x kernel_height x kernel_width) + out_channels
   -  Batch normalization layer: 2 x num_features
   -  ReLU activation layer: 0 (no parameters)
   -  Max pooling layer: 0 (no parameters)
   -  Fully connected layer: (in_features x out_features) + out_features

In [28]:
#Check for nvidia device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [29]:
#Directory path for training & testing data
train_path = 'Documents/HW4/state-farm-distracted-driver-detection/imgs/train'
test_path = 'Documents/HW4/state-farm-distracted-driver-detection/imgs/test'


In [30]:
#Preprocess the data
class DistractedDriverDataset(train_path):
    def __init__(self, img_dir, driver_csv, transform=None):
        self.img_dir = img_dir
        self.driver_csv = pd.read_csv(driver_csv)
        self.transform = transform
        
    def __len__(self):
        return len(self.driver_csv)
    
    def __getitem__(self, idx):
        img_name = os.path.join(self.img_dir, self.driver_csv.iloc[idx, 1])
        img = Image.open(img_name)
        label = self.driver_csv.iloc[idx, 2]
        
        if self.transform:
            img = self.transform(img)
            
        return img, label

TypeError: str() argument 2 must be str, not tuple

In [31]:
# Define the data augmentations
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [32]:
#Directory path for training & testing data
train_path = 'Documents/HW4/state-farm-distracted-driver-detection/imgs/train'
test_path = 'Documents/HW4/state-farm-distracted-driver-detection/imgs/test'

In [33]:
#Define the dataloaders
train_loader = DataLoader(train_path, batch_size=32, shuffle=True)
test_loader = DataLoader(test_path, batch_size=32, shuffle=False)

In [34]:
#Build a CNN with ~500,000 parameters
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(256 * 14 * 14, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 10)
        
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))
        x = x.view(-1, 256 * 14 * 14)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize the model
model = CNN()


In [36]:
#For multi-class use a cross-entropy loss and Adam optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [39]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(10):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader, 0):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0
        
        correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print('Accuracy of the network on the test images after epoch %d: %.2f %%' % (epoch + 1, accuracy))


ValueError: too many values to unpack (expected 2)