# ME 592 Homework 4
## Jake Bergfeld, Mohammad Rashid Mohammad Shoaib, Melika Tajipour
#### Engineering Image Analysis - Distracted Driving Classification

##### Gathering data from Kaggle - Link to data: https://www.kaggle.com/competitions/state-farm-distracted-driver-detection/data

In [38]:
# !pip install kaggle
# !mkdir .kaggle  #naming required by kaggle API, creates a hidden folder
# !cp /home/exouser/Downloads/kaggle.json /home/exouser/.kaggle/kaggle.json

##### <u>Confirming location of Kaggle API token was moved successfully

In [1]:
!cd .kaggle && ls

kaggle.json


##### <u>Downloading the specific dataset and confirming locations

In [2]:
# !kaggle datasets list -s 'State Farm Distracted Driver Detection'

In [3]:
# !kaggle datasets download -d 'rightway11/state-farm-distracted-driver-detection'

In [4]:
# !sudo apt-get install unzip
# !unzip state-farm-distracted-driver-detection.zip -d data/

In [5]:
# !cd data && ls
# !cd data/imgs && ls
# !cd data/imgs/train && ls
# !cd data/imgs/test && ls

In [6]:
#Load libraries
import os
import numpy as np
import torch
import torch.nn as nn
import torchvision
import glob
from torchvision.transforms import transforms
# from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable

import pathlib
import pandas as pd
import numpy as np
import cv2
import random

In [8]:
# Load driver details and image filenames
driver_imgs = pd.read_csv('/home/exouser/data/driver_imgs_list.csv')
driver_imgs

Unnamed: 0,subject,classname,img
0,p012,c0,img_10206.jpg
1,p012,c0,img_27079.jpg
2,p012,c0,img_50749.jpg
3,p012,c0,img_97089.jpg
4,p012,c0,img_37741.jpg
...,...,...,...
34919,p075,c9,img_15827.jpg
34920,p075,c9,img_16688.jpg
34921,p075,c9,img_64532.jpg
34922,p075,c9,img_7918.jpg


### <u>Data Information: 
    Default image size is 320x240
    
    The 10 classes to predict are:
        c0: normal driving
        c1: texting - right
        c2: talking on the phone - right
        c3: texting - left
        c4: talking on the phone - left
        c5: operating the radio
        c6: drinking
        c7: reaching behind
        c8: hair and makeup
        c9: talking to passenger

### <u>Step 1: Train a model with roughly 500,000 parameters

##### *Formula to calculate the number of parameters in a CNN:*
   -  Convolutional layer: (in_channels x out_channels x kernel_height x kernel_width) + out_channels
   -  Batch normalization layer: 2 x num_features
   -  ReLU activation layer: 0 (no parameters)
   -  Max pooling layer: 0 (no parameters)
   -  Fully connected layer: (in_features x out_features) + out_features

In [9]:
#Check for nvidia device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [10]:
#Data transformation
transformer = transforms.Compose([
    transforms.Resize((150,150)),
    transforms.RandomHorizontalFlip(),   #Do this to add variation to data, augmentation technique
    transforms.ToTensor(),              #Changes pixel range from color channel (0-255 to 0-1) changes from numpy to tensor
    transforms.Normalize([0.5, 0.5, 0.5], 
                         [0.5, 0.5, 0.5])     # 0-1 to [-1-1] 
])

In [11]:
#Dataloader using batches

#Directory path for training & testing data
train_path = 'Documents/HW4/state-farm-distracted-driver-detection/imgs/train'
test_path = 'Documents/HW4/state-farm-distracted-driver-detection/imgs/test'

train_loader = DataLoader(
    torchvision.datasets.ImageFolder(train_path, transform=transformer),
    batch_size = 256, shuffle = True    #ADJUST THIS, HIGHER BATCH SIZE REQUIRES MORE MEMORY
)

# test_loader = DataLoader(
#     torchvision.datasets.ImageFolder(test_path, transform=transformer),
#     batch_size = 256, shuffle = True    #ADJUST THIS, HIGHER BATCH SIZE REQUIRES MORE MEMORY
# )

In [12]:
#Categorize the images
root = pathlib.Path(train_path)
classes = sorted ([j.name.split('/')[-1] for j in root.iterdir()])
print(classes)

['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']


In [28]:
#Building the CNN Network:
class ConvNet(nn.Module):
    def __init__(self,num_classes=10):
        super(ConvNet, self).__init__()
          
        #Formula for height and width of CNN output: ((w-f+2P/s)+1)
            # Where w = width (150), f = kernel size (3), P = padding (1), s = stride (1)
        
        #Input Shape = (256,3,150,150) in the format (batch size, RGB channel, image height, image width)
        
        #FIRST CNN LAYER:
        self.conv1=nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        #New shape = (256,12,150,150)
        self.bn1=nn.BatchNorm2d(num_features=12)
        #New shape = (256,12,150,150)
        self.reul1=nn.ReLU()
        #New shape = (256,12,150,150)
        #Now add max pooling layer
        self.pool=nn.MaxPool2d(kernel_size=2)
        #New shape = (256,12,75,75)
        
        #SECOND CNN LAYER:
        self.conv2=nn.Conv2d(in_channels=12, out_channels=20, kernel_size=3, stride=1, padding=1)
        #New shape = (256,20,75,75)
        self.relu1=nn.ReLU()
        #New shape =256,20,75,75)
        
        #THIRD CNN LAYER:
        self.conv3=nn.Conv2d(in_channels=20, out_channels=32, kernel_size=3, stride=1, padding=1)
        #New shape = (256,32,75,75)
        self.bn1=nn.BatchNorm2d(num_features=32)
        #New shape = (256,32,75,75)
        self.reul1=nn.ReLU()
        #New shape = (256,32,75,75)
        
        #PLAY AROUND WITH THESE LAYERS, CAN ADD MORE LAYERS OR MORE DEPTH TO INCREASE ACCURACY
        self.fc=nn.Linear(in_features=12*75*75, out_features=num_classes)
                          
        
        #Feed forward function
    def forward(self,input):
        output=self.conv1(input)
        output=self.bn1(output)
        output=self.relu1(output)
        output=self.pool(output)
            
        output=self.conv2(output)
        output=self.relu2(output)
            
        output=self.conv3(output)
        output=self.bn3(output)
        output=self.relu3(output)
        #This generates an matrix output with shape: (256,32,75,75)
            
        output=output.view(-1,32*75*75)
            
        output=self.fc(output)
        return output

In [29]:
model=ConvNet(num_classes=10).to(device)

In [30]:
#Optimizer & Loss function:
optimizer=Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
loss_function=nn.CrossEntropyLoss()

In [31]:
#Hyperparameter epoch count
num_epochs=10

In [32]:
#Calculating the size of training & testing images
train_count=len(glob.glob(train_path+'/**/*.jpg'))
# test_count=len(glob.glob(test_path+'/**/*.jpg'))

In [33]:
print(train_count)

22424


In [36]:
...

# Reset batch normalization layer parameters
model.apply(lambda m: m.reset_parameters() if isinstance(m, nn.BatchNorm2d) else None)

#Training CNN network and saving best model
best_accuracy=0.0

...


Ellipsis

In [37]:
#Training CNN network and saving best model
best_accuracy=0.0

for epoch in range(num_epochs):
    #Evaluation & training for training data
    model.train()
    training_accuracy=0.0
    train_loss=0.0
    
    for i, (images,labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        optimizer.zero_grad()
        outputs=model(images)
        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()
        
        train_loss+=loss.cpu().data*images.size(0)
        _,prediction=torch.max(outputs.data,1)
        train_accuracy+=int(torch.sum(prediction==labels.data))
        
    train_accuracy=train_accuracy/train_count
    train_loss=train_loss/train_count
    
    #Evaluation & training for test data
    model.eval()
    
    for i, (images,labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
        
        outputs=model(images)
        _,prediction=torch.max(outputs.data,1)
        test_accuracy+=int(torch.sum(prediction==labels.data))
        
    test_accuracy=test_accuracy/test_count
    
    print('Epoch: '+str(epoch)+' Train Loss: '+str(int(train_loss))+' Train Accuracy: '+str(train_accuracy)+' Test Accuracy: '+str(test_accuracy))
    
    # #Save the best model
    # if test_accuracy>best_accuracy:
    #     torch.save(model.state_dict(),'best_checkpoint.model')
    #     best_accuracy=test_accuracy


RuntimeError: running_mean should contain 12 elements not 32

In [35]:
# Convert the class labels to integers
class_names = ['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']
class_dict = {class_name: i for i, class_name in enumerate(class_names)}

# Shuffle the data
driver_imgs = driver_imgs.sample(frac=1, random_state=42)

# Split the data into train and validation sets
train_size = int(0.8 * len(driver_imgs))
train_driver_imgs = driver_imgs[:train_size].reset_index(drop=True)
valid_driver_imgs = driver_imgs[train_size:].reset_index(drop=True)

# Load and pre-process the images
def load_image(image_path):
    image = cv2.imread(image_path)
    image = cv2.resize(image, img_size)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.astype(np.float32) / 255.0
    return image

def preprocess_images(driver_imgs):
    images = []
    labels = []

    for i in range(len(driver_imgs)):
        image_path = 'statefarm-distracted-driver-detection/train/' + driver_imgs.loc[i, 'classname'] + '/' + driver_imgs.loc[i, 'img']
        image = load_image(image_path)
        images.append(image)
        labels.append(class_dict[driver_imgs.loc[i, 'classname']])

    images = np.array(images)
    labels = np.array(labels)

    return images, labels

train_images, train_labels = preprocess_images(train_driver_imgs)
valid_images, valid_labels = preprocess_images(valid_driver_imgs)

[ WARN:0@2064.225] global loadsave.cpp:244 findDecoder imread_('statefarm-distracted-driver-detection/train/c2/img_14717.jpg'): can't open/read file: check file path/integrity


NameError: name 'img_size' is not defined

In [53]:
# Check for nvidia device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


In [54]:
# Data transformation
transformer = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])


In [None]:
# Dataloader using batches

train_loader = DataLoader(
    torchvision.datasets.ImageFolder(train_path, transform=transformer),
    batch_size=256, shuffle=True
)

# test_loader = DataLoader(
#     torchvision.datasets.ImageFolder(test_path, transform=transformer),
#     batch_size=256, shuffle=False
# )


In [None]:
# Categorize the images
root = pathlib.Path(train_path)
classes = [j.name.split('/')[-1] for j in root.iterdir()]


In [None]:
#Building the CNN Network:
class ConvNet(nn.Module):
    def __init__(self,num_classes=10):
        super(ConvNet, self).__init__()
          
        #Formula for height and width of CNN output: ((w-f+2P/s)+1)
            # Where w = width (150), f = kernel size (3), P = padding (1), s = stride (1)
        
        #Input Shape = (256,3,150,150) in the format (batch size, RGB channel, image height, image width)
        
        #FIRST CNN LAYER:
        self.conv1=nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        #New shape = (256,12,150,150)
        self.bn1=nn.BatchNorm2d(num_features=12)
        #New shape = (256,12,150,150)
        self.reul1=nn.ReLU()
        #New shape = (256,12,150,150)
        #Now add max pooling layer
        self.pool=nn.MaxPool2d(kernel_size=2)
        #New shape = (256,12,75,75)
        
        #SECOND CNN LAYER:
        self.conv2=nn.Conv2d(in_channels=12, out_channels=20, kernel_size=3, stride=1, padding=1)
        #New shape = (256,20,75,75)
        self.relu1=nn.ReLU()
        #New shape =256,20,75,75)
        
        #THIRD CNN LAYER:
        self.conv3=nn.Conv2d(in_channels=20, out_channels=32, kernel_size=3, stride=1, padding=1)
        #New shape = (256,32,75,75)
        self.bn1=nn.BatchNorm2d(num_features=32)
        #New shape = (256,32,75,75)
        self.reul1=nn.ReLU()
        #New shape = (256,32,75,75)
        
        #PLAY AROUND WITH THESE LAYERS, CAN ADD MORE LAYERS OR MORE DEPTH TO INCREASE ACCURACY
        self.fc=nn.Linear(in_features=32*75*75, out_features=num_classes)
                          
        
        #Feed forward function
    def forward(self,input):
        output=self.conv1(input)
        output=self.bn1(output)
        output=self.relu1(output)
        output=self.pool(output)
            
        output=self.conv2(output)
        output=self.relu2(output)
            
        output=self.conv3(output)
        output=self.bn3(output)
        output=self.relu3(output)
        #This generates an matrix output with shape: (256,32,75,75)
            
        output=output.view(-1,32*75*75)
            
        output=self.fc(output)
        return output

In [None]:
# #Saving the trained model
# torch.save(model.state_dict(),'model.ckpt')
# print('Model trained and saved successfully')

### <u>Step 2: Train a model with roughly 10,00,000 parameters