# ME 592 Homework 4
## Jake Bergfeld, Mohammad Rashid Mohammad Shoaib, Melika Tajipour
#### Engineering Image Analysis - Distracted Driving Classification

##### Gathering data from Kaggle - Link to data: https://www.kaggle.com/competitions/state-farm-distracted-driver-detection/data

In [15]:
# !pip install kaggle
# !mkdir .kaggle  #naming required by kaggle API, creates a hidden folder
# !cp /home/exouser/Downloads/kaggle.json /home/exouser/.kaggle/kaggle.json

##### <u>Confirming location of Kaggle API token was moved successfully

In [21]:
# !cd .kaggle && ls

##### <u>Downloading the specific dataset and confirming locations

In [17]:
# !kaggle datasets list -s 'State Farm Distracted Driver Detection'

In [18]:
# !kaggle datasets download -d 'rightway11/state-farm-distracted-driver-detection'

In [19]:
# !sudo apt-get install unzip
# !unzip state-farm-distracted-driver-detection.zip -d data/

In [20]:
# !cd data && ls
# !cd data/imgs && ls
# !cd data/imgs/train && ls
# !cd data/imgs/test && ls

In [2]:
#Load libraries
import os
import numpy as np
import torch
import torch.nn as nn
import torchvision
import glob
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import pathlib
import pandas as pd
import numpy as np
import cv2
import random

In [3]:
# Load driver details and image filenames
driver_imgs = pd.read_csv('/home/exouser/data/driver_imgs_list.csv')
driver_imgs

Unnamed: 0,subject,classname,img
0,p012,c0,img_10206.jpg
1,p012,c0,img_27079.jpg
2,p012,c0,img_50749.jpg
3,p012,c0,img_97089.jpg
4,p012,c0,img_37741.jpg
...,...,...,...
34919,p075,c9,img_15827.jpg
34920,p075,c9,img_16688.jpg
34921,p075,c9,img_64532.jpg
34922,p075,c9,img_7918.jpg


### <u>Data Information: 
    Default image size is 320x240
    
    The 10 classes to predict are:
        c0: normal driving
        c1: texting - right
        c2: talking on the phone - right
        c3: texting - left
        c4: talking on the phone - left
        c5: operating the radio
        c6: drinking
        c7: reaching behind
        c8: hair and makeup
        c9: talking to passenger

### <u>Step 1: Train a model with roughly 500,000 parameters

##### *Formula to calculate the number of parameters in a CNN:*
   -  Convolutional layer: (in_channels x out_channels x kernel_height x kernel_width) + out_channels
   -  Batch normalization layer: 2 x num_features
   -  ReLU activation layer: 0 (no parameters)
   -  Max pooling layer: 0 (no parameters)
   -  Fully connected layer: (in_features x out_features) + out_features

In [4]:
#Load libraries
import os
import numpy as np
import torch
import glob
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import torchvision
import pathlib
import shutil
import random

In [12]:
#checking for device
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [13]:
#Transforms
transformer=transforms.Compose([
    transforms.Resize((150,150)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),  #0-255 to 0-1, numpy to tensors
    transforms.Normalize([0.5,0.5,0.5], # 0-1 to [-1,1] , formula (x-mean)/std
                        [0.5,0.5,0.5])
])

In [14]:
#Dataloader

#Path for training and testing directory
train_path='/home/exouser/data/imgs/train'
# test_path='/home/exouser/data/imgs/test'


train_loader=DataLoader(
    torchvision.datasets.ImageFolder(train_path,transform=transformer),
    batch_size=64, shuffle=True
)
# test_loader=DataLoader(
#     torchvision.datasets.ImageFolder(test_path,transform=transformer),
#     batch_size=32, shuffle=True
# )

In [16]:
# #Breaking the labeled training data into 2 datasets: Training & labeled Testing images
# path = '/home/exouser/data/imgs/train (copy)'

# folder1 = '/home/exouser/data/imgs/train_copy/train_imgs'    #70% ofthe training images will be moved here
# folder2 = '/home/exouser/data/imgs/train_copy/test_imgs'     #30% of the training images will be moved here

# image_files = os.listdir(path)
# random.shuffle(image_files)

# split_index = int(0.7*len(image_files))   #70/30 split
# image_set1 = image_files[:split_index]
# image_set2 = image_files[split_index:]

# for image_file in image_set1:
#     shutil.move(os.path.join(path,image_file), folder1)
    
# for image_file in image_set2:
#     shutil.move(os.path.join(path, image_file), folder2)


In [17]:
#categories
root=pathlib.Path(train_path)
classes=sorted([j.name.split('/')[-1] for j in root.iterdir()])
print(classes)


['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']


In [65]:
#Building a CNN Network with ~500,000 parameters

class MoreParamsConvNet(nn.Module):
    def __init__(self,num_classes=10):
        super(MoreParamsConvNet,self).__init__()

        self.conv1=nn.Conv2d(in_channels=3,out_channels=32,kernel_size=3,stride=1,padding=1)
        self.bn1=nn.BatchNorm2d(num_features=32)
        self.relu1=nn.ReLU()

        self.conv2=nn.Conv2d(in_channels=32,out_channels=64,kernel_size=3,stride=1,padding=1)
        self.bn2=nn.BatchNorm2d(num_features=64)
        self.relu2=nn.ReLU()

        self.conv3=nn.Conv2d(in_channels=64,out_channels=128,kernel_size=3,stride=1,padding=1)
        self.bn3=nn.BatchNorm2d(num_features=128)
        self.relu3=nn.ReLU()

        self.conv4=nn.Conv2d(in_channels=128,out_channels=256,kernel_size=3,stride=1,padding=1)
        self.bn4=nn.BatchNorm2d(num_features=256)
        self.relu4=nn.ReLU()

        self.pool=nn.MaxPool2d(kernel_size=2)

        self.fc1=nn.Linear(in_features=256*18*18,out_features=512)
        self.relu5=nn.ReLU()

        self.fc2=nn.Linear(in_features=512,out_features=num_classes)

        # Total parameter count for the CNN: 32*3*3*3 + 32 + 64*32*3*3 + 64 + 128*64*3*3 + 128 + 256*128*3*3 + 256 + 256*18*18*512 + 512 + 512*10 = 492,532
    
    def forward(self,input):
        output=self.conv1(input)
        output=self.bn1(output)
        output=self.relu1(output)

        output=self.pool(output)

        output=self.conv2(output)
        output=self.bn2(output)
        output=self.relu2(output)

        output=self.pool(output)

        output=self.conv3(output)
        output=self.bn3(output)
        output=self.relu3(output)
    
        output=self.pool(output)

        output=self.conv4(output)
        output=self.bn4(output)
        output=self.relu4(output)

        output=output.view(-1,256*18*18)

        output=self.fc1(output)
        output=self.relu5(output)

        output=self.fc2(output)

        return output
        

In [66]:
model=MoreParamsConvNet(num_classes=10).to(device)

In [67]:
#Optmizer and loss function
optimizer=Adam(model.parameters(),lr=0.001,weight_decay=0.0001)
loss_function=nn.CrossEntropyLoss()

In [68]:
num_epochs=10

In [69]:
#calculating the size of training and testing images
train_count=len(glob.glob(train_path+'/**/*.jpg'))

In [70]:
print(train_count)

17462


In [71]:
#Model training and saving best model

best_accuracy=0.0

for epoch in range(num_epochs):
    
    #Evaluation and training on training dataset
    model.train()
    train_accuracy=0.0
    train_loss=0.0
    
    for i, (images,labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        optimizer.zero_grad()
        
        outputs=model(images)
        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()
        
        
        train_loss+= loss.cpu().data*images.size(0)
        _,prediction=torch.max(outputs.data,1)
        
        train_accuracy+=int(torch.sum(prediction==labels.data))
        
    train_accuracy=train_accuracy/train_count
    train_loss=train_loss/train_count
    
    print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy))

    
#     # Evaluation on testing dataset
#     model.eval()
    
#     test_accuracy=0.0
#     for i, (images,labels) in enumerate(test_loader):
#         if torch.cuda.is_available():
#             images=Variable(images.cuda())
#             labels=Variable(labels.cuda())
            
#         outputs=model(images)
#         _,prediction=torch.max(outputs.data,1)
#         test_accuracy+=int(torch.sum(prediction==labels.data))
    
#     test_accuracy=test_accuracy/test_count
    
    
#     print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy)+' Test Accuracy: '+str(test_accuracy))
#     print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy))

#     #Save the best model
#     if test_accuracy>best_accuracy:
#         torch.save(model.state_dict(),'best_checkpoint.model')
#         best_accuracy=test_accuracy
    
       

Epoch: 0 Train Loss: tensor(2.6189) Train Accuracy: 0.46512426984308786
Epoch: 1 Train Loss: tensor(0.3767) Train Accuracy: 0.8866681937922346
Epoch: 2 Train Loss: tensor(0.1327) Train Accuracy: 0.9620318405680907
Epoch: 3 Train Loss: tensor(0.0775) Train Accuracy: 0.9772649181078914
Epoch: 4 Train Loss: tensor(0.0551) Train Accuracy: 0.9839651815370519
Epoch: 5 Train Loss: tensor(0.0501) Train Accuracy: 0.9841942503722368
Epoch: 6 Train Loss: tensor(0.0535) Train Accuracy: 0.9831634406139045
Epoch: 7 Train Loss: tensor(0.0479) Train Accuracy: 0.9859122666361242
Epoch: 8 Train Loss: tensor(0.0380) Train Accuracy: 0.9884892910319552
Epoch: 9 Train Loss: tensor(0.0539) Train Accuracy: 0.9833352422402932


### <u>Step 2: Train a model with roughly 10,000,000 parameters

In [57]:
class LargeConvNet(nn.Module):
    def __init__(self, num_classes=10):
        super(LargeConvNet, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(num_features=64)
        self.relu1 = nn.ReLU()

        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5, stride=2, padding=2)
        self.bn2 = nn.BatchNorm2d(num_features=128)
        self.relu2 = nn.ReLU()

        self.conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(num_features=256)
        self.relu3 = nn.ReLU()

        self.conv4 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(num_features=512)
        self.relu4 = nn.ReLU()

        self.conv5 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1)
        self.bn5 = nn.BatchNorm2d(num_features=1024)
        self.relu5 = nn.ReLU()

        self.pool = nn.MaxPool2d(kernel_size=2)

        self.fc1 = nn.Linear(in_features=1024 * 8 * 8, out_features=2048)
        self.relu6 = nn.ReLU()

        self.fc2 = nn.Linear(in_features=2048, out_features=num_classes)

        # Total parameter count for the CNN: 64*3*3*3*3 + 64 + 128*64*5*5*3 + 128 + 256*128*3*3*1 + 256 + 512*256*3*3*1 + 512 + 1024*512*3*3*1 + 1024 + 1024*8*8*2048 + 2048 + 2048*10 
        # = 10,041,866

    def forward(self, input):
        output = self.conv1(input)
        output = self.bn1(output)
        output = self.relu1(output)

        output = self.pool(output)

        output = self.conv2(output)
        output = self.bn2(output)
        output = self.relu2(output)

        output = self.pool(output)

        output = self.conv3(output)
        output = self.bn3(output)
        output = self.relu3(output)

        output = self.conv4(output)
        output = self.bn4(output)
        output = self.relu4(output)

        output = self.conv5(output)
        output = self.bn5(output)
        output = self.relu5(output)

        output = output.view(-1, 1024 * 8 * 8)

        output = self.fc1(output)
        output = self.relu6(output)

        output = self.fc2(output)

        return output


In [58]:
model=LargeConvNet(num_classes=10).to(device)

In [59]:
#Optmizer and loss function
optimizer=Adam(model.parameters(),lr=0.001,weight_decay=0.0001)
loss_function=nn.CrossEntropyLoss()

In [60]:
num_epochs=10

In [61]:
#calculating the size of training and testing images
train_count=len(glob.glob(train_path+'/**/*.jpg'))

In [62]:
print(train_count)

17462


In [64]:
#Model training and saving best model

best_accuracy=0.0

for epoch in range(num_epochs):
    
    #Evaluation and training on training dataset
    model.train()
    train_accuracy=0.0
    train_loss=0.0
    
    for i, (images,labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        optimizer.zero_grad()
        
        outputs=model(images)
        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()
        
        
        train_loss+= loss.cpu().data*images.size(0)
        _,prediction=torch.max(outputs.data,1)
        
        train_accuracy+=int(torch.sum(prediction==labels.data))
        
    train_accuracy=train_accuracy/train_count
    train_loss=train_loss/train_count
    
    print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy))

    
#     # Evaluation on testing dataset
#     model.eval()
    
#     test_accuracy=0.0
#     for i, (images,labels) in enumerate(test_loader):
#         if torch.cuda.is_available():
#             images=Variable(images.cuda())
#             labels=Variable(labels.cuda())
            
#         outputs=model(images)
#         _,prediction=torch.max(outputs.data,1)
#         test_accuracy+=int(torch.sum(prediction==labels.data))
    
#     test_accuracy=test_accuracy/test_count
    
    
#     print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy)+' Test Accuracy: '+str(test_accuracy))
#     print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy))

#     #Save the best model
#     if test_accuracy>best_accuracy:
#         torch.save(model.state_dict(),'best_checkpoint.model')
#         best_accuracy=test_accuracy
    
       

ValueError: Expected input batch_size (81) to match target batch_size (64).