# ME 592 Homework 4
## Jake Bergfeld, Mohammad Rashid Mohammad Shoaib, Melika Tajipour
### Engineering Image Analysis - Distracted Driving Classification

In [2]:
#Load libraries
import os
import numpy as np
import torch
import torch.nn as nn
import torchvision
import glob
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import pathlib

In [3]:
#Check for nvidia device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
print(device)

cuda


In [5]:
#Data transformation
transformer = transforms.Compose([
    transforms.Resize((150,150)),
    transforms.RandomHorizontalFlip(),   #Do this to add variation to data, augmentation technique
    transforms.ToTensor(),              #Changes pixel range from color channel (0-255 to 0-1) changes from numpy to tensor
    transforms.Normalize([0.5, 0.5, 0.5], 
                         [0.5, 0.5, 0.5])     # 0-1 to [-1-1] 
])

In [7]:
#Dataloader using batches

# #Directory path for training & testing data
# train_path = 'Documents/HW4/state-farm-distracted-driver-detection/imgs/train'
# test_path = 'Documents/HW4/state-farm-distracted-driver-detection/imgs/test'

train_path = '/home/exouser/data/imgs/train'
test_path = '/home/exouser/data/imgs/test'

train_loader = DataLoader(
    torchvision.datasets.ImageFolder(train_path, transform=transformer),
    batch_size = 256, shuffle = True    #ADJUST THIS, HIGHER BATCH SIZE REQUIRES MORE MEMORY
)

# test_loader = DataLoader(
#     torchvision.datasets.ImageFolder(test_path, transform=transformer),
#     batch_size = 256, shuffle = True    #ADJUST THIS, HIGHER BATCH SIZE REQUIRES MORE MEMORY
# )

In [8]:
#Categorize the images
root = pathlib.Path(train_path)
classes = sorted ([j.name.split('/')[-1] for j in root.iterdir()])

In [9]:
print(classes)

['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']


In [10]:
#Building the CNN Network:
class ConvNet(nn.Module):
    def __init__(self,num_classes=10):
        super(ConvNet, self).__init__()
          
        #Formula for height and width of CNN output: ((w-f+2P/s)+1)
            # Where w = width (150), f = kernel size (3), P = padding (1), s = stride (1)
        
        #Input Shape = (256,3,150,150) in the format (batch size, RGB channel, image height, image width)
        
        #FIRST CNN LAYER:
        self.conv1=nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        #New shape = (256,12,150,150)
        self.bn1=nn.BatchNorm2d(num_features=12)
        #New shape = (256,12,150,150)
        self.reul1=nn.ReLU()
        #New shape = (256,12,150,150)
        #Now add max pooling layer
        self.pool=nn.MaxPool2d(kernel_size=2)
        #New shape = (256,12,75,75)
        
        #SECOND CNN LAYER:
        self.conv2=nn.Conv2d(in_channels=12, out_channels=20, kernel_size=3, stride=1, padding=1)
        #New shape = (256,20,75,75)
        self.relu1=nn.ReLU()
        #New shape =256,20,75,75)
        
        #THIRD CNN LAYER:
        self.conv3=nn.Conv2d(in_channels=20, out_channels=32, kernel_size=3, stride=1, padding=1)
        #New shape = (256,32,75,75)
        self.bn1=nn.BatchNorm2d(num_features=32)
        #New shape = (256,32,75,75)
        self.reul1=nn.ReLU()
        #New shape = (256,32,75,75)
        
        #PLAY AROUND WITH THESE LAYERS, CAN ADD MORE LAYERS OR MORE DEPTH TO INCREASE ACCURACY
        self.fc=nn.Linear(in_features=32*75*75, out_features=num_classes)
                          
        
        #Feed forward function
    def forward(self,input):
        output=self.conv1(input)
        output=self.bn1(output)
        output=self.relu1(output)
        output=self.pool(output)
            
        output=self.conv2(output)
        output=self.relu2(output)
            
        output=self.conv3(output)
        output=self.bn3(output)
        output=self.relu3(output)
        #This generates an matrix output with shape: (256,32,75,75)
            
        output=output.view(-1,32*75*75)
            
        output=self.fc(output)
        return output

In [11]:
model=ConvNet(num_classes=10).to(device)

In [12]:
#Optimizer & Loss function:
optimizer=Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
loss_function=nn.CrossEntropyLoss()

In [13]:
#Hyperparameter epoch count
num_epochs=10

In [14]:
#Calculating the size of training & testing images
train_count=len(glob.glob(train_path+'/**/*.jpg'))
test_count=len(glob.glob(test_path+'/**/*.jpg'))

In [15]:
print(train_count, test_count)

17462 0


In [16]:
#Training CNN network and saving best model
best_accuracy=0.0

for epoch in range(num_epochs):
    #Evaluation & training for training data
    model.train()
    training_accuracy=0.0
    train_loss=0.0
    
    for i, (images,labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        optimizer.zero_grad()
        outputs=model(images)
        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()
        
        train_loss+=loss.cpu().data*images.size(0)
        _,prediction=torch.max(outputs.data,1)
        train_accuracy+=int(torch.sum(prediction==labels.data))
        
    train_accuracy=train_accuracy/train_count
    train_loss=train_loss/train_count
    
    #Evaluation & training for test data
    model.eval()
    
    for i, (images,labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
        
        outputs=model(images)
        _,prediction=torch.max(outputs.data,1)
        test_accuracy+=int(torch.sum(prediction==labels.data))
        
    test_accuracy=test_accuracy/test_count
    
    print('Epoch: '+str(epoch)+' Train Loss: '+str(int(train_loss))+' Train Accuracy: '+str(train_accuracy)+' Test Accuracy: '+str(test_accuracy))
    
    # #Save the best model
    # if test_accuracy>best_accuracy:
    #     torch.save(model.state_dict(),'best_checkpoint.model')
    #     best_accuracy=test_accuracy


RuntimeError: running_mean should contain 12 elements not 32