# Simple CNN architechture

## Imports

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import trange, tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [2]:
import torch
import torchvision
import pandas as pd
import numpy as np
from PIL import Image
from Functions import import_raw_colour_image, import_raw_depth_image, show_depth_image, show_img
import os
from skimage import io, transform
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import warnings
warnings.filterwarnings("ignore")
from pathlib import Path
import torch.nn.functional as F
from torchsummary import summary

plt.ion()   # interactive mode

## Reading in the csv data structure

In [3]:
import csv
with open('..\..\data_descriptions.csv', newline='') as csvfile:
    spamreader = csv.reader(csvfile, delimiter=',', quotechar='|')
    count = 0
    for row in spamreader:
        if count == 0:
            folder_names = row
        else:
            num_files = row
        count = 1

In [4]:
for i in range(0,len(num_files)):
    num_files[i] = int(num_files[i])

In [5]:
list_of_numbers = ["{0:05}".format(i) for i in range(1, sum(num_files)+1)]
colour_filenames = []
depth_filenames = []
for num in list_of_numbers:
    colour_filenames.append(f"colour_{num}.raw")
    depth_filenames.append(f"depth_{num}.raw")

## Dataset class

In [6]:
class ModerateDataset(Dataset):

    def __init__(self, col_dir='', depth_dir='', transform=transforms.ToTensor(),trans_on=False):
        self.path_names = {}
        for folder in folder_names:
            self.path_names[f"{folder}"] = {}
        for folder in folder_names:
            self.path_names[f'{folder}']['colour'] = {}
            self.path_names[f'{folder}']['depth'] = {}
        for i in range(1, num_files[0]):
            self.path_names['Sunny']['colour'][f"{i}"] = {}
            self.path_names['Sunny']['depth'][f"{i}"] = {}
        print("*************MAKE SURE THE PATH FILE IN THE FOR LOOP IS THE BASE IMAGE DIRECTORY ON YOUR COMPUTER**************")
        count = 0
        for folder in folder_names:
            for i in range(0, num_files[folder_names.index(folder)]):
                self.path_names[f'{folder}']['colour'][f'{i+1}'] = Path(f"C:/Users/Ben/OneDrive - Bournemouth University/Computer Vision/Moderate collection/{folder}/colour/{colour_filenames[count+i]}")
                self.path_names[f'{folder}']['depth'][f'{i+1}'] = Path(f"C:/Users/Ben/OneDrive - Bournemouth University/Computer Vision/Moderate collection/{folder}/depth/{depth_filenames[count+i]}")
            count = count + num_files[folder_names.index(folder)]
        
        self.transform = transform
        self.col_dir = col_dir
        self.depth_dir = depth_dir
        self.trans_on = trans_on

    def __getitem__(self,idx):
        if idx == 0:
            
            self.col_dir = self.path_names[f'{folder_names[0]}']['colour'][f'{idx+1}']
            self.depth_dir = self.path_names[f'{folder_names[0]}']['depth'][f'{idx+1}']
        
        if (idx>0 and idx <= num_files[0]):  ## 1-500

            self.col_dir = self.path_names[f'{folder_names[0]}']['colour'][f'{idx}']
            self.depth_dir = self.path_names[f'{folder_names[0]}']['depth'][f'{idx}']

        elif (idx > num_files[0] and idx < (sum(num_files[:2])+1)): ## 501 - 1500

            self.col_dir = self.path_names[f'{folder_names[1]}']['colour'][f'{idx-num_files[0]}']
            self.depth_dir = self.path_names[f'{folder_names[1]}']['depth'][f'{idx-num_files[0]}']

        elif (idx > sum(num_files[:2]) and idx < (sum(num_files[:3])+1) ): ## 1501 - 2600

            self.col_dir = self.path_names[f'{folder_names[2]}']['colour'][f'{idx-sum(num_files[:2])}'] # -1500
            self.depth_dir = self.path_names[f'{folder_names[2]}']['depth'][f'{idx-sum(num_files[:2])}']

        elif (idx > sum(num_files[:3]) and idx < (sum(num_files[:4])+1) ): ## 2601 - 5600

            self.col_dir = self.path_names[f'{folder_names[3]}']['colour'][f'{idx-sum(num_files[:3])}'] #-2600
            self.depth_dir = self.path_names[f'{folder_names[3]}']['depth'][f'{idx-sum(num_files[:3])}']
            
        elif (idx > sum(num_files[:4]) and idx < (sum(num_files[:5])+1) ): ## 5601 - 7857

            self.col_dir = self.path_names[f'{folder_names[4]}']['colour'][f'{idx-sum(num_files[:4])}'] # -5600
            self.depth_dir = self.path_names[f'{folder_names[4]}']['depth'][f'{idx-sum(num_files[:4])}']

        elif (idx > sum(num_files)):
            raise NameError('Index outside of range')

        col_img = import_raw_colour_image(self.col_dir)
        depth_img = import_raw_depth_image(self.depth_dir)
        if self.trans_on == True:
            col_img = torch.from_numpy(np.flip(col_img,axis=0).copy()) # apply any transforms
            depth_img = torch.from_numpy(np.flip(depth_img,axis=0).copy()) # apply any transforms
            col_img = col_img.transpose(0,2)
            col_img = col_img.transpose(1,2)
        return col_img, depth_img
    
    def __len__(self):
        return sum(num_files)



In [7]:
total_Data = ModerateDataset(trans_on=True)  ## instancing the dataset

*************MAKE SURE THE PATH FILE IN THE FOR LOOP IS THE BASE IMAGE DIRECTORY ON YOUR COMPUTER**************


## Train/test splitting

In [8]:
train_size = int(0.8 * len(total_Data))
val_size = len(total_Data) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(total_Data, [train_size, val_size])

In [9]:
tr_dl  = DataLoader(train_dataset,  batch_size=8, shuffle=True,  num_workers=0)
val_dl = DataLoader(val_dataset,  batch_size=8*2, shuffle=True,  num_workers=0)

## Training loop

In [21]:
def fit(net, tr_dl, val_dl, loss=nn.MSELoss(), epochs=3, lr=3e-3, wd=1e-3):   
    print("hello")
    Ltr_hist, Lval_hist = [], []
    
    opt = optim.Adam(net.parameters(), lr=lr, weight_decay=wd)
    print("opt")
    for epoch in trange(epochs):
        print("epoch")
        L = []
        dl = (iter(tr_dl))
        print("dl")
        count_train = 0
        for xb, yb in tqdm(dl, leave=False):
            print("xb,yb loop")
            xb, yb = xb.cuda(), yb.cuda()
            print("xb,yb cuda")
            y_ = net(xb)
            print("y_")
            l = loss(y_, yb)
            opt.zero_grad()
            l.backward()
            opt.step()
            L.append(l.detach().cpu().numpy())
            print(count_train)
            count_train+= 1

        # disable gradient calculations for validation     
        for p in net.parameters(): p.requires_grad = False 

        Lval, Aval = [], []
        val_it = iter(val_dl)
        for xb, yb in tqdm(val_it, leave=False):
            xb, yb = xb.cuda(), yb.cuda()
            y_ = net(xb)
            l = loss(y_, yb)
            Lval.append(l.detach().cpu().numpy())
            Aval.append((y_.max(dim=1)[1] == yb).float().mean().cpu().numpy())

        # enable gradient calculations for next epoch 
        for p in net.parameters(): p.requires_grad = True 
            
        Ltr_hist.append(np.mean(L))
        Lval_hist.append(np.mean(Lval))
        print(f'training loss: {np.mean(L):0.4f}\tvalidation loss: {np.mean(Lval):0.4f}\tvalidation accuracy: {np.mean(Aval):0.2f}')
    return Ltr_hist, Lval_hist

# Network

In [22]:
net = nn.Sequential(
    nn.Conv2d(in_channels=3,  out_channels=6, kernel_size=3, stride=1, padding=1), 
    nn.ReLU(),
    # nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1, padding=1),
    # nn.ReLU(),
    # nn.Conv2d(in_channels=16,  out_channels=32, kernel_size=3, stride=1, padding=1),
    # nn.ReLU(),
    # nn.ConvTranspose2d(in_channels = 32, out_channels=16, kernel_size=3, stride=1, padding=1),
    # nn.ReLU(),
    # nn.ConvTranspose2d(in_channels = 16, out_channels=8, kernel_size=3, stride=1, padding=1),
    # nn.ReLU(),
    # nn.ConvTranspose2d(in_channels = 8, out_channels=4, kernel_size=3, stride=1, padding=1),
    # nn.ReLU(),
    # nn.ConvTranspose2d(in_channels = 4, out_channels=2, kernel_size=3, stride=1, padding=1),
    # nn.ReLU(),
    nn.ConvTranspose2d(in_channels = 6, out_channels=1, kernel_size=3, stride=1, padding=1),
    nn.ReLU()
).cuda()


In [23]:
summary(net, (3,720,1280), 8)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [8, 6, 720, 1280]             168
              ReLU-2          [8, 6, 720, 1280]               0
   ConvTranspose2d-3          [8, 1, 720, 1280]              55
              ReLU-4          [8, 1, 720, 1280]               0
Total params: 223
Trainable params: 223
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 84.38
Forward/backward pass size (MB): 787.50
Params size (MB): 0.00
Estimated Total Size (MB): 871.88
----------------------------------------------------------------


# Testing the fit function

In [20]:
xb = (iter(tr_dl))
xb

<torch.utils.data.dataloader._SingleProcessDataLoaderIter at 0x1ea1793f3d0>

## Training the network

In [24]:
Ltr_hist, Lval_hist = fit(net, tr_dl, val_dl, epochs=1)

hello
opt


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

epoch
dl


HBox(children=(FloatProgress(value=0.0, max=786.0), HTML(value='')))

xb,yb loop
xb,yb cuda


RuntimeError: Input type (torch.cuda.ByteTensor) and weight type (torch.cuda.FloatTensor) should be the same