In [12]:
# imports
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import cv2
from skimage.util import random_noise
import gc

# Dataset locations: ./data holds the CSV files, ./data/TRANCOS the images.
data_folder = os.path.abspath('./data')
image_folder = os.path.join(data_folder, 'TRANCOS')
print(image_folder)

# Use the first CUDA device when one is available; otherwise fall back to the
# CPU and print an ANSI-coloured warning so the slow path is not missed.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
    print('\033[91m' + "You are training on CPU, are you sure you want to continue?" + '\033[0m')

# Fix the torch RNG seed. As the last expression of the cell, the returned
# Generator is also shown as the cell output.
torch.manual_seed(1)

/home/alix/Desktop/school/semester_5/ECSE-415/project-415/data/TRANCOS
[91mYou are training on CPU, are you sure you want to continue?[0m


<torch._C.Generator at 0x7fd4a81553d0>

In [13]:
# Hyperparameters
batch_size = 32             # number of samples per DataLoader batch
img_size = (480, 640, 3)    # target image shape; its first two entries set the cv2.resize target size


In [14]:
# data augmentation
# For every image listed in train.csv, write four variants next to the original:
#   <name>-0.jpg  resized, min-max normalised copy
#   <name>-1.jpg  left/right flip of that copy
#   <name>-2.jpg  up/down flip of that copy
#   <name>-3.jpg  that copy with skimage random_noise (var=0.2**2)
# Images whose four variants already exist on disk are skipped, so re-running
# the cell is cheap.
csv = pd.read_csv(os.path.join(data_folder, 'train.csv'))
csv = pd.Series(csv.counts.values,index=csv.images).to_dict()
images = list(csv.keys())
labels = list(csv.values())  # not needed for augmentation; kept as a cell-level name

# cv2.resize expects dsize as (width, height), while img_size is laid out as
# (height, width, channels) -- swap the first two entries.
target_size = (img_size[1], img_size[0])

for img in images:
    # splitext removes only the extension; str.rstrip('.jpg') would strip any
    # trailing run of the characters '.', 'j', 'p', 'g' from the name itself.
    stem = os.path.splitext(img)[0]
    variant_paths = [os.path.join(image_folder, stem + '-' + str(i) + '.jpg') for i in range(4)]

    # if they already exist, no need to redo the data augmentation
    if all(os.path.exists(path) for path in variant_paths):
        continue

    source = plt.imread(os.path.join(image_folder, img))
    image = cv2.normalize(cv2.resize(source, target_size), None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

    # Order matters: index i in this list is written to '<name>-i.jpg'.
    variants = [image, np.fliplr(image), np.flipud(image), random_noise(image, var=0.2**2)]
    for path, variant in zip(variant_paths, variants):
        plt.imsave(path, variant)
    

In [15]:
# dataset class

class CustomDataset(Dataset):
    """Image dataset described by ``<data_folder>/<type>.csv``.

    The CSV must have an ``images`` column (file names inside ``image_folder``)
    and, for the 'train' and 'valid' splits, a ``counts`` column with the label.

    * 'train': each CSV row expands to the four augmented files
      ``<name>-0.jpg`` .. ``<name>-3.jpg``, all sharing the row's label.
    * 'valid': one item per CSV row, the original image with its label.
    * 'test' : one item per CSV row, image only (no labels).

    Items are float32 arrays min-max normalised to [0, 1] and resized to
    ``img_size[0]`` rows by ``img_size[1]`` columns; 'train'/'valid' items are
    ``(image, label)`` tuples, 'test' items are the bare image.
    """

    _SPLITS = ('train', 'valid', 'test')
    _N_AUGMENTED = 4  # variants '-0' .. '-3' produced by the data-augmentation cell

    def __init__(self, type):
        """Read the CSV for the split named by ``type`` and build the file list.

        Raises:
            ValueError: if ``type`` is not 'train', 'valid' or 'test'.
        """
        # Validate before touching the disk so a typo yields this error rather
        # than a missing-file error from pd.read_csv.
        if type not in self._SPLITS:
            raise ValueError("Invalid type in dataset. It has to have one of the following values: 'train', 'valid', 'test'.")

        self.type = type
        csv = pd.read_csv(os.path.join(data_folder, self.type + '.csv'))

        self.images = []
        self.labels = []  # stays empty for the 'test' split

        if self.type == 'test':
            self.images = [os.path.join(image_folder, img) for img in csv.images.tolist()]
        else:
            # Going through a dict keeps one label per file name (last one wins).
            counts = pd.Series(csv.counts.values, index=csv.images).to_dict()
            for img, label in counts.items():
                if self.type == 'valid':
                    paths = [os.path.join(image_folder, img)]
                else:
                    paths = self._augmented_paths(image_folder, img)
                self.images.extend(paths)
                self.labels.extend([label] * len(paths))

        print("Initiated " + self.type + " dataset of size " + str(len(self.images)) + " and images of shape " + str(list(img_size)))

    @classmethod
    def _augmented_paths(cls, folder, img):
        """Return the full paths of the augmented variants of ``img`` inside ``folder``."""
        # splitext removes only the extension; str.rstrip('.jpg') would strip any
        # trailing run of the characters '.', 'j', 'p', 'g' from the name itself.
        stem = os.path.splitext(img)[0]
        return [os.path.join(folder, stem + '-' + str(i) + '.jpg') for i in range(cls._N_AUGMENTED)]

    @staticmethod
    def _load_image(path):
        """Read ``path``, resize it to ``img_size`` and scale it to float32 in [0, 1]."""
        height, width = img_size[:2]
        # cv2.resize takes dsize as (width, height), not (rows, cols).
        resized = cv2.resize(plt.imread(path), (width, height))
        return cv2.normalize(resized, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

    def __len__(self):
        """Number of items (image files) in the split."""
        return len(self.images)

    def __getitem__(self, i):
        """Return the i-th image, paired with its label unless this is the 'test' split."""
        image = self._load_image(self.images[i])
        if self.type == 'test':
            return image
        return image, self.labels[i]

In [16]:
# declaring datasets and data loaders
# Each split gets its own dataset, wrapped straight away in a DataLoader.
# Only the test loader keeps the dataset order; train and valid are shuffled.

train_dataset = CustomDataset(type='train')
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)

valid_dataset = CustomDataset(type='valid')
valid_loader = DataLoader(dataset=valid_dataset, shuffle=True, batch_size=batch_size)

test_dataset = CustomDataset(type='test')
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=batch_size)


Initiated train dataset of size 2492 and images of shape [480, 640, 3]
Initiated valid dataset of size 200 and images of shape [480, 640, 3]
Initiated test dataset of size 421 and images of shape [480, 640, 3]


In [18]:
# model

In [19]:
# training

In [20]:
# training results

In [21]:
# saving model

In [22]:
# calculating accuracy of model