In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision as tv

import os
import cv2
import zipfile
import numpy as np
import matplotlib.pyplot as plt
from tqdm.autonotebook import tqdm

  from tqdm.autonotebook import tqdm


In [None]:
# Mount Google Drive at /content/drive/ so the dataset directories used in the
# cells below are reachable through the regular filesystem.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
class Eye_tracking_dataset_3classes(torch.utils.data.Dataset):
    """Three-class image dataset backed by one directory per class.

    Every entry of ``path_dir1`` is labelled 0, every entry of ``path_dir2``
    is labelled 1 and every entry of ``path_dir3`` is labelled 2.  Entries are
    sorted by name inside each directory and the three listings are
    concatenated in that order to form the flat index space.

    Each sample is a dict with:
        ``'image'``: float32 tensor of shape (3, 100, 100), RGB channel order,
            pixel values divided by 255.
        ``'label'``: scalar integer tensor holding the class id (0, 1 or 2).

    Args:
        path_dir1: Directory holding the class-0 images.
        path_dir2: Directory holding the class-1 images.
        path_dir3: Directory holding the class-2 images.

    Raises:
        FileNotFoundError: Propagated from ``os.listdir`` when one of the
            directories does not exist.
    """

    def __init__(self, path_dir1:str, path_dir2:str, path_dir3:str):
        super().__init__()
        # Path and class id of the most recently fetched sample.  They are no
        # longer used to carry state between steps of ``__getitem__``; they
        # are still updated so code that inspects them keeps working.
        self.image_path = ""
        self.class_id = 0

        self.path_dir1 = path_dir1
        self.path_dir2 = path_dir2
        self.path_dir3 = path_dir3

        # Sorted so that the index -> file mapping is reproducible.
        self.dir1_list = sorted(os.listdir(path_dir1))
        self.dir2_list = sorted(os.listdir(path_dir2))
        self.dir3_list = sorted(os.listdir(path_dir3))

    def __len__(self):
        """Return the total number of entries across the three directories."""
        return (len(self.dir1_list) + len(self.dir2_list) + len(self.dir3_list))

    def _locate(self, index):
        """Map a flat dataset index to an ``(image_path, class_id)`` pair.

        Negative indices count from the end, as for built-in sequences.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        requested = index
        if index < 0:
            index += size
        if not 0 <= index < size:
            raise IndexError(
                f"index {requested} is out of range for a dataset of size {size}"
            )

        groups = (
            (self.path_dir1, self.dir1_list),
            (self.path_dir2, self.dir2_list),
            (self.path_dir3, self.dir3_list),
        )
        for class_id, (directory, names) in enumerate(groups):
            if index < len(names):
                return os.path.join(directory, names[index]), class_id
            # Not in this directory: make the index relative to the next one.
            index -= len(names)

        # Unreachable because of the range check above; kept as a safety net.
        raise IndexError(
            f"index {requested} is out of range for a dataset of size {size}"
        )

    def __getitem__(self, index):
        """Load, resize and normalize the image at ``index``.

        Returns:
            dict with keys ``'image'`` (float32 tensor, shape (3, 100, 100))
            and ``'label'`` (scalar integer tensor).

        Raises:
            IndexError: If ``index`` is out of range.
            OSError: If OpenCV cannot decode the file at the resolved path.
        """
        image_path, class_id = self._locate(index)
        self.image_path = image_path
        self.class_id = class_id

        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"cv2.imread could not read an image from {image_path!r}")

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = image.astype(np.float32)
        image = cv2.resize(image, (100, 100), interpolation=cv2.INTER_AREA)
        image = image/255.0

        # HWC -> CHW, the layout nn.Conv2d expects.
        image = image.transpose((2, 0, 1))

        t_image = torch.from_numpy(image)
        t_class_id = torch.tensor(class_id)

        return {'image': t_image, 'label': t_class_id}

In [None]:
# One directory per gaze class; the argument order fixes the label ids
# (left -> 0, center -> 1, right -> 2).
train_left_path, train_center_path, train_right_path = (
    '/content/drive/My Drive/Colab Notebooks/DGW_dataset_train/left/',
    '/content/drive/My Drive/Colab Notebooks/DGW_dataset_train/center/',
    '/content/drive/My Drive/Colab Notebooks/DGW_dataset_train/right/',
)

train_dataset = Eye_tracking_dataset_3classes(
    path_dir1=train_left_path,
    path_dir2=train_center_path,
    path_dir3=train_right_path,
)

FileNotFoundError: ignored

In [None]:
# Sanity check: fetch the first sample and display its image tensor.
train_dataset[0]['image']

tensor([[[0.6360, 0.6685, 0.7037,  ..., 0.3892, 0.3735, 0.3194],
         [0.6515, 0.6766, 0.7111,  ..., 0.3953, 0.3156, 0.2767],
         [0.6578, 0.6919, 0.6949,  ..., 0.3241, 0.2731, 0.2655],
         ...,
         [0.0938, 0.0985, 0.1174,  ..., 0.1856, 0.2011, 0.1682],
         [0.1100, 0.1027, 0.0930,  ..., 0.4879, 0.4806, 0.4647],
         [0.0985, 0.1081, 0.0919,  ..., 0.5662, 0.5514, 0.5485]],

        [[0.9715, 0.9816, 0.9865,  ..., 0.3403, 0.3243, 0.2819],
         [0.9644, 0.9728, 0.9859,  ..., 0.3172, 0.2778, 0.2298],
         [0.9625, 0.9727, 0.9827,  ..., 0.2710, 0.2309, 0.2138],
         ...,
         [0.0881, 0.0826, 0.0689,  ..., 0.1503, 0.1408, 0.1566],
         [0.0748, 0.0821, 0.0793,  ..., 0.4233, 0.4411, 0.4573],
         [0.0901, 0.0738, 0.0693,  ..., 0.5081, 0.4916, 0.5017]],

        [[1.0000, 1.0000, 1.0000,  ..., 0.1818, 0.1739, 0.1491],
         [1.0000, 1.0000, 1.0000,  ..., 0.2020, 0.1429, 0.1309],
         [1.0000, 1.0000, 1.0000,  ..., 0.1718, 0.1257, 0.

In [None]:
# Total number of training samples (sum of the three class directory listings).
len(train_dataset)

15473

In [None]:
batch_size = 16

# Shuffled mini-batches for training.  drop_last=True discards the final
# incomplete batch so every batch holds exactly `batch_size` samples.
train_loader = torch.utils.data.DataLoader(
    train_dataset, shuffle=True,
    batch_size=batch_size,
    # Use at most 20 workers, but never more than the machine has CPU cores:
    # oversubscribing makes DataLoader warn and can slow loading down.
    num_workers=min(20, os.cpu_count() or 1),
    drop_last=True
)



In [None]:
class Neural_Network(nn.Module):
    """Small convolutional classifier that outputs three class scores.

    The input passes through four stages of unpadded 3x3 convolution,
    LeakyReLU(0.2) and 2x2 max pooling (3 -> 32 -> 32 -> 64 -> 128 channels),
    is averaged over the spatial dimensions, and is then mapped to three
    outputs by a 128 -> 20 -> 3 fully connected head.  No softmax is applied.
    """

    def __init__(self):
        super().__init__()

        # Parameter-free layers reused by every stage.
        self.act = nn.LeakyReLU(negative_slope=0.2)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Feature extractor.
        self.conv0 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=0)
        self.conv1 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=0)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=0)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=0)

        # Classification head.
        self.adaptivepool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(in_features=128, out_features=20)
        self.linear2 = nn.Linear(in_features=20, out_features=3)

    def forward(self, x):
        """Return the raw class scores for a batch of images ``x``."""
        features = x
        for conv in (self.conv0, self.conv1, self.conv2, self.conv3):
            features = self.maxpool(self.act(conv(features)))

        pooled = self.flatten(self.adaptivepool(features))
        hidden = self.act(self.linear1(pooled))
        return self.linear2(hidden)

In [None]:
def count_parameters(model):
    """Return the number of scalar parameters of ``model`` that require gradients."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

In [None]:
# Build the network that the optimizer and the train/validation loops operate on.
model = Neural_Network()

In [None]:
# Display the module repr to review the registered layers.
model

Neural_Network(
  (act): LeakyReLU(negative_slope=0.2)
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (adaptivepool): AdaptiveAvgPool2d(output_size=(1, 1))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear1): Linear(in_features=128, out_features=20, bias=True)
  (linear2): Linear(in_features=20, out_features=3, bias=True)
)

In [None]:
# Number of trainable parameters in the model.
count_parameters(model)

105139

In [None]:
# Objective: cross-entropy on the raw network outputs.
loss_fn = nn.CrossEntropyLoss()

# Adam with the learning rate and moment coefficients spelled out.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=1e-3,
    betas=(0.9, 0.999),
)

In [None]:
def accuracy(prediction, label):
    """Return the fraction of samples whose highest-scoring class matches the label.

    Args:
        prediction: Tensor of per-class scores with shape (batch, num_classes).
            Logits and probabilities give the same result because only the
            position of the maximum is used.
        label: Either a (batch, num_classes) tensor whose row-wise maximum
            marks the true class (e.g. one-hot), or a 1-D tensor of class
            indices.

    Returns:
        The mean of the per-sample matches as a NumPy float in [0, 1].
    """
    # argmax is taken directly on the scores: softmax is monotonic, so applying
    # it first cannot change the winner (and calling it without `dim` warns).
    predicted_class = prediction.detach().argmax(dim=1)
    true_class = label.argmax(dim=1) if label.dim() > 1 else label

    # .cpu() keeps this working when the tensors live on an accelerator.
    return (predicted_class == true_class).cpu().numpy().mean()

In [None]:
# Number of full passes over the training set.
epochs = 20

# Put the model in training mode before optimizing.
model.train()

for epoch in range(epochs):
    # Running sums of the per-batch loss and accuracy for this epoch.
    loss_val = 0
    accuracy_val = 0
    for sample in (pbar := tqdm(train_loader)):
        image, label = sample['image'], sample['label']
        optimizer.zero_grad()

        # The loss and the accuracy helper both consume one-hot float targets.
        label = F.one_hot(label, 3).float()
        prediction = model(image)

        loss = loss_fn(prediction, label)

        loss.backward()
        loss_item = loss.item()
        loss_val += loss_item

        optimizer.step()

        accuracy_current = accuracy(prediction, label)
        accuracy_val += accuracy_current

        # Updated inside the batch loop so the bar shows live metrics instead
        # of being annotated once after it has already finished.
        pbar.set_description(f"loss: {loss_item:.5f}\taccuracy: {accuracy_current:.3f}")

    # Epoch averages over batches: mean loss first, then mean accuracy.
    print(loss_val/len(train_loader))
    print(accuracy_val/len(train_loader))

In [None]:
# Validation split: same directory layout and label order as the training
# split (left -> 0, center -> 1, right -> 2).
val_left_path, val_center_path, val_right_path = (
    '/content/drive/My Drive/Colab Notebooks/DGW_dataset_val/left/',
    '/content/drive/My Drive/Colab Notebooks/DGW_dataset_val/center/',
    '/content/drive/My Drive/Colab Notebooks/DGW_dataset_val/right/',
)

val_dataset = Eye_tracking_dataset_3classes(
    path_dir1=val_left_path,
    path_dir2=val_center_path,
    path_dir3=val_right_path,
)

In [None]:
batch_size = 16

# Evaluation loader.  drop_last=False keeps the final (possibly smaller) batch
# so every validation sample is scored.
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    # Order does not matter for evaluation; a fixed order keeps runs repeatable.
    shuffle=False,
    batch_size=batch_size,
    # Use at most 20 workers, but never more than the machine has CPU cores:
    # oversubscribing makes DataLoader warn and can slow loading down.
    num_workers=min(20, os.cpu_count() or 1),
    drop_last=False
)



In [None]:
# Evaluate in inference mode.  NOTE: this leaves the model in eval mode; call
# model.train() before training again.
model.eval()

# Sample-weighted sums: the last batch may be smaller than the others, so
# averaging the per-batch means with equal weight would bias the result.
loss_val = 0
accuracy_val = 0
samples_seen = 0

with torch.no_grad():
    for sample in (pbar := tqdm(val_loader)):
        image, label = sample['image'], sample['label']
        batch_len = label.size(0)

        # The loss and the accuracy helper both consume one-hot float targets.
        label = F.one_hot(label, 3).float()
        prediction = model(image)

        # loss_fn returns the batch mean, so scale it back by the batch size.
        loss = loss_fn(prediction, label)
        loss_item = loss.item()
        loss_val += loss_item * batch_len

        accuracy_current = accuracy(prediction, label)
        accuracy_val += accuracy_current * batch_len

        samples_seen += batch_len

        # Updated inside the loop so the bar shows live metrics.
        pbar.set_description(f"loss: {loss_item:.5f}\taccuracy: {accuracy_current:.3f}")

# Per-sample mean loss, then per-sample accuracy, over the validation set.
print(loss_val/samples_seen)
print(accuracy_val/samples_seen)

  0%|          | 0/451 [00:00<?, ?it/s]

  answer = F.softmax(prediction.detach()).numpy().argmax(1) == label.numpy().argmax(1)


1.0900605092291822
0.3549057649667406
