# Globals

In [None]:
# Mount Google Drive at /content/drive (Colab-only helper) so that the paths
# under /content/drive/MyDrive used later in this notebook can be read.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Notebook-wide settings shared by the dataset and the data loaders.
global_var = dict(
    # Resolutions: stored as (channels, width, height). SSID_Dataset hands
    # (index 2, index 1) to the resize transform, i.e. a 488 x 634 (h x w) target.
    RGB_img_res=(3, 634, 488),
    # Samples per batch, used by both the train and the test loader.
    batch_size=64,
    # Worker processes used by each DataLoader.
    n_workers=2,
)

# Placeholder for the data-augmentation settings.
# TODO
augmentation_parameters = dict()

In [None]:
# Root folder of the dataset on the mounted Drive. SSID_Dataset appends
# "Scene_Instances.txt" and "Data/" to it by plain string concatenation, so the
# trailing slash is required.
dataset_root = '/content/drive/MyDrive/NN_project/SSID_dataset/'
# Presumably the folder where trained models are saved -- it is not used in the
# cells visible here; confirm against the training code.
save_model_root = '/content/drive/MyDrive/NN_project/'

# Imports

In [None]:
# Install the extra packages this notebook needs; torchsummaryX is imported in
# the next cell, einops is not imported in the cells visible here.
# NOTE(review): versions are not pinned, so a fresh runtime may pull different
# releases -- consider pinning them.
!pip install einops torchsummaryX



In [None]:
import numpy as np
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as TT
import torchvision.transforms.functional as TF

from PIL import Image
from torch.utils.data import DataLoader
from torch.utils.data import Dataset, DataLoader, random_split
from torchsummaryX import summary

# Data

## Data augmentation

In [None]:
# TODO

## Dataset

In [None]:
class SSID_Dataset(Dataset):
    """Paired (input, target) image dataset read from per-scene folders.

    Layout expected under ``data_root``:

    * ``Scene_Instances.txt`` -- one scene directory name per line;
    * ``Data/<scene>/``       -- the image files of that scene.

    For every scene the directory listing is sorted by file name; the first
    entry is taken as the target and the second as the input image.
    NOTE(review): this relies on the dataset's file naming making the target
    sort before the input -- confirm against the actual files.

    Every image is converted to a tensor and resized to the resolution stored
    in ``global_var['RGB_img_res']``.

    Args:
        data_root: dataset root folder (with or without a trailing slash).

    Raises:
        ValueError: if a scene directory holds fewer than two files.
    """

    def __init__(self, data_root):
        self.dataset_path = data_root
        # os.path.join tolerates a root given without a trailing slash; the
        # empty last component keeps the trailing separator on data_dir.
        self.dir_list = os.path.join(data_root, "Scene_Instances.txt")
        self.data_dir = os.path.join(data_root, "Data", "")
        self.data_directories = []
        self.img_paths = []
        self.target_paths = []
        self.post_processing = TT.Compose([
            TT.ToTensor(),
            # RGB_img_res is indexed [2] then [1] to build the size passed to Resize.
            TT.Resize((global_var['RGB_img_res'][2], global_var['RGB_img_res'][1]), antialias=None),
        ])

        # Read the scene list from the root passed to the constructor (not from
        # a notebook global) and make sure the file handle is released.
        with open(self.dir_list, 'r') as data_dir_file:
            self.data_directories = [
                line.strip() for line in data_dir_file if line.strip()
            ]

        for scene in self.data_directories:
            content = sorted(os.listdir(os.path.join(self.data_dir, scene)))
            if len(content) < 2:
                raise ValueError(
                    "Scene directory '%s' must contain a target and an input image, "
                    "found %d file(s)" % (scene, len(content))
                )
            self.target_paths.append(content[0])
            self.img_paths.append(content[1])

    def _load(self, scene, file_name):
        """Open one image of ``scene`` and return it as a resized tensor."""
        path = os.path.join(self.data_dir, scene, file_name)
        # The context manager closes the underlying file once the pixel data
        # has been turned into a tensor.
        with Image.open(path) as picture:
            return self.post_processing(picture)

    def __getitem__(self, index):
        """Return the ``(img, target)`` tensor pair of the scene at ``index``."""
        scene = self.data_directories[index]
        img = self._load(scene, self.img_paths[index])
        target = self._load(scene, self.target_paths[index])

        return img, target

    def __len__(self):
        """Number of scenes (one image pair each)."""
        return len(self.img_paths)

## Dataloader

In [None]:
dataset = SSID_Dataset(dataset_root)

# Derive the 70/30 split from the real dataset size instead of hard-coding
# [112, 48], which only works when the dataset holds exactly 160 samples.
train_fraction = 0.7
n_train = int(round(train_fraction * len(dataset)))
n_test = len(dataset) - n_train

# A seeded generator keeps the train/test membership identical across sessions,
# so a model reloaded later is still evaluated on data it never trained on.
split_generator = torch.Generator().manual_seed(0)
train_dataset, test_dataset = random_split(dataset, [n_train, n_test],
                                           generator=split_generator)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size = global_var['batch_size'],
                                           num_workers = global_var['n_workers'],
                                           shuffle = True)

# Evaluation does not benefit from shuffling; a fixed order keeps per-sample
# results comparable between runs.
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size = global_var['batch_size'],
                                          num_workers = global_var['n_workers'],
                                          shuffle = False)

n_total = len(train_dataset) + len(test_dataset)
print("Train data percentage: ", len(train_dataset)/n_total)
print("Test data percentage: ", len(test_dataset)/n_total)

Train data percentage:  0.7
Test data percentage:  0.3


# Loss

In [None]:
class loss_function(nn.Module):
  """Charbonnier-style loss: mean of sqrt((pred - truth)^2 + epsilon^2).

  Args:
    truth: unused; accepted only so existing positional calls keep working.
    pred: unused; accepted only so existing positional calls keep working.
    epsilon: constant added (squared) under the square root, which keeps the
      expression differentiable where pred == truth.
  """
  def __init__(self, truth=None, pred=None, epsilon=1e-3):
    super(loss_function,self).__init__()
    # truth/pred are never stored: the tensors are supplied to forward().
    self.epsilon = epsilon

  def forward(self,pred,truth):
    """Return the scalar loss between `pred` and `truth` (broadcastable tensors)."""
    return torch.mean(torch.sqrt((pred-truth)**2 + self.epsilon**2))

# Architecture

In [None]:
class W_MSA(nn.Module):
  """Multi-head attention with an additive bias term.

  Computes, independently for every head,
  ``softmax(Q K^T / sqrt(head_dim) + B) V`` and then mixes the concatenated
  heads with a final linear layer.

  Args:
    C: number of channels of every token; must be divisible by `heads`.
    heads: number of attention heads.
    B: bias added to the attention scores before the softmax. It must be
      broadcastable to (batch, heads, query_len, key_len); a plain number works.
      NOTE(review): B is stored as a plain attribute, so if it is a tensor it
      does not follow the module across devices -- confirm how it is created.

  Raises:
    ValueError: if `C` is not divisible by `heads`.
  """
  def __init__(self, C, heads, B):
    super(W_MSA, self).__init__()
    if C % heads != 0:
      raise ValueError("C (%d) must be divisible by heads (%d)" % (C, heads))
    self.C = C
    self.B = B
    self.heads = heads
    self.head_dim = C // heads

    # The projections act on one head at a time (head_dim -> head_dim).
    self.values = nn.Linear(self.head_dim, self.head_dim, bias=False)
    self.keys = nn.Linear(self.head_dim, self.head_dim, bias=False)
    self.queries = nn.Linear(self.head_dim, self.head_dim, bias=False)
    self.fc_out = nn.Linear(heads*self.head_dim, C)

  def forward(self, values, keys, queries):
    """Attend `queries` over `keys`/`values`.

    Args:
      values: tensor of shape (batch, key_len, C).
      keys: tensor of shape (batch, key_len, C).
      queries: tensor of shape (batch, query_len, C).

    Returns:
      Tensor of shape (batch, query_len, C).
    """
    batch = queries.shape[0]
    query_len, key_len, value_len = queries.shape[1], keys.shape[1], values.shape[1]

    # Split the channel axis into (heads, head_dim) and project every head.
    values = self.values(values.reshape(batch, value_len, self.heads, self.head_dim))
    keys = self.keys(keys.reshape(batch, key_len, self.heads, self.head_dim))
    queries = self.queries(queries.reshape(batch, query_len, self.heads, self.head_dim))

    # Scaled dot product between every query and every key, per head.
    scores = torch.einsum("bqhd,bkhd->bhqk", queries, keys) / (self.head_dim ** 0.5)
    # Normalise over the key axis so each query's weights sum to one.
    attention = torch.softmax(scores + self.B, dim=-1)

    # Weighted sum of the values, then merge the heads back into C channels.
    out = torch.einsum("bhqk,bkhd->bqhd", attention, values)
    out = out.reshape(batch, query_len, self.heads * self.head_dim)

    return self.fc_out(out)


class LeFF(nn.Module):
  """Feed-forward block with a 3x3 convolution between two linear layers.

  Tokens are expanded to `hidden_dim` channels, arranged as a 2-D feature map
  so the convolution can mix neighbouring positions, flattened back into
  tokens and projected to `dim` channels again.

  Args:
    dim: number of input and output channels per token.
    hidden_dim: number of channels inside the block.
  """
  def __init__(self, dim=32, hidden_dim=128):
    super(LeFF, self).__init__()
    self.dim = dim
    self.hidden_dim = hidden_dim

    # nn.GELU must be instantiated: nn.Sequential only accepts module instances.
    self.layer1 = nn.Sequential(nn.Linear(dim, hidden_dim), nn.GELU())
    self.layer2 = nn.Sequential(nn.Conv2d(hidden_dim, hidden_dim, kernel_size=3, stride=1, padding=1), nn.GELU())
    self.layer3 = nn.Sequential(nn.Linear(hidden_dim, dim))

  def forward(self, x, hw=None):
    """Apply the block to a sequence of tokens.

    Args:
      x: tensor of shape (batch, n_tokens, dim).
      hw: optional (height, width) of the token grid. When omitted the grid is
        assumed to be square, so n_tokens must be a perfect square.

    Returns:
      Tensor of shape (batch, n_tokens, dim).

    Raises:
      ValueError: if the grid size does not match n_tokens.
    """
    batch, n_tokens, _ = x.shape
    if hw is None:
      side = int(round(n_tokens ** 0.5))
      if side * side != n_tokens:
        raise ValueError(
            "n_tokens=%d is not a perfect square; pass hw=(height, width)" % n_tokens)
      height, width = side, side
    else:
      height, width = hw
      if height * width != n_tokens:
        raise ValueError(
            "hw=%r does not match n_tokens=%d" % ((height, width), n_tokens))

    x = self.layer1(x)
    # Conv2d wants (batch, channels, height, width): move channels forward and
    # unfold the token axis into the 2-D grid.
    x = x.transpose(1, 2).reshape(batch, self.hidden_dim, height, width)
    x = self.layer2(x)
    # Back to (batch, n_tokens, hidden_dim) for the last linear layer.
    x = x.flatten(2).transpose(1, 2)
    x = self.layer3(x)

    return x


class TransformerBlock(nn.Module):
  """Pre-norm transformer block: attention, then feed-forward, each with a residual.

  Layout (layer norm is applied to the *inputs* of each sub-layer):

      x   = queries + dropout(W_MSA(LN1(values), LN1(keys), LN1(queries)))
      out = x       + dropout(LeFF(LN2(x)))

  Args:
    dim: size of the normalised (last) axis; it has to equal `C`, because the
      same tensors go through LayerNorm(dim) and W_MSA(C, ...).
    C: number of channels handed to W_MSA.
    B: attention bias handed to W_MSA.
    heads: number of attention heads.
    dropout: dropout probability applied to the output of both sub-layers.
  """
  def __init__(self, dim, C, B, heads, dropout):
    # Must run before any sub-module is assigned, otherwise nn.Module raises.
    super(TransformerBlock, self).__init__()
    self.norm1 = nn.LayerNorm(dim)
    self.w_msa = W_MSA(C, heads, B)
    self.norm2 = nn.LayerNorm(dim)
    # Size the feed-forward block to this block's channels rather than to
    # LeFF's default of 32.
    self.leff = LeFF(dim=dim)
    self.dropout = nn.Dropout(dropout)

  def forward(self, values, keys, queries):
    """Run the block.

    Args:
      values, keys, queries: tensors whose last axis has size `dim`; `values`
        and `keys` must share their sequence length. For self-attention pass
        the same tensor three times.

    Returns:
      Tensor with the same shape as `queries`.
    """
    # norm1 normalises the inputs of the attention, not its output (the original
    # code normalised the attention output and then discarded the result).
    attended = self.w_msa(self.norm1(values), self.norm1(keys), self.norm1(queries))
    x = queries + self.dropout(attended)
    x = x + self.dropout(self.leff(self.norm2(x)))

    return x




