# Model Notebook

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2 as cv
import torch
import torchvision
import torchsummary
import segmentation_models_pytorch as smp
from segmentation_models_pytorch import utils
import pandas as pd
#import torchmetrics
#from torch.utils.tensorboard import SummaryWriter
from PIL import Image
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

In [None]:
def open_images_cv(path, masked = False, size = (128, 128), interpolation = None):
    """
    Open all images of a directory concurrently by using a thread pool.

    Files are processed in sorted filename order. Each image is read with
    OpenCV, converted from BGR to RGB and resized to ``size``.

    Args:
        path: Directory that contains the image files.
        masked: If true, only files whose name contains "road" are loaded.
        size: Target size handed to ``cv.resize`` (OpenCV expects
            ``(width, height)``). Defaults to ``(128, 128)``.
        interpolation: Optional OpenCV interpolation flag forwarded to
            ``cv.resize``. ``None`` keeps OpenCV's default. For label masks
            ``cv.INTER_NEAREST`` avoids blending colours at class borders.

    Returns:
        A NumPy array stacking the loaded images in sorted filename order
        (an empty array when no file is selected).

    Raises:
        OSError: If a selected file cannot be decoded as an image.
    """
    def read_image(name):
        full_path = os.path.join(path, name)
        image = cv.imread(full_path)
        # cv.imread reports failure by returning None instead of raising,
        # which would otherwise surface as an obscure error in cvtColor.
        if image is None:
            raise OSError(f"Could not read image file: {full_path}")
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        if interpolation is None:
            return cv.resize(image, size)
        return cv.resize(image, size, interpolation=interpolation)

    names = sorted(os.listdir(path))
    if masked:
        names = [entry for entry in names if "road" in entry]

    with ThreadPoolExecutor() as executor:
        # executor.map yields results in input order, so the sorted
        # filename order is preserved in the output array.
        loaded = list(executor.map(read_image, names))
    return np.array(loaded)

In [None]:
def load_dataset_paths(path: str):
    """
    Build the dataset directory paths below ``path``.

    Returns a 3-tuple ``(train images dir, train ground-truth dir, test dir)``;
    every entry is a string ending in a trailing slash.
    """
    return (
        f"{path}/dataset/training/image_2/",
        f"{path}/dataset/training/gt_image_2/",
        f"{path}/dataset/testing/",
    )

In [None]:
# Build the dataset directory paths relative to the current directory ('.').
train_data_dir, train_gt_dir, test_data_dir = load_dataset_paths('.')

In [None]:
# Load the training images and their ground-truth masks as NumPy arrays.
# masked=True restricts the ground-truth directory to files whose name
# contains "road".
data=open_images_cv(train_data_dir,masked=False)
masks=open_images_cv(train_gt_dir,masked=True)

In [None]:
# Enable cuDNN's auto-tuner, which benchmarks convolution algorithms and
# picks the fastest one (per the PyTorch docs; pays off when input sizes
# stay constant between iterations).
torch.backends.cudnn.benchmark = True

In [None]:
def test_train_validation_split(data : np.ndarray, true_masks: np.ndarray,train_size = 0.8):
    """
    Splits dataset onto train, test, and validation datasets
    """
    train_set_size = int(len(data) * train_size)
    validation_set_size = int(len(data) * 0.1)
    test_set_size = len(data) - train_set_size - validation_set_size
    
    X_test=data[:test_set_size].transpose((0, 3, 1, 2))
    y_test=true_masks[:test_set_size].transpose((0, 3, 1, 2))


    data=data[test_set_size:]
    true_masks=true_masks[test_set_size:]

    # shuffle
    shuffled_indices = np.random.permutation(len(data))
    shuffled_data = data[shuffled_indices]
    shuffled_masks = true_masks[shuffled_indices]

    X_train=shuffled_data[:train_set_size].transpose((0, 3, 1, 2))
    y_train=shuffled_masks[:train_set_size].transpose((0, 3, 1, 2))

    X_val=shuffled_data[train_set_size:train_set_size+validation_set_size].transpose((0, 3, 1, 2))
    y_val=shuffled_masks[train_set_size:train_set_size+validation_set_size].transpose((0, 3, 1, 2))


    return X_train, y_train, X_test, y_test , X_val, y_val 

In [None]:
# Split the images and masks into train / test / validation subsets.
# NOTE(review): `binary_mask` is not defined in the cells visible here —
# presumably it is derived from `masks` in another cell; confirm it exists.
X_train, y_train, X_test, y_test , X_val, y_val  = test_train_validation_split(data,binary_mask)

In [None]:
from torch.utils.data import Dataset, DataLoader

class ImageDataset(Dataset):
    """Dataset that pairs each image with the mask stored at the same index."""

    def __init__(self, images,masks):
        """Keep references to the image and mask collections."""
        self.images, self.masks = images, masks

    def __len__(self):
        """Return the number of images."""
        return len(self.images)

    def __getitem__(self, index):
        """Return the ``(image, mask)`` pair stored at ``index``."""
        return self.images[index], self.masks[index]

In [None]:
# Single source of truth for the batch size, so the loaders and the report
# printed below cannot drift apart.
BATCH_SIZE = 64

# Wrap each split in a Dataset that yields (image, mask) pairs.
train_dataset = ImageDataset(X_train,y_train)
val_dataset = ImageDataset(X_val, y_val)
test_dataset = ImageDataset(X_test, y_test)

# Train and validation batches are drawn in shuffled order; the test loader
# keeps the dataset order.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# len(dataloader) is the number of batches, not the number of samples.
print(f"Train dataloader length: {len(train_dataloader)} batches of size {BATCH_SIZE}")
print(f"Validation dataloader length: {len(val_dataloader)} batches of size {BATCH_SIZE}")
print(f"Test dataloader length: {len(test_dataloader)} batches of size {BATCH_SIZE}")