## Simple CNN
The goal here is to build a baseline pipeline for day/night classification (the same loader can also be pointed at the other conditions, e.g. snow); we'll use a simple CNN as the baseline.

### Loading the dataset

Load the images for a given weather type and data split (train / validation / test).

In [None]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
%matplotlib inline

# Choose which task to work on: fog, night, rain or snow
def load_images(weather_type, data_type,
                root='../input/acdc-dataset/dataset ACDC/rgb_anon/'):
    """
    Load images from the weather_type/data_type folder

    Every sub-directory of ``<root>/<weather_type>/<data_type>/`` (one per
    GoPro recording) is scanned in sorted order, so the returned lists are
    reproducible between runs. Entries of the split folder that are not
    directories are skipped instead of crashing the scan.

    :param weather_type: fog or night or rain or snow
    :type weather_type: String
    :param data_type: train or val or test or train_ref or val_ref or test_ref
    :type data_type: String
    :param root: folder that contains one sub-folder per weather type
    :type root: String
    :return: list of images and list of respective paths
    :rtype: Lists
    """
    # Function-scope import: paths are always joined with '/' (as before),
    # independent of the host OS, so returned paths keep their exact format.
    import posixpath

    data = []
    data_paths = []
    split_dir = posixpath.join(root, weather_type, data_type)

    # For each Gopro directory, for each image, store the image and its path
    for directory_name in sorted(os.listdir(split_dir)):
        gopro_path = posixpath.join(split_dir, directory_name)
        if not os.path.isdir(gopro_path):
            # Stray files (README, .DS_Store, ...) are not recording folders
            continue

        for image_name in sorted(os.listdir(gopro_path)):
            image_path = posixpath.join(gopro_path, image_name)
            # NOTE(review): Image.open is lazy and keeps the file handle open
            # until the pixel data is read; with very large splits this can
            # approach the OS open-file limit. Left unchanged to preserve the
            # returned objects (lazy PIL images).
            data.append(Image.open(image_path))
            data_paths.append(image_path)

            # Counter to see progression
            if len(data) % 100 == 0:
                print(f"{len(data)} {data_type} images loaded")

    return data, data_paths

Load the night dataset and bind it to the variables `train_day`, `train_night`, `valid_day`, `valid_night` (the `*_ref` splits are used as the daytime images).

In [None]:
# All four splits come from the 'night' condition: the `*_ref` folders are
# loaded as the daytime images, the plain folders as the night-time images.
# Each call returns (list of PIL images, list of their paths).
train_day, train_day_paths = load_images('night', 'train_ref')
train_night, train_night_paths = load_images('night', 'train')
valid_day, valid_day_paths = load_images('night', 'val_ref')
valid_night, valid_night_paths = load_images('night', 'val')

### Build CNN Model

Load functions:
- `accuracy()`: the function to evaluate the accuracy
- `train_epoch()`: perform one Training epoch
- `valid_epoch()`: perform one Validation epoch
- `show_batch()`: plot images from batch

In [None]:
import utils
import train_val_scripts

Import the PyTorch libraries

In [None]:
# importing required libraries

import torch
import torch.nn as nn
from torchvision import transforms
from tqdm import tqdm

In [None]:
# Every training image path (day first, then night); the next cell iterates
# over it to collect V-channel statistics.
all_files = train_day_paths + train_night_paths

In [None]:
# Per-image statistics of the V channel (index 2 of the HSV image), one entry per file
means = []
stds = []

# Each file is read with OpenCV, resized to 500x500, converted BGR -> HSV and
# scaled by 1/255; the mean and std of its V channel are then recorded.
for image_file in tqdm(all_files):
  hsv = cv2.cvtColor(cv2.resize(cv2.imread(str(image_file)), (500,500)), cv2.COLOR_BGR2HSV)
  v_channel = (hsv/255.0)[:, :, 2]
  means.append(np.mean(v_channel))
  stds.append(np.std(v_channel))

In [None]:
# avg mean and std for normalization
# NOTE: avg_std is the average of the per-image standard deviations, not the
# standard deviation computed over all pixels of all images.
# Both divisions raise ZeroDivisionError if no image was processed.
avg_mean = sum(means)/len(means)
avg_std = sum(stds)/len(stds)

In [None]:
# Display the two statistics as the cell output.
# NOTE(review): HSV_Dataset hard-codes 0.32225 / 0.21580 — presumably copied from this output; confirm.
avg_mean, avg_std

### Creating PyTorch Dataset and DataLoader

In [None]:
# PyTorch custom dataset class to load image and convert to HSV
class HSV_Dataset(torch.utils.data.Dataset):
  """Map-style dataset yielding the normalized V channel of an image and its day/night label.

  :param day_files: paths of the daytime images (label 1)
  :param night_files: paths of the night-time images (label 0)
  :param mean: mean used to normalize the V channel (after ToTensor scaling)
  :param std: standard deviation used to normalize the V channel
  :param size: target size handed to ``cv2.resize``
  """

  def __init__(self, day_files, night_files, mean=0.32225, std=0.21580, size=(500, 500)):
    super().__init__()
    day_files = list(day_files)
    night_files = list(night_files)
    self.files = day_files + night_files

    # label = 1 if day else 0.
    # The label is taken from the list a file was passed in, not from its
    # path: with paths such as .../night/train_ref/<gopro>/<image>.png the
    # third-from-last path component is the split name, never 'day', so a
    # path-based check labels every sample 0.
    self.labels = [1] * len(day_files) + [0] * len(night_files)
    self.size = size

    # augmentations to convert image to pytorch tensor and normalize using mean and std
    self.augs = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize([mean], [std])
    ])

  def __getitem__(self, idx):
    """Return ``(v, label)`` for sample ``idx``.

    ``v`` is the V channel after ``self.augs`` (tensor conversion and
    normalization); ``label`` is 1 for day and 0 for night.

    :raises FileNotFoundError: if OpenCV cannot read the image file
    """
    path = str(self.files[idx])
    # reading image; cv2.imread signals failure by returning None
    img = cv2.imread(path)
    if img is None:
      raise FileNotFoundError(f'Could not read image: {path}')
    # resizing to standard size
    img = cv2.resize(img, self.size)
    # converting to HSV colorspace
    img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # splitting channels of HSV image; only V is used
    _, _, v = cv2.split(img)
    # normalizing v channel of HSV image
    v = self.augs(v)

    return v, self.labels[idx]

  def __len__(self):
    """Return the total number of samples (day + night)."""
    return len(self.files)

In [None]:
# training and validation datasets
# (first argument: daytime image paths, second argument: night-time image paths)
train_ds = HSV_Dataset(train_day_paths, train_night_paths)
valid_ds = HSV_Dataset(valid_day_paths, valid_night_paths)

In [None]:
# training and validation dataloaders
# Both use batches of 32, two worker processes and pinned memory; only the
# training loader shuffles, so validation batches come in a fixed order.
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=32, shuffle=True, num_workers=2, pin_memory=True)
valid_dl = torch.utils.data.DataLoader(valid_ds, batch_size=32, shuffle=False, num_workers=2, pin_memory=True)

In [None]:
# Class names indexed by label: 0 -> 'night', 1 -> 'day'
classes = ['night', 'day']
# First validation batch (inputs x, labels y), used for the visual check below
x,y = next(iter(valid_dl))

In [None]:
# visualization of sample images
# NOTE(review): `show_batch` is called unqualified, but the visible cells only
# run `import utils` / `import train_val_scripts` — confirm the name is in
# scope (e.g. via `from utils import show_batch`).
show_batch(x, [classes[i] for i in y], nimgs=4, mean=torch.Tensor([0.32225]), std=torch.Tensor([0.21580]), denorm=False)

### Build a CNN Model

In [None]:
# Simple function that returns a Conv-BatchNorm-ReLU layer
def conv_bn_relu(ni, nf, stride=2, bn=True, act=True):
  """Build a 3x3 Conv -> (BatchNorm) -> (ReLU) block.

  :param ni: number of input channels
  :param nf: number of output channels
  :param stride: stride of the convolution (previously ignored and fixed at 2)
  :param bn: append a BatchNorm2d layer after the convolution
  :param act: append an in-place ReLU as the last layer
  :return: nn.Sequential holding the requested layers
  """
  # no need of bias if using batchnorm
  layers = [nn.Conv2d(ni, nf, kernel_size=3, stride=stride, padding=1, bias=not bn)]
  if bn:
    layers.append(nn.BatchNorm2d(nf))
  if act:
    layers.append(nn.ReLU(inplace=True))
  return nn.Sequential(*layers)

In [None]:
# Simple layer to flatten output of previous layer
class Flatten(nn.Module):
  """Flatten every dimension after the batch dimension: (N, C, H, W) -> (N, C*H*W)."""

  def forward(self, x):
    # Keep dim 0 explicitly: a bare squeeze() would also drop the batch
    # dimension when the batch holds a single sample, yielding shape (C,)
    # instead of (1, C) and breaking the loss computation.
    return x.flatten(1)

In [None]:
# Fully-convolutional baseline fed with the single V channel of an HSV image:
# four Conv-BatchNorm-ReLU stages, then a plain conv producing 2 channels.
feature_layers = [
    conv_bn_relu(n_in, n_out)
    for n_in, n_out in [(1, 8), (8, 16), (16, 32), (32, 8)]
]
simple_model = nn.Sequential(
    *feature_layers,
    conv_bn_relu(8, 2, bn=False, act=False),  # last layer: no batchnorm, no relu
    nn.AdaptiveAvgPool2d(1),  # mean over the spatial dimensions -> logits
    Flatten(),
)

In [None]:
# Display the module structure as the cell output
simple_model

In [None]:
# Move the model to the first CUDA device.
# NOTE(review): the device is hard-coded; there is no CPU fallback here.
simple_model = simple_model.to(device='cuda:0')

In [None]:
# Using Softmax CrossEntropy Loss
criterion = nn.CrossEntropyLoss()

# Adam optimizer with lr=1e-4
opt = torch.optim.Adam(simple_model.parameters(), lr=1e-4) 

# Cosine Annealing Learning Rate Scheduler
# T_max = (batches per epoch) * 15, matching the 15 epochs of the training loop; eta_min = 1e-6.
# NOTE(review): this T_max assumes train_epoch steps the scheduler once per batch — TODO confirm.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(opt, len(train_dl)*15, eta_min=1e-6) 

In [None]:
max_acc = 0.0 # highest validation accuracy seen so far

for epoch in range(15):
  # One pass over the training data, then one over the validation data
  train_loss, train_acc = train_epoch(simple_model, train_dl, criterion, opt, scheduler)
  valid_loss, valid_acc = valid_epoch(simple_model, valid_dl, criterion)

  # Checkpoint the weights whenever validation accuracy strictly improves
  best = bool(valid_acc > max_acc)
  if best:
    max_acc = valid_acc
    torch.save(simple_model.state_dict(), 'simple_best_model.pth')

  # Per-epoch report, printed line by line
  report = [
    '-'*25 + f'Epoch {epoch+1}' + '-'*25,
    f'Train Loss:{train_loss} Train Accuracy:{train_acc}',
    f'Valid Loss:{valid_loss} Valid Accuracy:{valid_acc}',
  ]
  if best:
    report.append('Found better model!')
  report.append('-'*58)
  for line in report:
    print(line)

## 