# Valeo: Image Classification: day/night/weather
## Baseline Pipeline

The goal here is to create a baseline pipeline for snow (day/night?) classification. We'll be using a simple CNN as the baseline.

## Loading the dataset

Load the dataset from the weather type and data type (train \ test \ validation)

In [None]:
import os
import numpy as np
from PIL import Image

# Choose which task to work on: fog, night, rain or snow
# NOTE(review): the dataset-loading cell further down passes 'night' to load_images()
# explicitly, so this value is not read by any cell visible here - confirm it is still needed.
weather_type = 'snow'

def load_images(weather_type, data_type):
    """
    Load images from the weather_type/data_type folder

    Recording sub-directories and the files inside them are visited in sorted
    order so the returned lists are reproducible between runs (os.listdir makes
    no ordering promise). Entries directly under the split folder that are not
    directories are skipped instead of crashing the inner listing.

    :param weather_type: fog or night or rain or snow
    :type weather_type: String
    :param data_type: train or val or test or train_ref or val_ref or test_ref
    :type data_type: String
    :return: list of images and list of respective paths
    :rtype: Lists
    """
    data = []
    data_paths = []
    path = '../input/acdc-dataset/dataset ACDC/rgb_anon/' + weather_type + '/' + data_type + '/'

    # For each Gopro directory, for each image, store the image and its path
    # in data and data_paths respectively
    for directory_name in sorted(os.listdir(path)):
        gopro_path = path + directory_name
        if not os.path.isdir(gopro_path):
            # Stray files next to the recording folders are not image directories
            continue

        for image_name in sorted(os.listdir(gopro_path)):
            image_path = gopro_path + "/" + image_name
            # NOTE(review): Image.open is presumably lazy (pixel data not decoded here),
            # so each stored image may keep its file handle open - confirm against Pillow docs.
            data.append(Image.open(image_path))
            data_paths.append(image_path)

            # Progress report every 100 images
            if len(data) % 100 == 0:
                print(str(len(data)) + " " + data_type + " images loaded")

    return data, data_paths

Load the night dataset into the variables `train_day`, `train_night`, `valid_day`, and `valid_night` (each with a matching `*_paths` list).

In [None]:
# Day images are taken from the '*_ref' splits of the 'night' folder,
# night images from the plain 'train' / 'val' splits.
# Each call returns (images, paths).
train_day, train_day_paths = load_images('night', 'train_ref')
train_night, train_night_paths = load_images('night', 'train')
valid_day, valid_day_paths = load_images('night', 'val_ref')
valid_night, valid_night_paths = load_images('night', 'val')

## EDA
Let's see some sample day and night images.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import cv2
%matplotlib inline

In [None]:
# Read the first day and the first night training image (OpenCV returns BGR arrays)
img_day, img_night = (
    cv2.imread(str(sample_path))
    for sample_path in (train_day_paths[0], train_night_paths[0])
)

In [None]:
# Bring both samples to a common 500x500 size
img_day, img_night = (cv2.resize(sample, (500, 500)) for sample in (img_day, img_night))

In [None]:
# matplotlib expects RGB channel order, OpenCV delivers BGR: swap before plotting
day_rgb, night_rgb = (
    cv2.cvtColor(sample, cv2.COLOR_BGR2RGB) for sample in (img_day, img_night)
)

In [None]:
# Show the day and night samples side by side
fig, ax = plt.subplots(1, 2, figsize=(10, 15))
for axis, picture, label in zip(ax, (day_rgb, night_rgb), ('Day', 'Night')):
    axis.imshow(picture)
    axis.set_title(label)

Images taken during day are generally brighter than images taken during night. We can use this fact to build a simple baseline model.

For this, we need to get the average brightness in an image. RGB image does not help much in this case.

We can convert the image from RGB colorspace to Hue Saturation Value (HSV) colorspace.
The Value in HSV indicates brightness at different positions. Therefore, utilize image from this colorspace to build a basic classifier.

In [None]:
# Re-encode both BGR samples as HSV so brightness sits in its own (Value) channel
day_hsv, night_hsv = (
    cv2.cvtColor(sample, cv2.COLOR_BGR2HSV) for sample in (img_day, img_night)
)

In [None]:
# Show the raw HSV arrays side by side (imshow is handed the three HSV channels as-is)
fig, ax = plt.subplots(1, 2, figsize=(10, 15))
for axis, picture, label in zip(ax, (day_hsv, night_hsv), ('Day', 'Night')):
    axis.imshow(picture)
    axis.set_title(label)

I don't think this looks very useful. Maybe we could split the channels and visualize them.

In [None]:
# Separate Hue / Saturation / Value planes for the day and the night sample
(dh, ds, dv), (nh, ns, nv) = (cv2.split(hsv_image) for hsv_image in (day_hsv, night_hsv))

In [None]:
# 2x3 grid: top row holds the day channels, bottom row the night channels
fig, ax = plt.subplots(2, 3, figsize=(15, 10))
channel_titles = ('Hue', 'Saturation', 'Value')
for axes_row, channels in zip(ax, ((dh, ds, dv), (nh, ns, nv))):
    for axis, channel, title in zip(axes_row, channels, channel_titles):
        axis.imshow(channel)
        axis.set_title(title)

It seems that the **Value** channel has higher pixel values where the image is bright.

## Baseline model (Average brightness)

Now find average brightness of day and night images, and we can use this as threshold to classify images.

In [None]:
# Per-image mean of the HSV Value channel, collected separately for each class
day_brightness, night_brightness = [], []

In [None]:
# Average brightness of every day training image
for image_path in train_day_paths:
    # read, then normalise the size to 500x500
    bgr = cv2.resize(cv2.imread(str(image_path)), (500, 500))
    # channel index 2 of the HSV image is Value (brightness)
    value_channel = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)[:, :, 2]
    day_brightness.append(np.mean(value_channel))

In [None]:
# Average brightness of every night training image
for image_path in train_night_paths:
    # read, then normalise the size to 500x500
    bgr = cv2.resize(cv2.imread(str(image_path)), (500, 500))
    # channel index 2 of the HSV image is Value (brightness)
    value_channel = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)[:, :, 2]
    night_brightness.append(np.mean(value_channel))

In [None]:
# calculating average brightness
# (mean of the per-image averages; raises ZeroDivisionError if a list is empty)
day_avg_brightness = sum(day_brightness)/len(day_brightness)
night_avg_brightness = sum(night_brightness)/len(night_brightness)
# bare tuple as last expression: shown as the cell's output
day_avg_brightness, night_avg_brightness

Visualize distribution of brightness in day and night images

In [None]:
# One histogram of per-image brightness per class
fig, ax = plt.subplots(1, 2, figsize=(10, 5))
for axis, values, label in zip(ax, (day_brightness, night_brightness), ('Day', 'Night')):
    axis.hist(values)
    axis.set_title(label)

Use a threshold of 90 for average brightness: most of the Day distribution lies above it and most of the Night distribution lies below it.

In [None]:
# Same histograms, with the candidate threshold (90) marked as a red vertical line
fig, ax = plt.subplots(1, 2, figsize=(10, 5))
for axis, values, label in zip(ax, (day_brightness, night_brightness), ('Day', 'Night')):
    axis.hist(values)
    axis.set_title(label)
    axis.axvline(90, color='red')

## Validation

A simple function that takes in threshold as input, classifies images in validation set and returns the accuracy

In [None]:
from functools import lru_cache


@lru_cache(maxsize=None)
def _average_brightness(image_path):
    """Return the mean of the HSV Value channel of the image at image_path, resized to 500x500."""
    img = cv2.imread(image_path)  # reading image
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None rather than raising
        raise FileNotFoundError("Could not read image: " + image_path)
    img = cv2.resize(img, (500, 500))  # resizing image to standard size
    img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)  # converting to HSV
    return np.mean(img[:, :, 2])  # channel 2 of the HSV image is Value


def validate(threshold=90):
    """
    Classify the validation images by average brightness and return the accuracy

    An image is predicted Day when its average brightness is strictly greater
    than threshold and Night otherwise, so every image gets exactly one label
    (a brightness equal to the threshold counts as Night).

    Per-image brightness is memoized, so calling this repeatedly with different
    thresholds does not decode the validation images again.

    :param threshold: brightness cut-off between Night (<=) and Day (>)
    :type threshold: Number
    :return: fraction of images in valid_day_paths and valid_night_paths classified correctly
    :rtype: Float
    :raises ZeroDivisionError: if both validation path lists are empty
    :raises FileNotFoundError: if an image cannot be read
    """
    total = len(valid_day_paths) + len(valid_night_paths)  # total number of images in validation set

    corrects = 0  # tracks running correct values

    # Day images are correct when brighter than the threshold
    for curr_file in valid_day_paths:
        if _average_brightness(str(curr_file)) > threshold:
            corrects += 1

    # Night images are correct when not brighter than the threshold
    for curr_file in valid_night_paths:
        if _average_brightness(str(curr_file)) <= threshold:
            corrects += 1

    return (corrects * 1.0) / total  # fraction of correctly classified images

Try to validate for threshold = 90

In [None]:
# Validation accuracy for the threshold picked from the histograms (shown as cell output)
validate(threshold=90)

In [None]:
# Validation accuracy for thresholds 40, 50, ..., 110
valid_scores = [validate(threshold=candidate) for candidate in range(40, 120, 10)]

In [None]:
import seaborn as sns

colors = sns.color_palette('pastel')

# Accuracy as a function of the brightness threshold (same grid as the sweep)
thresholds = range(40, 120, 10)
plt.figure(figsize=(8, 4), tight_layout=True)
plt.plot(thresholds, valid_scores)
plt.title('Score according to Threshold')
plt.xlabel('Threshold')
plt.ylabel('Validate Score')
plt.show()

In [None]:
# Best validation accuracy among the swept thresholds (shown as cell output)
max(valid_scores)