# Weather Classification

### Load Dataset

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import os
import random
from shutil import copyfile

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.applications.inception_v3 import InceptionV3
from keras.optimizers import SGD
from keras.layers import GlobalAveragePooling2D, Dense, Dropout

from keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import log_loss

In [None]:
# Weather categories to classify. Each name is also used as a sub-directory
# name under the dataset root when the images are split and counted.
weather_classes = ['cloudy', 'foggy', 'rainy', 'shine', 'sunrise']

In [None]:
def prepare_data(src, weather_class):
    """Split one class's images into training and validation folders.

    Files are read from ``<src>/<weather_class>``, shuffled with ``random``,
    and copied 80% / 20% into ``<src>/train/<weather_class>`` and
    ``<src>/val/<weather_class>``. The originals are left in place.
    Zero-length files are reported and skipped; sub-directories are skipped.

    Args:
        src: Root directory of the dataset.
        weather_class: Name of the class sub-directory inside ``src``.
    """
    dir_src = os.path.join(src, weather_class)

    files = []
    # os.listdir order is arbitrary; sorting first makes the split
    # reproducible when the caller seeds ``random``.
    for filename in sorted(os.listdir(dir_src)):
        file = os.path.join(dir_src, filename)
        if not os.path.isfile(file):
            # Directories report a non-zero size but cannot be copied
            # with copyfile, so leave them out of the split.
            continue
        if os.path.getsize(file) > 0:
            files.append(filename)
        else:
            print(filename + " is zero length, so ignoring.")

    training_length = int(len(files) * 0.8)
    shuffled_set = random.sample(files, len(files))
    splits = {
        'train': shuffled_set[:training_length],
        'val': shuffled_set[training_length:],
    }

    for subset, filenames in splits.items():
        dir_dst = os.path.join(src, subset, weather_class)
        # Create the destination if the caller has not done so already.
        os.makedirs(dir_dst, exist_ok=True)
        for filename in filenames:
            copyfile(os.path.join(dir_src, filename),
                     os.path.join(dir_dst, filename))

In [None]:
# Dataset root and the two split folders created beneath it.
src = 'dataset'
training_dir, validation_dir = (
    os.path.join(src, split) for split in ('train', 'val')
)

# os.mkdir raises FileExistsError when a folder is already present, so
# running this cell on an already-split dataset stops here.
for split_dir in (training_dir, validation_dir):
    os.mkdir(split_dir)

# Create the per-class folders in both splits, then copy that class's images.
for w in weather_classes:
    for split_dir in (training_dir, validation_dir):
        os.mkdir(os.path.join(split_dir, w))
    prepare_data(src, w)
print('Done')

### Visualization

In [None]:
# Number of training images available for each weather class.
num_imgs = {
    w: len(os.listdir(os.path.join(training_dir, w)))
    for w in weather_classes
}

# Bar chart with one bar per class, labelled with the class name.
bar_positions = range(len(num_imgs))
plt.figure(figsize=(9,6))
plt.bar(bar_positions, list(num_imgs.values()), align='center')
plt.xticks(bar_positions, list(num_imgs.keys()))
plt.title('Distribution in Training Dataset')
plt.show()

In [None]:
# Number of validation images available for each weather class.
num_imgs = {
    w: len(os.listdir(os.path.join(validation_dir, w)))
    for w in weather_classes
}

# Bar chart with one bar per class, labelled with the class name.
bar_positions = range(len(num_imgs))
plt.figure(figsize=(9,6))
plt.bar(bar_positions, list(num_imgs.values()), align='center')
plt.xticks(bar_positions, list(num_imgs.keys()))
plt.title('Distribution in Validation Dataset')
plt.show()

### Preprocess

In [None]:
# Random augmentation applied to training images only.
augmentation = {
    'rotation_range': 40,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}

# Both generators multiply pixel values by 1/255; validation images get no
# augmentation.
training_datagen = ImageDataGenerator(rescale=1./255, **augmentation)
validation_datagen = ImageDataGenerator(rescale=1./255)

# Directory-reading options shared by the training and validation generators.
flow_options = {
    'target_size': (150,150),
    'batch_size': 32,
    'class_mode': 'categorical',
}

training_generator = training_datagen.flow_from_directory(
    training_dir, **flow_options)

validation_generator = validation_datagen.flow_from_directory(
    validation_dir, **flow_options)

### Training

In [None]:
# InceptionV3 with ImageNet weights and without its original classification
# head; a small custom head is stacked on top of its output.
inception = InceptionV3(weights='imagenet', include_top=False)
x = inception.output
x = GlobalAveragePooling2D()(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.2)(x)

# One softmax unit per class discovered by the training generator, so the
# output layer always matches the directory layout instead of a fixed 5.
num_classes = len(training_generator.class_indices)
predictions = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=inception.input, outputs=predictions)
# NOTE(review): `lr=` and `fit_generator` are the spellings of the Keras
# version this notebook was written for; newer releases use
# `learning_rate=` and `model.fit` -- confirm against the installed version.
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

# Derive the batches per epoch from the real image counts rather than the
# hard-coded 1200 / 300 estimates; max(1, ...) keeps at least one step when
# a split holds fewer images than one batch.
steps_per_epoch = max(
    1, training_generator.samples // training_generator.batch_size)
validation_steps = max(
    1, validation_generator.samples // validation_generator.batch_size)

history = model.fit_generator(
    training_generator,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    epochs=30)

### Evaluation

In [None]:
# Keras logs the accuracy metric as 'acc'/'val_acc' in older releases and as
# 'accuracy'/'val_accuracy' in newer ones; use whichever this run produced.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# One figure per metric, each showing the training and validation curves.
for metric_key, label in ((acc_key, 'Accuracy'), ('loss', 'Loss')):
    plt.plot(history.history[metric_key])
    plt.plot(history.history['val_' + metric_key])
    plt.title('Model ' + label)
    plt.ylabel(label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Val'], loc='upper left')
    plt.show()

In [None]:
def preprocess_image(path):
    """Load the image at *path* as a one-image batch for the model.

    The image is resized to 150x150 while loading, converted to an array,
    given a leading axis of length 1, and divided by 255.
    """
    pixels = img_to_array(load_img(path, target_size=(150, 150)))
    batch = np.expand_dims(pixels, axis=0)
    batch /= 255.
    return batch

# The test CSV supplies the image file names ('Image_id') and the
# ground-truth labels ('labels').
test_df = pd.read_csv('dataset/test.csv')
test_ids = [str(i) for i in test_df['Image_id'].tolist()]

# Load every test image and stack the one-image batches into a single array.
test_imgs = [os.path.join('dataset/test', i) for i in test_ids]
test_imgs = np.vstack([preprocess_image(i) for i in test_imgs])

y_test = test_df['labels']
y_pred = model.predict(test_imgs, batch_size=1, verbose=1)

# Only the required arguments are passed: the removed keywords merely restated
# the defaults, and `eps` has been deprecated and later removed in
# scikit-learn, where passing it raises a TypeError.
# NOTE(review): log_loss infers the classes from y_test in sorted order and
# expects y_pred's columns in that same order -- confirm this matches the
# model's output order (training_generator.class_indices).
loss = log_loss(y_test, y_pred)
print(loss)

In [None]:
# Attribution: external project this notebook lists as its inspiration.
'''
Inspiration
1. https://github.com/vijayg15/Keras-MultiClass-Image-Classification
'''