# Load and train

In [None]:
import csv
import h5py
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

import xgboost as xgb

from skimage.transform import resize
from skimage import segmentation
from skimage.morphology import watershed
from skimage import measure
from skimage import morphology
from scipy.stats import moment 

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, scale

from imblearn.over_sampling import RandomOverSampler

from keras.layers import Dense, Input
from keras.layers import concatenate, merge
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import MaxPooling2D, LeakyReLU
from keras.layers import Conv2D
from keras.models import load_model
from keras.models import Sequential
from keras.utils import to_categorical
from keras.models import Model
from keras.utils import plot_model
from keras.preprocessing.image import ImageDataGenerator

%matplotlib inline

# Set global variables and model hyper-parameters

In [None]:
class CONFIG:
    """Bundle of filesystem locations and model hyper-parameters.

    The ``*_location`` attributes are produced by plain string concatenation
    of ``path`` with the matching relative argument, so ``path`` should end
    with a path separator.
    """

    def __init__(self, path, data_location='data/', train_images_location='data/train_images/',
                 test_images_location='/data/test_images/', image_size=224, n_classes=121, batch_size=64,
                 n_epochs=10):
        # Hyper-parameters are stored exactly as given.
        self.image_size, self.n_classes = image_size, n_classes
        self.batch_size, self.n_epochs = batch_size, n_epochs

        # Every location is resolved against the project root.
        root = path
        self.path = root
        self.data_location = ''.join((root, data_location))
        self.train_images_location = ''.join((root, train_images_location))
        self.test_images_location = ''.join((root, test_images_location))

In [None]:
# Project root. Set the KAGGLE_PROJECT_PATH environment variable to run the
# notebook on another machine; the original author's location is kept as the
# fallback so existing behaviour is unchanged.
# os.path.join(..., '') guarantees the trailing separator that CONFIG's plain
# string concatenation relies on.
cfg = CONFIG(path=os.path.join(
    os.environ.get('KAGGLE_PROJECT_PATH',
                   '/Users/guillaumecorda/Desktop/UvA/Applied Machine Learning/Kaggle/'),
    ''))

# Load data and labels

In [None]:
# Training images present on disk (order as returned by os.listdir).
filenames = [i for i in os.listdir(cfg.train_images_location) if i.endswith('.jpg')]

# Read the label file once and build both lookup tables in the same pass:
#   file_to_class : image file name -> class id (kept as the CSV string)
#   class_counts  : class id (int)  -> number of labelled images
file_to_class = {}
class_counts = {}
with open(cfg.data_location + 'train_onelabel.csv', mode='r') as infile:
    for row in csv.reader(infile):
        # Skip blank lines and the header row.
        if len(row) < 2 or row[1] == 'class':
            continue
        file_to_class[row[0]] = row[1]
        class_counts[int(row[1])] = class_counts.get(int(row[1]), 0) + 1

# Turn the raw counts into over-sampling targets: every class is moved one
# sixth of the way towards the size of the largest class.
max_nr = max(class_counts.values())
class_counts = {label: int(count + (max_nr - count) / 6)
                for label, count in class_counts.items()}

# X and Y_ are filled completely by the pre-processing loop, so np.empty is fine.
X = np.empty([len(filenames), cfg.image_size, cfg.image_size, 1])
Y_ = np.empty([len(filenames)])
# Y receives only a single 1.0 per row later on, so it must start zeroed;
# np.empty would leave arbitrary values in every other column.
Y = np.zeros([sum(class_counts.values()), cfg.n_classes])

# Helper functions

In [None]:
def get_padding(i):
    """Split a total padding amount ``i`` into a ``(before, after)`` pair.

    Both halves are equal when ``i`` is even; when ``i`` is odd the extra
    unit goes to the second element.
    """
    half, remainder = divmod(i, 2)
    return (int(half), int(half + remainder))

In [None]:
def pad_image(img):
    """Zero-pad a 2-D image so that it becomes square.

    The shorter axis is padded on both sides (split by ``get_padding``) with
    the constant mode of ``np.pad``. A square input is returned as is.
    """
    rows, cols = img.shape
    if rows == cols:
        return img

    if rows > cols:
        # Taller than wide: add columns left and right.
        pad_width = ((0, 0), get_padding(rows - cols))
    else:
        # Wider than tall: add rows above and below.
        pad_width = (get_padding(cols - rows), (0, 0))
    return np.pad(img, pad_width, 'constant')

In [None]:
def resize_image(img):
    """Resize ``img`` to ``cfg.image_size`` x ``cfg.image_size`` (skimage ``resize``, reflect mode)."""
    target_shape = (cfg.image_size, cfg.image_size)
    return resize(img, target_shape, mode='reflect')

# Pre-processing

In [None]:
# Silence every Python warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation and data-quality
# warnings raised by later cells are hidden as well.
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Fill X (images) and Y_ (integer labels) one training file at a time.
for idx, fname in enumerate(filenames):
    raw = mpimg.imread(cfg.train_images_location + fname)
    # Scale to [0, 1] and invert the intensities.
    inverted = np.absolute(np.divide(raw.astype(float), 255) - 1.0)
    # Pad to a square, then resize to the network input size.
    squared = resize_image(pad_image(inverted))
    X[idx] = squared.reshape(cfg.image_size, cfg.image_size, 1)
    Y_[idx] = int(file_to_class[fname])

# Class imbalance

In [None]:
# The over-sampler needs 2-D (samples, features) input, so flatten each image.
# The sample count comes from X itself instead of a hard-coded 24204.
X = X.reshape(len(X), cfg.image_size * cfg.image_size)

# class_counts maps class id -> desired number of samples after resampling.
sm = RandomOverSampler(ratio=class_counts)
X, Y_ = sm.fit_sample(X, Y_)

# Back to image tensors.
X = X.reshape(len(X), cfg.image_size, cfg.image_size, 1)

# One-hot encode the resampled labels. Y is rebuilt here from zeros and sized
# from Y_ so that every row is exactly one-hot and the row count always
# matches X.
Y = np.zeros((len(Y_), cfg.n_classes))
Y[np.arange(len(Y_)), Y_.astype(int)] = 1.0

In [None]:
# Rotate the samples that follow the original images by a multiple of 90 degrees.
# `total` was previously only defined in the Predict section, so this cell
# failed (or used the test-set size) depending on execution order.
# NOTE(review): assumes the over-sampler appends the duplicated samples after
# the len(filenames) original images — confirm for the installed imblearn version.
total = len(filenames)
for i in range(total, X.shape[0]):
    X[i] = np.rot90(X[i], (1 + (i % 4)))

# Standardize data

In [None]:
X = X.reshape((X.shape[0], cfg.image_size, cfg.image_size, 1))

# Standardise every image separately and write the result back in place.
# The previous version assigned the result to X itself (destroying the
# dataset after the first iteration) and passed a 3-D array to StandardScaler,
# which only accepts 2-D input.
# NOTE(review): StandardScaler works column-wise, so each pixel column of an
# image is standardised independently — confirm this is the intended
# normalisation rather than a per-image or dataset-wide mean/std.
scaler = StandardScaler()
for i in range(X.shape[0]):
    X[i, :, :, 0] = scaler.fit_transform(X[i, :, :, 0])

In [None]:
# NOTE(review): the return value is not assigned, so this cell only displays
# the result of scale(X); X is presumably left unchanged (sklearn's `scale`
# copies by default — verify). X is 4-D at this point, which `scale` may
# reject — confirm whether this cell is still needed.
scale(X)

# 1st Network

## Load Previous Model

In [None]:
# Load a previously saved Keras model from the project's output directory.
# NOTE(review): `cnn_model` is re-assigned by the "Create new CNN" cells below.
cnn_model = load_model(cfg.path+'output_guillaume/models/model__428.h5')

In [None]:
# Expose the second-to-last layer of the CNN as a feature extractor and
# run it over X.
penultimate_layer = cnn_model.layers[-2]
intermediate_cnn_model = Model(inputs=cnn_model.input, outputs=penultimate_layer.output)

cnn_output = intermediate_cnn_model.predict(X)

In [None]:
# Sanity check: display the shape of the extracted CNN features.
cnn_output.shape

## Create new CNN

In [None]:
# Three convolution stages (the first two followed by 2x2 max-pooling), then
# two 512-unit dense layers with dropout and a softmax over the classes.
cnn_layers = [
    Conv2D(64, kernel_size=(3, 3), padding='same', input_shape=X[0].shape),
    LeakyReLU(),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(32, kernel_size=(3, 3), padding='same'),
    LeakyReLU(),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(32, kernel_size=(3, 3), padding='same'),
    LeakyReLU(),
    # No pooling after the third convolution.
    Dropout(0.5),

    Flatten(),
    Dense(512),
    LeakyReLU(),
    Dropout(0.5),
    Dense(512),
    LeakyReLU(),
    Dropout(0.5),
    Dense(cfg.n_classes, activation='softmax'),
]
cnn_model = Sequential(cnn_layers)

cnn_model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

cnn_model.summary()

In [None]:
# Train the CNN on the in-memory arrays for two epochs.
history = cnn_model.fit(x=X, y=Y, batch_size=cfg.batch_size, epochs=2, verbose=1)

In [None]:
# Expose the second-to-last layer of the freshly trained CNN as a feature
# extractor and run it over X.
penultimate_layer = cnn_model.layers[-2]
intermediate_cnn_model = Model(inputs=cnn_model.input, outputs=penultimate_layer.output)

cnn_output = intermediate_cnn_model.predict(X)

In [None]:
# Persist the current `cnn_model` to the project's output directory.
cnn_model.save(cfg.path+'/output_guillaume/models/model_new_cnn_224_2.h5')

## Keras preprocessing (ImageDataGenerator)

In [None]:
# Preprocessing
# NOTE(review): featurewise_std_normalization only takes effect after the
# generator has been fitted with `datagen.fit(sample_images)`; no such call is
# visible in this notebook, so Keras presumably skips the normalisation (with
# a warning) — confirm.
train_datagen = ImageDataGenerator(
        rotation_range=360,
        width_shift_range=0.1,
        height_shift_range=0.1,
        rescale=1./255,
        shear_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest',
        featurewise_std_normalization=True)

val_datagen = ImageDataGenerator(rescale=1./255, featurewise_std_normalization=True)


def _flow_grayscale(datagen, directory, batch_size=None, shuffle=True):
    """Stream grayscale, one-hot labelled batches from ``cfg.path + directory``.

    ``batch_size`` defaults to ``cfg.batch_size``; ``shuffle`` is forwarded to
    ``flow_from_directory`` unchanged.
    """
    return datagen.flow_from_directory(
                cfg.path + directory,
                target_size=(cfg.image_size, cfg.image_size),
                batch_size=cfg.batch_size if batch_size is None else batch_size,
                class_mode='categorical',
                color_mode='grayscale',
                shuffle=shuffle)


train_generator = _flow_grayscale(train_datagen, 'data_formatted/train_images')

val_generator = _flow_grayscale(val_datagen, 'data_formatted/val_images')

train_subm_generator = _flow_grayscale(train_datagen, 'data_submission_train/')

# Submission images are served one at a time and NOT shuffled, so that the
# order of predictions matches `subm_generator.filenames`.
subm_generator = _flow_grayscale(val_datagen, 'data_submission_test/',
                                 batch_size=1, shuffle=False)

In [None]:
# Three blocks of two ReLU convolutions followed by overlapping 3x3/stride-2
# max-pooling, then two 512-unit dense layers with dropout and a softmax.
conv_args = dict(kernel_size=(3, 3), padding='same', activation='relu')
pool_args = dict(pool_size=(3, 3), strides=2)

cnn_model = Sequential([
    Conv2D(32, input_shape=(cfg.image_size, cfg.image_size, 1), **conv_args),
    Conv2D(16, **conv_args),
    MaxPooling2D(**pool_args),
    Conv2D(32, **conv_args),
    Conv2D(32, **conv_args),
    MaxPooling2D(**pool_args),
    Conv2D(64, **conv_args),
    Conv2D(32, **conv_args),
    MaxPooling2D(**pool_args),

    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(cfg.n_classes, activation='softmax'),
])

# Alternative optimizer tried earlier:
#sgd = optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
cnn_model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

cnn_model.summary()

In [None]:
# Silence every Python warning (repeats the filter already installed in the
# Pre-processing section).
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Train from the augmenting generator: 5 epochs of 200 batches each,
# validating on 80 batches after every epoch.
history = cnn_model.fit_generator(generator=train_generator,
                                  steps_per_epoch=200,
                                  epochs=5,
                                  validation_data=val_generator,
                                  validation_steps=80)

In [None]:
# Expose the second-to-last layer of the generator-trained CNN as a feature
# extractor and run it over the in-memory array X.
penultimate_layer = cnn_model.layers[-2]
intermediate_cnn_model = Model(inputs=cnn_model.input, outputs=penultimate_layer.output)

cnn_output = intermediate_cnn_model.predict(X)

# 2nd Network

## Compute new features intrinsic to images

In [None]:
def compute_new_features(filenames):
    """Compute simple shape and intensity features for each training image.

    Parameters
    ----------
    filenames : sequence of str
        Image file names relative to ``cfg.train_images_location``.

    Returns
    -------
    numpy.ndarray of shape (len(filenames), 10)
        One row per file with the columns: height, width, height/width,
        width/height, (height/width)**2, (width/height)**2, mean pixel value,
        2nd central moment, 3rd central moment, class label read from
        ``train_onelabel.csv``.

    Raises
    ------
    KeyError
        If a file name has no entry in ``train_onelabel.csv``.
    """
    classes = pd.read_csv(cfg.data_location + 'train_onelabel.csv')
    # Build the file -> label lookup once instead of scanning the whole table
    # for every image. keep-first matches the former `.values[0]` behaviour
    # should a file name occur more than once.
    label_of = classes.drop_duplicates('image').set_index('image')['class'].to_dict()

    output = np.zeros((len(filenames), 10))
    for i, filename in enumerate(filenames):
        image = mpimg.imread(cfg.train_images_location + filename)
        h = image.shape[0]
        w = image.shape[1]
        h_ = h/w
        w_ = w/h
        pixels = image.flatten()  # flattened once, shared by both moments
        output[i] = np.array([h, w, h_, w_, h_**2, w_**2,
                              np.mean(image),
                              moment(pixels, moment=2),
                              moment(pixels, moment=3),
                              label_of[filename]])
    return output

In [None]:
# Feature matrix for the images listed in `filenames`. Despite its name, `df`
# is the NumPy array returned by compute_new_features (9 feature columns
# followed by the label column), not a pandas DataFrame.
df = compute_new_features(filenames)

In [None]:
# Sanity check: shape of the feature columns (label column excluded).
df[:,:9].shape

In [None]:
def format_targets(y_true, n_classes=121):
    """One-hot encode a 1-D sequence of class ids.

    Parameters
    ----------
    y_true : array-like
        Class ids; integral values stored as floats are accepted.
    n_classes : int, optional
        Width of the encoding (default 121, the value previously hard-coded).

    Returns
    -------
    numpy.ndarray of shape (len(y_true), n_classes)
        Float matrix with a single 1 per row.

    Raises
    ------
    ValueError
        If a label is negative or not an integer (the former counting loop
        never terminated on such input).
    IndexError
        If a label is greater than or equal to ``n_classes``.
    """
    labels = np.asarray(y_true, dtype=float).ravel()
    # NaN is never a valid label; mapping it to -1 keeps the integer cast well
    # defined and routes it to the ValueError below.
    index = np.where(np.isnan(labels), -1, labels).astype(int)
    if np.any(index != labels) or np.any(index < 0):
        raise ValueError('class labels must be non-negative integers')
    if np.any(index >= n_classes):
        raise IndexError('class label out of range for %d classes' % n_classes)

    output = np.zeros(shape=(len(labels), n_classes))
    output[np.arange(len(labels)), index] = 1
    return output

In [None]:
# One-hot targets built from the label column (index 9) of the feature matrix.
target = format_targets(df[:,9])

## Train FC Network

In [None]:
# Fully connected classifier over the nine hand-crafted image features:
# two 512-unit ReLU layers with dropout, then a softmax over the classes.
n_feature_shape = df[:,:9][0].shape

fc_model = Sequential([
    Dense(512, activation='relu', input_shape=n_feature_shape),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(cfg.n_classes, activation='softmax'),
])

fc_model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

fc_model.summary()

In [None]:
# Train the fully connected model on the feature columns.
history = fc_model.fit(x=df[:,:9], y=target,
                       batch_size=cfg.batch_size,
                       epochs=cfg.n_epochs,
                       verbose=1)

In [None]:
# Expose the second-to-last layer of the FC model as a feature extractor and
# run it over the hand-crafted feature columns.
fc_penultimate_layer = fc_model.layers[-2]
intermediate_layer_model = Model(inputs=fc_model.input, outputs=fc_penultimate_layer.output)

fc_output = intermediate_layer_model.predict(df[:,:9])

In [None]:
# Sanity check: display the shape of the extracted FC features.
fc_output.shape

In [None]:
# Persist the fully connected model. The previous code called `model.save`,
# but no `model` exists at this point of the notebook (it is only loaded in
# the Predict section), so the cell either failed or saved an unrelated model.
fc_model.save(cfg.path+'/output_guillaume/models/model_new_fc.h5')

# Merge Networks

In [None]:
# Stack the two feature sets (and their targets) along the default axis 0,
# i.e. the CNN and FC outputs are appended as additional *samples*.
# NOTE(review): a feature-wise merge would need axis=1 and identical sample
# counts/order in both arrays, which the over-sampling of X presumably
# breaks — confirm that sample-wise stacking is intended.
new_input = np.concatenate([cnn_output, fc_output])
print(new_input.shape)
new_target = np.concatenate([Y, target])
print(new_target.shape)

In [None]:
# Classifier trained on the stacked network outputs: two 512-unit ReLU layers
# with dropout, then a softmax over the classes.
merge_input_shape = new_input[0].shape

merge_model = Sequential([
    Dense(512, activation='relu', input_shape=merge_input_shape),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(cfg.n_classes, activation='softmax'),
])

merge_model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

merge_model.summary()

In [None]:
# Train the merge model for ten epochs.
fit_options = dict(epochs=10, batch_size=cfg.batch_size, verbose=1)
history = merge_model.fit(x=new_input, y=new_target, **fit_options)

# Predict

In [None]:
# Test images to predict. The directory comes from cfg.test_images_location —
# the same location the prediction loop reads the files from — instead of a
# second, hand-built path.
# NOTE: this re-binds `filenames`, which held the training files until here.
filenames = [i for i in os.listdir(cfg.test_images_location) if i.endswith('.jpg')]

# Submission table; -1 marks images that have not been classified yet.
labels = pd.DataFrame(filenames, columns=['image'])
labels['class'] = -1

In [None]:
# Load a saved model into `model`.
# NOTE(review): no later cell in this notebook reads `model` — the prediction
# loop below calls `cnn_model` — confirm which model should produce the
# submission.
model = load_model(cfg.path+'/output_guillaume/models/model_new.h5')

In [None]:
total = len(filenames)

# Classify each test image with `cnn_model`, applying the same pre-processing
# as for training (invert, pad to square, resize).
predictions = []
for fname in filenames:
    img = mpimg.imread(cfg.test_images_location + fname)
    img = np.absolute(np.divide(img.astype(float), 255) - 1.0)
    img = resize_image(pad_image(img))
    img = img.reshape(1, cfg.image_size, cfg.image_size, 1)
    predictions.append(int(cnn_model.predict_classes(img, verbose=0)[0]))

# Write all predictions in one vectorised step instead of a boolean-mask
# assignment per image. Rows without a prediction keep their previous value.
predicted = dict(zip(filenames, predictions))
labels['class'] = labels['image'].map(predicted).fillna(labels['class']).astype(int)

# The former `labels.sort_values(by='class')` discarded its result and so had
# no effect; it was removed, and the row order is unchanged.
labels.sample(n=5)

In [None]:
# Display the labels DataFrame (the whole frame, not a preview).
labels

In [None]:
# Write the predictions to the submission CSV, without the index column.
labels.to_csv(cfg.path+'output_guillaume/predictions/model_new_cnn_224_2.csv', index=False)

**NOTES**
* Try a multi-input model with Keras preprocessing
* Try with different cnn
* Try with different fcn
* Try to change final architecture