In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory 
import cv2
import math
from PIL import Image
import matplotlib.pyplot as plt

import keras
from keras import layers
from keras.engine.topology import Input
from keras.engine.training import Model
from keras.models import Sequential, load_model, save_model
from keras.layers import Activation, Flatten, Dense, Dropout
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import Conv2D, UpSampling2D, Conv2DTranspose, ZeroPadding2D
from keras.layers.pooling import MaxPooling2D, AveragePooling2D, GlobalAveragePooling2D, GlobalMaxPooling2D
from keras.layers.core import SpatialDropout2D
from keras.layers.merge import concatenate, Add
from keras.layers.normalization import BatchNormalization
from keras.callbacks import Callback, EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score
from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.optimizers import RMSprop, SGD
from sklearn.preprocessing import OneHotEncoder
from keras.applications import DenseNet201, InceptionResNetV2

from sklearn.model_selection import train_test_split, StratifiedKFold, KFold

import gc
import os
# Show the top-level entries of the input directory.
input_entries = os.listdir("../input")
print(input_entries)

# Any results you write to the current directory are saved as output.

In [None]:
# Run identifier: `save_model_name` is the file the ModelCheckpoint callback
# writes to, so changing `version` changes the checkpoint file name.
version = 0
basic_name = f'iWildCam_v3_{version}'
save_model_name = f'{basic_name}.model'
print(basic_name)

# Make sure automatic garbage collection is switched on; later cells delete
# large arrays and call gc.collect() explicitly.
gc.enable()

In [None]:
# Load the training metadata; class ids are stored as strings from here on
# (get_training_examples converts them back with int()).
train_df = pd.read_csv('../input/train.csv')
train_df['category_id'] = train_df['category_id'].astype(str)

# Bar chart of the class frequencies (first 22 entries of value_counts()).
train_df['category_id'].value_counts().iloc[0:22].plot(kind='bar')

# Row/column counts drive the chunked feature extraction further down.
train_shape = train_df.shape
train_ids = train_df['id'].values

In [None]:
# Size every image is resized to before it is fed to the feature extractor.
image_width = image_height = 224
# Number of channels per image.
image_channels = 3
# Mini-batch size passed to model.fit when training the dense classifier.
batch_size = 32

In [None]:
class Metrics(Callback):
    """Keras callback that reports macro-averaged F1, precision and recall.

    After every epoch the validation inputs are pushed through the model, the
    predicted class of each row is taken as the arg-max of the model output,
    and scikit-learn's ``f1_score`` / ``recall_score`` / ``precision_score``
    (``average='macro'``) are computed against the validation targets.

    The per-epoch values are accumulated in ``val_f1s``, ``val_recalls`` and
    ``val_precisions``; these lists are reset each time training begins.

    NOTE(review): this relies on ``self.validation_data`` being populated by
    Keras - confirm that the installed Keras version still sets it.
    """

    def on_train_begin(self, logs=None):
        """Reset the per-epoch score histories at the start of each ``fit``."""
        self.val_f1s = []
        self.val_recalls = []
        self.val_precisions = []

    def on_epoch_end(self, epoch, logs=None):
        """Compute, store and print the validation scores for this epoch."""
        X_val, y_val = self.validation_data[:2]
        y_pred = self.model.predict(X_val)

        # Turn the predicted class indices back into one-hot rows so they can
        # be compared with y_val. The width is taken from the model output
        # instead of a hard-coded 14, so the callback keeps working when the
        # number of classes changes.
        y_pred_cat = keras.utils.to_categorical(
            y_pred.argmax(axis=1),
            num_classes=y_pred.shape[1]
        )

        _val_f1 = f1_score(y_val, y_pred_cat, average='macro')
        _val_recall = recall_score(y_val, y_pred_cat, average='macro')
        _val_precision = precision_score(y_val, y_pred_cat, average='macro')

        self.val_f1s.append(_val_f1)
        self.val_recalls.append(_val_recall)
        self.val_precisions.append(_val_precision)

        print((f"val_f1: {_val_f1:.4f}"
               f" — val_precision: {_val_precision:.4f}"
               f" — val_recall: {_val_recall:.4f}"))


# Single shared instance handed to model.fit via the callbacks list.
f1_metrics = Metrics()

In [None]:
def build_Models(input_size):
    """Build a frozen two-backbone feature extractor.

    A DenseNet201 and an InceptionResNetV2 (both with ImageNet weights and
    without their classification heads) are instantiated for ``input_size``.
    Every layer of both backbones is marked non-trainable, each backbone
    output is reduced with global average pooling, and the two pooled vectors
    are concatenated.

    Args:
        input_size: Shape tuple passed as ``input_shape`` to both backbones.

    Returns:
        An uncompiled Keras ``Model`` with two inputs (DenseNet201 input first,
        InceptionResNetV2 input second) and the concatenated pooled features
        as its single output.
    """

    def _frozen_branch(backbone_cls):
        # Instantiate one backbone, freeze it and pool its output.
        backbone = backbone_cls(input_shape=input_size, weights='imagenet', include_top=False)
        for backbone_layer in backbone.layers:
            backbone_layer.trainable = False
        pooled = GlobalAveragePooling2D()(backbone.output)
        return backbone.input, pooled

    # DenseNet201 is built before InceptionResNetV2, as in the original code.
    densenet_input, densenet_features = _frozen_branch(DenseNet201)
    inception_input, inception_features = _frozen_branch(InceptionResNetV2)

    merged_features = concatenate([densenet_features, inception_features])
    return Model(inputs=[densenet_input, inception_input], outputs=merged_features)

In [None]:
# Instantiate the frozen two-backbone feature extractor for the configured
# image size and print its layer summary.
Combined_model = build_Models(input_size=(image_width, image_height, image_channels))
Combined_model.summary()

In [None]:
def get_training_examples(_model=None, df=None, start_index=0, samples=1, img_width=299, img_height=299, img_channels=3):
    """Load a contiguous slice of training images and extract their features.

    Rows ``start_index`` .. ``start_index + samples - 1`` of ``df`` are read
    positionally. For each row the image ``../input/train_images/<id>.jpg`` is
    loaded, resized, converted to float32 and divided by 255. The resulting
    batch is fed twice to ``_model.predict`` (once per model input).

    Args:
        _model: Object with a ``predict`` method accepting a list of two arrays.
        df: DataFrame with ``id`` and ``category_id`` columns.
        start_index: Positional index of the first row to load.
        samples: Number of consecutive rows to load.
        img_width, img_height: Passed, in that order, as ``target_size`` to
            ``load_img`` and used as the two spatial axes of the batch array.
            NOTE(review): Keras documents ``target_size`` as (height, width);
            this only matters for non-square sizes - confirm before using one.
        img_channels: Size of the last axis of the batch array.

    Returns:
        Tuple ``(features, labels)``: the output of ``_model.predict`` and an
        integer array with ``int(category_id)`` for every loaded row.
    """
    x_train_temp = np.zeros([samples, img_width, img_height, img_channels], dtype=np.float32)
    # `np.int` was only an alias for the builtin `int` and no longer exists in
    # NumPy >= 1.24; the builtin yields the same dtype on every NumPy version.
    y_train_temp = np.zeros([samples], dtype=int)

    for i in range(samples):
        sample_df = df.iloc[start_index+i]
        y_train_temp[i] = int(sample_df['category_id'])
        image_name = sample_df['id']
        img = image.load_img(f'../input/train_images/{image_name}.jpg', target_size=(img_width, img_height))
        # Scale pixel values by 1/255.
        x_train_temp[i] = (image.img_to_array(img).astype('float32'))/255.0

    # The same batch is supplied to both inputs of the two-input extractor.
    return _model.predict([x_train_temp, x_train_temp]), y_train_temp

In [None]:
# Extract features for the whole training set in chunks of `sample_size`
# rows, so only one chunk of decoded images is held in memory at a time.
sample_size = 5000
# Kept because they were module-level names in the original cell.
sample_iters = int(train_shape[0] / sample_size)
sample_remainder = train_shape[0] % sample_size

feature_chunks = []
label_chunks = []

# A single loop handles full chunks and the trailing partial chunk alike:
#  * no call is made with samples == 0 when the row count is an exact
#    multiple of sample_size,
#  * a data set smaller than sample_size is not indexed past its end.
for chunk_start in range(0, train_shape[0], sample_size):
    chunk_len = min(sample_size, train_shape[0] - chunk_start)
    # The *_temp names are kept because a later cell deletes them.
    X_train_temp, y_train_temp = get_training_examples(_model=Combined_model, df=train_df, start_index=chunk_start, samples=chunk_len,
                                                       img_width=image_width, img_height=image_height, img_channels=image_channels)
    feature_chunks.append(X_train_temp)
    label_chunks.append(y_train_temp)

# Concatenate once instead of re-copying the growing array every iteration.
X_train = np.concatenate(feature_chunks, axis=0)
y_train = np.concatenate(label_chunks)

del feature_chunks, label_chunks

In [None]:
# The metadata frame and the last extraction chunk are no longer needed;
# drop the references and trigger a collection right away.
del train_df
del X_train_temp, y_train_temp
gc.collect()

In [None]:
# One-hot encode the integer class ids. The fitted encoder is kept so that
# predictions can be mapped back to class ids when building the submission.
onehot_encoder = OneHotEncoder(sparse=False)
y_train_column = y_train.reshape(-1, 1)  # the encoder is given a 2-D column
y_train_ohe = onehot_encoder.fit_transform(y_train_column)

# Number of one-hot columns, i.e. the classifier's output width.
catagories = y_train_ohe.shape[-1]

In [None]:
def build_model_nn(input_size, output_size, start_neurons, dropout_ratio=0.1):
    """Build and compile the dense classifier trained on extracted features.

    Layer stack: Dense(3 * start_neurons, relu) -> Dropout -> BatchNormalization
    -> Dense(start_neurons, relu) -> Dropout -> Dense(output_size, softmax).

    Args:
        input_size: Length of each input feature vector.
        output_size: Number of units in the softmax output layer.
        start_neurons: Width of the second hidden layer; the first hidden
            layer is three times as wide.
        dropout_ratio: Rate used by both Dropout layers.

    Returns:
        A Keras ``Model`` compiled with categorical cross-entropy loss, the
        ``"adam"`` optimizer and the ``accuracy`` metric.
    """
    features_in = Input(shape=(input_size,))

    hidden = Dense(start_neurons*3, activation="relu")(features_in)
    hidden = Dropout(rate=dropout_ratio)(hidden)
    hidden = BatchNormalization()(hidden)
    hidden = Dense(start_neurons, activation="relu")(hidden)
    hidden = Dropout(rate=dropout_ratio)(hidden)
    class_probs = Dense(output_size, activation="softmax")(hidden)

    classifier = Model(inputs=features_in, outputs=class_probs)
    classifier.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])
    return classifier

In [None]:
# All three callbacks watch validation accuracy ('val_acc', higher is better).

# Stop training after 20 epochs without improvement.
early_stopping = EarlyStopping(
    monitor='val_acc',
    mode='max',
    patience=20,
    verbose=1,
)

# Save the model to `save_model_name` only when the monitored value improves.
model_checkpoint = ModelCheckpoint(
    save_model_name,
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Halve the learning rate after 10 stagnant epochs, never going below 1e-4.
reduce_lr = ReduceLROnPlateau(
    monitor='val_acc',
    mode='max',
    factor=0.5,
    patience=10,
    min_lr=0.0001,
    verbose=1,
)

# Dense classifier sized to the extracted feature vectors and class count.
model = build_model_nn(
    input_size=X_train.shape[1],
    output_size=catagories,
    start_neurons=256,
    dropout_ratio=0.15,
)
model.summary()

In [None]:
# Train the classifier over a shuffled 4-fold split of the extracted features.
# NOTE(review): the same `model` and callback instances are reused for every
# fold, so weights carry over from one fold to the next and later folds
# validate on rows the model has already been trained on - confirm this is
# intended.
epochs = 30
folds = 4

kfold = KFold(n_splits=folds, shuffle=True, random_state=41)
fit_callbacks = [early_stopping, model_checkpoint, reduce_lr, f1_metrics]

for i, (train_index, valid_index) in enumerate(kfold.split(X_train, y_train_ohe)):
    print("Starting Fold: ",i)

    fold_validation = (X_train[valid_index], y_train_ohe[valid_index])
    model.fit(
        X_train[train_index],
        y_train_ohe[train_index],
        batch_size=batch_size,
        epochs=epochs,
        validation_data=fold_validation,
        verbose=True,
        callbacks=fit_callbacks,
    )

In [None]:
# Training arrays are no longer needed once the classifier has been fitted.
del X_train
del y_train, y_train_ohe

# Load the test metadata; its row count drives the chunked extraction below.
test_df = pd.read_csv('../input/test.csv')
test_shape = test_df.shape

gc.collect()

In [None]:
def get_test_examples(_model=None, df=None, _test_labels=None, start_index=0, samples=1, img_width=299, img_height=299, img_channels=3):
    """Load a contiguous slice of test images and extract their features.

    Rows ``start_index`` .. ``start_index + samples - 1`` of ``df`` are read
    positionally. For each row the image ``../input/test_images/<id>.jpg`` is
    loaded, resized, converted to float32 and divided by 255, and the row's
    ``id`` is appended to ``_test_labels``. The batch is fed twice to
    ``_model.predict`` (once per model input).

    Args:
        _model: Object with a ``predict`` method accepting a list of two arrays.
        df: DataFrame with an ``id`` column.
        _test_labels: List that the image ids are appended to (mutated in
            place). When omitted, a new list is created.
        start_index: Positional index of the first row to load.
        samples: Number of consecutive rows to load.
        img_width, img_height: Passed, in that order, as ``target_size`` to
            ``load_img`` and used as the two spatial axes of the batch array.
            NOTE(review): Keras documents ``target_size`` as (height, width);
            this only matters for non-square sizes - confirm before using one.
        img_channels: Size of the last axis of the batch array.

    Returns:
        Tuple ``(features, labels)``: the output of ``_model.predict`` and the
        list of image ids (the same object as ``_test_labels`` when one was
        supplied).
    """
    # The default of None previously failed on `.append`; start a fresh list.
    if _test_labels is None:
        _test_labels = []

    x_test_temp = np.zeros([samples, img_width, img_height, img_channels], dtype=np.float32)
    for i in range(samples):
        sample_df = df.iloc[start_index+i]
        image_name = sample_df['id']
        _test_labels.append(image_name)
        img = image.load_img(f'../input/test_images/{image_name}.jpg', target_size=(img_width, img_height))
        # Scale pixel values by 1/255.
        x_test_temp[i] = (image.img_to_array(img).astype('float32'))/255.0

    # The same batch is supplied to both inputs of the two-input extractor.
    return _model.predict([x_test_temp, x_test_temp]), _test_labels

In [None]:
# Extract features for the whole test set in chunks of `sample_size` rows,
# collecting the image ids in the same order as the feature rows.
sample_size = 5000
# Kept because they were module-level names in the original cell.
sample_iters = int(test_shape[0] / sample_size)
sample_remainder = test_shape[0] % sample_size
test_labels = []

test_feature_chunks = []

# A single loop handles full chunks and the trailing partial chunk alike:
#  * no call is made with samples == 0 when the row count is an exact
#    multiple of sample_size,
#  * a data set smaller than sample_size is not indexed past its end.
for chunk_start in range(0, test_shape[0], sample_size):
    chunk_len = min(sample_size, test_shape[0] - chunk_start)
    # X_test_temp keeps its name because a later cell deletes it.
    X_test_temp, test_labels = get_test_examples(_model=Combined_model, df=test_df, _test_labels=test_labels, start_index=chunk_start, samples=chunk_len,
                                                 img_width=image_width, img_height=image_height, img_channels=image_channels)
    test_feature_chunks.append(X_test_temp)

# Concatenate once instead of re-copying the growing array every iteration.
X_test = np.concatenate(test_feature_chunks, axis=0)

del test_feature_chunks

In [None]:
# Drop the test metadata and the last extraction chunk, then run the trained
# classifier on the extracted test features.
del test_df
del X_test_temp
test_pred = model.predict(X_test)

In [None]:
# Start from the sample submission and overwrite its columns.
submission_df = pd.read_csv('../input/sample_submission.csv')
submission_df['Id'] = test_labels

# Map classifier outputs back to the original class ids via the fitted encoder.
# NOTE(review): presumably inverse_transform picks the highest-scoring column
# per row when given probabilities - confirm for the installed scikit-learn.
predicted_ids = onehot_encoder.inverse_transform(test_pred)
submission_df['Predicted'] = predicted_ids.astype(int)

submission_df.to_csv("submission.csv", index=False)

In [None]:
# Bar chart of the predicted class frequencies (first 22 entries of value_counts()).
submission_df['Predicted'].value_counts().iloc[0:22].plot(kind='bar')