In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

#for dirname, _, filenames in os.walk('/kaggle/input'):
 #   for filename in filenames:
  #      print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

In [None]:
import json
import keras
import tensorflow as tf
from keras import layers
from keras.applications import DenseNet121
from keras.callbacks import Callback, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Dropout,Activation, Flatten, Conv2D, MaxPooling2D
from keras.models import Sequential
from keras.utils.vis_utils import plot_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score

In [None]:
# List the entries of the input directory (displayed as the cell output).
os.listdir('../input/')

In [None]:
import os
import cv2
import glob
import math
from PIL import Image
import matplotlib.pyplot as plt

In [None]:
# Training labels; later cells read the `id`, `file_name` and `category_id` columns.
label_df = pd.read_csv('../input/iwildcam-2019-fgvc6/train.csv')
# Sample submission; its `Id` column is used below to enumerate the test images.
submission_df =  pd.read_csv('../input/iwildcam-2019-fgvc6/sample_submission.csv')

In [None]:
def display_samples(df, columns=4, rows=3):
    """Plot the first ``columns * rows`` training images of ``df`` in a grid.

    Each subplot is titled with the row's ``category_id``.

    Args:
        df: DataFrame with ``file_name`` and ``category_id`` columns.
        columns: Number of grid columns.
        rows: Number of grid rows.

    Raises:
        FileNotFoundError: If an image listed in ``df`` cannot be read.
    """
    fig = plt.figure(figsize = (5*columns, 3*rows))

    # Never request more cells than the frame has rows.
    n_samples = min(columns*rows, len(df))
    for i in range(n_samples):
        # Positional access so a filtered or re-indexed frame still works.
        row = df.iloc[i]
        image_path = row['file_name']
        image_id = row['category_id']
        full_path = f'../input/iwildcam-2019-fgvc6/train_images/{image_path}'
        img = cv2.imread(full_path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f'Could not read image: {full_path}')
        # OpenCV decodes to BGR while matplotlib expects RGB; convert so the
        # displayed colours are correct.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        ax = fig.add_subplot(rows, columns, i+1)
        ax.set_title(image_id)
        ax.imshow(img)

display_samples(label_df)

In [None]:
def get_pad_width(im, new_shape, is_rgb = True):
    """Compute the ``np.pad`` widths that grow ``im`` to ``new_shape`` x ``new_shape``.

    The extra rows/columns are split between the two sides of each axis; when
    the amount is odd the trailing side (bottom / right) gets the extra one.

    Args:
        im: Array whose first two axes are height and width.
        new_shape: Target size for both height and width.
        is_rgb: When True, append a ``(0, 0)`` entry so a third (channel)
            axis is left unpadded.

    Returns:
        A tuple of ``(before, after)`` pairs, one per padded axis.
    """
    def _split(extra):
        # Leading side rounds down, trailing side rounds up.
        half = extra / 2
        return (math.floor(half), math.ceil(half))

    height, width = im.shape[0], im.shape[1]
    spatial = (_split(new_shape - height), _split(new_shape - width))
    if not is_rgb:
        return spatial
    return spatial + ((0, 0),)

def pad_and_resize(image_path, dataset, pad = False, desired_size = 64):
    """Load one competition image and resize it to a square thumbnail.

    Args:
        image_path: Image identifier; ``.jpg`` is appended to form the file name.
        dataset: Name prefix of the image folder (the notebook passes
            ``'train'`` or ``'test'``), giving ``{dataset}_images``.
        pad: When True, zero-pad the image to a square (side = its largest
            dimension) before resizing, which preserves the aspect ratio.
        desired_size: Side length, in pixels, of the returned image.

    Returns:
        A ``uint8`` array of ``desired_size`` x ``desired_size`` pixels, in the
        channel order produced by ``cv2.imread``.

    Raises:
        FileNotFoundError: If the image is missing or cannot be decoded.
    """
    full_path = f'../input/iwildcam-2019-fgvc6/{dataset}_images/{image_path}.jpg'
    img = cv2.imread(full_path)
    if img is None:
        # cv2.imread returns None on failure; fail here with the offending
        # path instead of crashing later inside np.pad / cv2.resize.
        raise FileNotFoundError(f'Could not read image: {full_path}')

    if pad:
        pad_width = get_pad_width(img, max(img.shape))
        padded = np.pad(img, pad_width = pad_width, mode = 'constant', constant_values = 0)
    else:
        padded = img
    resized = cv2.resize(padded, (desired_size,)*2).astype('uint8')
    return resized

In [None]:
%%time
# Load and downscale every training image, in the order of label_df['id'].
train_resized_imgs = [pad_and_resize(image_id, 'train') for image_id in label_df['id']]
print("Training done")
# Same for the test images, in the order of the sample submission's 'Id' column.
test_resized_imgs = [pad_and_resize(image_id, 'test') for image_id in submission_df['Id']]
    

In [None]:
# One-hot encode the labels; the dummy column names are kept in train_label.
target_dummies = pd.get_dummies(label_df['category_id'])
train_label = target_dummies.columns.values
y_train = target_dummies.values

# Stack the per-image arrays along a new leading axis.
X_train = np.stack(train_resized_imgs)
X_test = np.stack(test_resized_imgs)

print(X_train.shape, X_test.shape, y_train.shape, sep='\n')

In [None]:
# Write the preprocessed arrays to .npy files in the current working directory.
np.save('X_train.npy', X_train)
np.save('X_test.npy', X_test)
np.save('y_train.npy', y_train) 

In [None]:
# Reload the cached arrays from the same relative location they were saved to
# (the current working directory), so the cell does not depend on the working
# directory being named `working`.
x_train = np.load('X_train.npy')
x_test = np.load('X_test.npy')
y_train = np.load('y_train.npy')

print('X_train.shape', x_train.shape)
print(x_train.shape[0], 'train_samples')
print(x_test.shape[0], 'test_samples')


In [None]:
# Cast the pixel arrays to float32 and divide by 255 to normalise them.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

In [None]:
class Metrics(Callback):
    """Keras callback that records macro F1, recall and precision on the
    validation data at the end of every epoch.

    The per-epoch scores are stored in ``val_f1s``, ``val_recalls`` and
    ``val_precisions``.

    Args:
        validation_data: Optional ``(X_val, y_val)`` pair with one-hot
            ``y_val``. When omitted, the callback falls back to the
            ``validation_data`` attribute that some Keras versions set on
            callbacks during ``fit``.
    """
    def __init__(self, validation_data=None):
        super().__init__()
        # Kept under a separate name so Keras may still assign
        # `self.validation_data` itself without clobbering an explicit value.
        self._explicit_validation_data = validation_data

    def on_train_begin(self, logs=None):
        """Reset the score histories at the start of training."""
        self.val_f1s = []
        self.val_recalls = []
        self.val_precisions = []

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the validation data and append/print the macro scores.

        Raises:
            ValueError: If no validation data is available to the callback.
        """
        val_data = self._explicit_validation_data
        if val_data is None:
            val_data = getattr(self, 'validation_data', None)
        if val_data is None:
            raise ValueError(
                "Metrics callback has no validation data: pass "
                "Metrics(validation_data=(X_val, y_val)) or use a Keras "
                "version that sets `validation_data` on callbacks."
            )

        X_val, y_val = val_data[:2]
        y_pred = self.model.predict(X_val)

        # Take the class count from the model output instead of a notebook
        # global, then turn the arg-max predictions back into one-hot rows so
        # they are comparable with the one-hot y_val.
        n_classes = y_pred.shape[1]
        y_pred_cat = keras.utils.to_categorical(y_pred.argmax(axis = 1), num_classes = n_classes)

        _val_f1 = f1_score(y_val, y_pred_cat, average = 'macro')
        _val_recall = recall_score(y_val, y_pred_cat, average = 'macro')
        _val_precision = precision_score(y_val, y_pred_cat, average = 'macro')

        self.val_f1s.append(_val_f1)
        self.val_recalls.append(_val_recall)
        self.val_precisions.append(_val_precision)

        # Separators added: the fields previously ran together on one line.
        print(f"val_f1: {_val_f1:.4f} - val_recall: {_val_recall:.4f} - val_precision: {_val_precision:.4f}")

        return

In [None]:
# Training hyper-parameters.
batch_size = 64
# One output unit per one-hot label column, derived from the data so the
# network's output width cannot drift out of sync with y_train.
num_classes = y_train.shape[1]
epochs = 35
# Fraction of the training arrays that fit() holds out for validation.
val_split = 0.2
# Where the trained model is written after fitting.
save_dir = os.path.join(os.getcwd(), 'models')
model_name = 'keras_cnn_model.h5'

In [None]:
# Two conv blocks (conv-relu-conv-relu-pool-dropout) followed by a dense
# classifier head ending in a softmax over num_classes outputs.
model = Sequential([
    # Block 1: 32 filters.
    Conv2D(32, (3, 3), padding = 'same', input_shape = x_train.shape[1:]),
    Activation('relu'),
    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size = (2,2)),
    Dropout(0.25),

    # Block 2: 64 filters.
    Conv2D(64, (3, 3), padding = 'same'),
    Activation('relu'),
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size = (2,2)),
    Dropout(0.25),

    # Classifier head.
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])

In [None]:
# Configure the optimiser and loss before training starts.
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

# Callback that collects the per-epoch validation F1 / precision / recall.
f1_metrics = Metrics()

hist = model.fit(
    x_train,
    y_train,
    epochs = epochs,
    batch_size = batch_size,
    validation_split = val_split,
    shuffle = True,
    callbacks = [f1_metrics],
)


In [None]:
# Create the output directory if needed; exist_ok avoids the check-then-create race.
os.makedirs(save_dir, exist_ok=True)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)

# Interpolate the path into the message (print() does not apply %-formatting
# to extra positional arguments).
print('Saved trained model at %s' % model_path)

In [None]:
# Per-epoch training history from fit(), extended with the validation scores
# collected by the Metrics callback (attributes val_f1s / val_precisions /
# val_recalls on the f1_metrics instance).
history_df = pd.DataFrame(hist.history)
history_df['val_f1'] = f1_metrics.val_f1s
history_df['val_precision'] = f1_metrics.val_precisions
history_df['val_recall'] = f1_metrics.val_recalls

In [None]:
# Class probabilities for every test image.
y_test = model.predict(x_test)

# Start from the sample submission file (read_csv needs the file, not its folder).
submission_df = pd.read_csv('../input/iwildcam-2019-fgvc6/sample_submission.csv')
# argmax gives the index of a one-hot column; translate it back to the original
# category_id through train_label (the get_dummies column labels).
submission_df['Predicted'] = train_label[y_test.argmax(axis=1)]
print(submission_df.shape)


In [None]:
# Write the predictions and the training history as CSV files (without the index column).
submission_df.to_csv('submission.csv', index = False)
history_df.to_csv('history.csv', index = False)

In [0]:
import pandas as pd
# NOTE(review): this reads from the `inaturalist-2019-fgvc6` input folder, while
# every other cell uses `iwildcam-2019-fgvc6`, and the result is not used in the
# visible cells — confirm whether this cell is still needed.
kaggle_sample_submission = pd.read_csv("../input/inaturalist-2019-fgvc6/kaggle_sample_submission.csv")