## Necessary Import Modules

In [None]:
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import glob
import cv2
import json
import glob
import torch
from tqdm import tqdm_notebook
from PIL import Image, ImageFile
from torch.utils.data import Dataset, DataLoader
from torchvision import models
from sklearn.model_selection import train_test_split
from torchvision import transforms
from skimage import io
ImageFile.LOAD_TRUNCATED_IMAGES = True

%matplotlib inline

In [None]:
# Training annotations: read the file and parse its JSON text into a dict.
with open(r'/kaggle/input/iwildcam-2020-fgvc7/iwildcam2020_train_annotations.json') as train:
    train_data = json.loads(train.read())

# Test-set information file, parsed the same way.
with open(r'/kaggle/input/iwildcam-2020-fgvc7/iwildcam2020_test_information.json') as test:
    test_data = json.loads(test.read())

# Show the top-level keys of both JSON documents.
print("Columns in training Json: ", train_data.keys())
print("Columns in testing Json:  ", test_data.keys())

In [None]:
# Display the 'categories' entry of the training annotations JSON.
train_data['categories']

In [None]:
# List every path directly under the train/ and test/ image folders.
train_jpeg = glob.glob('../input/iwildcam-2020-fgvc7/train/*')
test_jpeg = glob.glob('../input/iwildcam-2020-fgvc7/test/*')

# Report how many paths each glob matched.
for label, paths in (("number of train jpeg data:", train_jpeg),
                     ("number of test jpeg data:", test_jpeg)):
    print(label, len(paths))

# Show one sample path (last expression of the cell).
train_jpeg[0]

# Training Images

In [None]:
# Preview the first 16 training images on a 4x4 grid with axis ticks hidden.
fig = plt.figure(figsize=(25, 16))
for slot, im_path in enumerate(train_jpeg[:16], start=1):
    ax = fig.add_subplot(4, 4, slot, xticks=[], yticks=[])
    # Downscale to 360x270 before drawing on the subplot that was just added.
    im = Image.open(im_path).resize((360, 270))
    ax.imshow(im)

# Testing Images

In [None]:
# Preview the first 16 test images on a 4x4 grid with axis ticks hidden.
fig = plt.figure(figsize=(25, 16))
for slot, im_path in enumerate(test_jpeg[:16], start=1):
    ax = fig.add_subplot(4, 4, slot, xticks=[], yticks=[])
    # Downscale to 360x270 before drawing on the subplot that was just added.
    im = Image.open(im_path).resize((360, 270))
    ax.imshow(im)

In [None]:
# Build one row per annotation. The file name is looked up through the
# annotation's image_id instead of being taken from train_data['images'] by
# list position, so the result no longer depends on the two lists having the
# same length and order.
file_name_by_image_id = {image['id']: image['file_name']
                         for image in train_data['images']}
annotations = train_data['annotations']

df_train = pd.DataFrame({
    'id': [item['id'] for item in annotations],
    'category_id': [item['category_id'] for item in annotations],
    'image_id': [item['image_id'] for item in annotations],
    # Raises KeyError if an annotation references an image id that is missing
    # from train_data['images'].
    'file_name': [file_name_by_image_id[item['image_id']] for item in annotations],
})


# df_test = pd.DataFrame.from_records(test_data['images'])

# Persist the table for later inspection.
df_train.to_csv('train_data.csv')
# df_test.to_csv('test_data.csv')

In [None]:
df_image = pd.DataFrame.from_records(train_data['images'])

# Ids of every image whose 'location' field equals 537.
excluded_image_ids = df_image.loc[df_image['location'] == 537, 'id']

# Drop all annotations that point at those images in a single pass.
# The boolean mask selects index *labels*, so the right rows are removed even
# when the index is no longer a contiguous 0..n-1 range (the previous
# per-image loop indexed positionally with labels and drifted after the first
# drop).
rows_to_drop = df_train.index[df_train['image_id'].isin(excluded_image_ids)]
df_train = df_train.drop(rows_to_drop)

df_train.info()

In [None]:
# Image.open('/kaggle/input/iwildcam-2020-fgvc7/train/'+df_train['file_name'][0])

## Remove the Invalid or Corrupt Images from the Training Data

In [None]:


# Collect the file names Pillow cannot open, then drop them in one step so the
# frame is not mutated while one of its columns is being iterated.
unreadable_files = []
for file_name in df_train['file_name']:
    try:
        # Image.open is lazy (it identifies the file without decoding pixel
        # data); the context manager closes the handle immediately.
        with Image.open('/kaggle/input/iwildcam-2020-fgvc7/train/' + file_name):
            pass
    except OSError:
        # Covers missing files and files Pillow cannot identify as an image.
        unreadable_files.append(file_name)

df_train.drop(df_train.index[df_train['file_name'].isin(unreadable_files)],
              inplace=True)

# Store the labels as strings (they are used as y_col for a categorical
# flow_from_dataframe later in the notebook).
df_train['category_id'] = df_train['category_id'].astype(str)
df_train.shape

In [None]:
# --- Class counts -----------------------------------------------------------
# Distinct labels that actually occur in the cleaned training table.
NUM_CLASSES = df_train['category_id'].unique().size
print(NUM_CLASSES)

# Number of entries in the JSON 'categories' list.
nb_classes = len(train_data['categories'])
print(nb_classes)

# --- DataFrame columns and image folders ------------------------------------
ID_COLNAME = 'file_name'
ANSWER_COLNAME = 'category_id'
TRAIN_IMGS_DIR = r'../input/iwildcam-2020-fgvc7/train/'
TEST_IMGS_DIR = r'../input/iwildcam-2020-fgvc7/test/'

# --- Image geometry ----------------------------------------------------------
IMG_SIZE = 64
CHANNELS = 3
# Used as target_size by the training / validation generators.
IMAGE_RESIZE = 299
RESNET50_POOLING_AVERAGE = 'avg'

# --- Training schedule -------------------------------------------------------
# Batch size handed to the training / validation generators.
batch_size = 32
NUM_EPOCHS = 10
STEPS_PER_EPOCH_TRAINING = 10
STEPS_PER_EPOCH_VALIDATION = 10

BATCH_SIZE_TRAINING = 100
BATCH_SIZE_VALIDATION = 100

# Using 1 to easily manage mapping between test_generator & prediction for submission preparation
BATCH_SIZE_TESTING = 1



In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [None]:
def _flow_from(datagen, frame):
    """Return a categorical batch iterator over *frame* for the train image folder.

    Both the training and validation iterators share every setting except the
    ImageDataGenerator instance and the slice of df_train they read from.
    """
    return datagen.flow_from_dataframe(
        dataframe=frame,
        directory="../input/iwildcam-2020-fgvc7/train",
        x_col=ID_COLNAME,
        y_col=ANSWER_COLNAME,
        batch_size=batch_size,
        shuffle=True,
        classes=[str(i) for i in range(nb_classes-1)],
        class_mode="categorical",
        target_size=(IMAGE_RESIZE, IMAGE_RESIZE),
    )


# Training iterator: first 50,000 rows, pixel values rescaled by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
train_generator = _flow_from(train_datagen, df_train[:50000])

# Validation iterator: the next 30,000 rows, same rescaling.
valid_datagen = ImageDataGenerator(rescale=1./255)
valid_generator = _flow_from(valid_datagen, df_train[50000:80000])

# Xception Pretrained Model

In [None]:
from keras.applications.vgg19 import VGG19
from keras.applications.resnet import ResNet50 
from keras.applications.xception import Xception
from keras.models import Sequential 
from keras.layers import Flatten, Dense, Dropout 
from keras.optimizers import Adam, SGD 
from keras.callbacks import EarlyStopping

# ImageNet-pretrained Xception backbone without its classification head.
# The input shape must match the target_size the generators produce
# (IMAGE_RESIZE x IMAGE_RESIZE); a fixed 224x224 input would not accept the
# 299x299 batches.
xception_model = Xception(weights="imagenet", include_top=False,
                          input_shape=(IMAGE_RESIZE, IMAGE_RESIZE, CHANNELS))

# One output unit per class the training generator encodes, instead of a
# hard-coded 266 that has to be kept in sync with the `classes` list by hand.
num_outputs = len(train_generator.class_indices)

model = Sequential()
model.add(xception_model)
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_outputs, activation='softmax'))
model.summary()

# Compile model with Adam Optimizer
model.compile(optimizer='adam',loss='categorical_crossentropy', metrics=['accuracy'])
# Stop training when val_loss has not improved for 3 consecutive epochs.
early = EarlyStopping(monitor='val_loss', min_delta=0, patience=3, verbose=1, mode='auto')

fit_history = model.fit_generator( 
    train_generator, 
    steps_per_epoch=STEPS_PER_EPOCH_TRAINING, 
    epochs = NUM_EPOCHS, 
    validation_data=valid_generator, 
    validation_steps=STEPS_PER_EPOCH_VALIDATION,
    # The callback was created but never handed to training before.
    callbacks=[early]
)


In [None]:
# One row per epoch, one column per logged quantity.
history_df = pd.DataFrame(fit_history.history) 
history_df[['loss', 'val_loss']].plot() 

# The accuracy metric is logged as 'acc'/'val_acc' by older Keras releases and
# as 'accuracy'/'val_accuracy' by newer ones; plot whichever pair is present
# instead of raising KeyError on a hard-coded name.
accuracy_columns = [column
                    for column in ('acc', 'val_acc', 'accuracy', 'val_accuracy')
                    if column in history_df.columns]
if accuracy_columns:
    history_df[accuracy_columns].plot()

In [None]:
import gc

# Remove the names bound to the training iterator and its ImageDataGenerator,
# then run a garbage-collection pass.
del train_generator
del train_datagen
gc.collect()

In [None]:
# Load the submission template and derive each test image's file name from
# its Id. Vectorised string concatenation replaces a lambda whose parameter
# shadowed the builtin `str`.
sam_sub_df = pd.read_csv('../input/iwildcam-2020-fgvc7/sample_submission.csv')
sam_sub_df["file_name"] = sam_sub_df["Id"] + ".jpg"
sam_sub_df.head()

In [None]:
%%time

# Same 1/255 rescaling as the training / validation generators. This object
# was referenced but never created before, which raised NameError.
test_datagen = ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow_from_dataframe(      
    
        dataframe=sam_sub_df,    
    
        directory = "../input/iwildcam-2020-fgvc7/test",    
        x_col="file_name",
        # Same image size as the training generators; the previous `img_size`
        # name was undefined.
        target_size = (IMAGE_RESIZE, IMAGE_RESIZE),
        batch_size = 1,
        classes = [ str(i) for i in range(nb_classes)],
        # Keep the rows in sam_sub_df order; no labels are yielded.
        shuffle = False,
        class_mode = None
        )

In [None]:
%%time
# Rewind the (unshuffled, batch-size-1) test iterator and run one prediction
# step per listed file.
test_generator.reset()
num_steps = len(test_generator.filenames)
predict = model.predict_generator(test_generator, steps=num_steps)

# Highest-scoring output index per image becomes the submitted Category.
predicted_class_indices = predict.argmax(axis=1)
sam_sub_df["Category"] = predicted_class_indices

# Keep only the two submission columns and write them without the index.
sam_sub_df = sam_sub_df[["Id", "Category"]]
sam_sub_df.to_csv("submission.csv", index=False)