In [None]:
import json
from sklearn.model_selection import train_test_split
import pandas as pd

In [None]:
# Load the labels JSON (its 'images' and 'annotations' entries are used below).
# The context manager closes the file handle even if json.load raises.
with open('/kaggle/input/chula-parasite-dataset/Chula-ParasiteEgg-11/Chula-ParasiteEgg-11/Chula-ParasiteEgg-11/labels.json') as file:
    data = json.load(file)

# Load the images and their details and create a dataframe

In [None]:
# json_normalize already returns a DataFrame (one row per entry of data['images']),
# so no extra DataFrame.from_dict wrapper is needed.
image_df = pd.json_normalize(data['images'])
image_df

# Load the annotations and their details and create a dataframe

In [None]:
# json_normalize already returns a DataFrame (one row per entry of data['annotations']),
# so no extra DataFrame.from_dict wrapper is needed.
annotation_df = pd.json_normalize(data['annotations'])
annotation_df

In [None]:
# Boolean Series marking every annotation row whose image_id already occurred in
# an earlier row (Series.duplicated keeps the first occurrence unmarked by default),
# i.e. it flags images that carry more than one annotation.
duplicate_values = annotation_df['image_id'].duplicated()
duplicate_values

# Merging the two dataframes above
The image and annotation dataframes are joined on the image id (`id` in `image_df`, `image_id` in `annotation_df`).

In [None]:
# Inner-join every annotation onto its image, then discard 'image_id':
# after the join it carries the same values as the image 'id' key.
merged_df = (
    image_df
    .merge(annotation_df, how='inner', left_on='id', right_on='image_id')
    .drop(columns=['image_id'])
)
merged_df

# Separating features and target variables

In [None]:
# Feature columns taken from the merged frame.
# 'id_x' presumably is the image id: pandas appends '_x'/'_y' when both merged
# frames contain a column with the same name — TODO confirm.
X = merged_df[['id_x','file_name','height','width','area']]
# Target columns: class id and bounding box.
# NOTE(review): the classifier built further down ends in an 11-way softmax, so only
# 'category_id' can act as its label; 'bbox' is carried along here — confirm intent.
y = merged_df[['category_id','bbox']]

In [None]:
# Display the feature frame for a quick visual check.
X

In [None]:
# Display the target frame for a quick visual check.
y

# Splitting into training, testing and validation sets

In [None]:
# Fixed random_state so the split is reproducible across kernel restarts.
# First split: 80% train / 20% hold-out.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, shuffle=True, test_size=0.2, random_state=42
)
# Second split of the hold-out: 80% of it stays as the test set and 20% becomes
# the validation set (i.e. 16% and 4% of all rows respectively).
X_test, X_val, y_test, y_val = train_test_split(
    X_test, y_test, test_size=0.2, random_state=42
)

# Below code should be changed.

## Resizing images into one size

In [None]:
# Target size every image is resized to; the same values are passed as the
# InceptionV3 input_shape further down.
image_height = 299
image_width = 299 
# Number of samples per batch passed to Dataset.batch().
batch_size = 8

In [None]:
import matplotlib.pyplot as plt
import cv2 as cv

In [None]:
# Directory that holds all image files referenced by 'file_name'.
dir_path = '/kaggle/input/chula-parasite-dataset/Chula-ParasiteEgg-11/Chula-ParasiteEgg-11/Chula-ParasiteEgg-11/data'

# Preview one sample image, building its path from dir_path instead of repeating it.
img = cv.imread(dir_path + '/Ascaris lumbricoides_0003.jpg')
# cv.imread signals a missing/unreadable file by returning None rather than raising.
if img is None:
    raise FileNotFoundError(dir_path + '/Ascaris lumbricoides_0003.jpg')
# OpenCV stores channels as BGR while matplotlib expects RGB; convert for display
# only so the colours are not swapped (img itself is left untouched).
plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))

In [None]:
#from tqdm import tqdm
import tensorflow as tf
from keras.preprocessing import image

In [None]:
# Absolute path of every training image.
# assign() returns a new frame, so the slice produced by train_test_split is not
# mutated (avoids SettingWithCopyWarning) and re-running the cell is idempotent.
X_train = X_train.assign(abs_file_name=dir_path + '/' + X_train['file_name'])
filenames = tf.constant(X_train['abs_file_name'].to_numpy())
# Only the integer class id is a valid label for tf.one_hot / the softmax output;
# y_train also holds the 'bbox' column, which is not a class label.
labels = tf.constant(y_train['category_id'].to_numpy())

def one_hot_encode(label, num_classes=11):
    """Return a float32 one-hot vector with a 1 at position ``label``.

    Args:
        label: Class index; it is converted with ``int()``.
            NOTE(review): ``int()`` presumably needs a concrete (eager/Python)
            value, so this helper may not work inside ``Dataset.map`` — confirm.
        num_classes: Length of the encoding for in-range labels. Defaults to 11,
            the value that was previously hard-coded.

    Returns:
        A 1-D ``tf.float32`` tensor: ``label`` zeros, a single one, then
        ``num_classes - 1 - label`` zeros.
    """
    index = int(label)
    # A negative repeat count yields an empty list, matching the old range() loops.
    encoding = [0] * index + [1] + [0] * (num_classes - 1 - index)
    return tf.convert_to_tensor(encoding, dtype=tf.float32)

# Slice the two tensors along their first axis so that each dataset element is a
# (filename, label) pair.
train_dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))

def im_file_to_tensor(file, label):
    """Turn a (path, class id) pair into an (image tensor, one-hot label) pair.

    Args:
        file: Path of the image file to read.
        label: Integer class index.

    Returns:
        Tuple ``(im, label)``: the 3-channel float32 image resized to
        ``(image_height, image_width)`` and the label one-hot encoded with depth 11.
    """
    raw_bytes = tf.io.read_file(file)
    # Requesting float32 from decode_image should already rescale pixel values to
    # [0, 1] (see the TF docs), so there is no separate division by 255.
    # expand_animations=False keeps animated GIFs as a single 3-D frame.
    decoded = tf.image.decode_image(
        raw_bytes,
        channels=3,
        dtype=tf.float32,
        expand_animations=False,
    )
    target_size = (image_height, image_width)
    resized = tf.keras.preprocessing.image.smart_resize(
        decoded, target_size, interpolation='bilinear'
    )
    return resized, tf.one_hot(label, depth=11)

    

# Replace every (filename, label) element with the output of im_file_to_tensor:
# (resized image tensor, one-hot label).
train_dataset = train_dataset.map(im_file_to_tensor)


In [None]:
# Smoke test: pull exactly one element through the pipeline so that decoding or
# resizing errors surface here; `i` keeps that element for inspection.
for i in train_dataset.take(1):
    pass  # e.g. display(i[0].numpy())

In [None]:
# Group consecutive training samples into batches of batch_size elements.
train_batched_dataset = train_dataset.batch(batch_size)


In [None]:
# Build the validation pipeline the same way as the training one.
# assign() returns a new frame, so the slice produced by train_test_split is not
# mutated (avoids SettingWithCopyWarning) and re-running the cell is idempotent.
X_val = X_val.assign(abs_file_name=dir_path + '/' + X_val['file_name'])
filenames = tf.constant(X_val['abs_file_name'].to_numpy())
# Only the integer class id is a valid label for tf.one_hot / the softmax output;
# y_val also holds the 'bbox' column, which is not a class label.
labels = tf.constant(y_val['category_id'].to_numpy())

eval_dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))

# Reuse the training preprocessing so both splits are decoded, resized and
# one-hot encoded identically.
eval_dataset = eval_dataset.map(im_file_to_tensor)

In [None]:
# Group consecutive validation samples into batches of batch_size elements.
eval_batched_dataset = eval_dataset.batch(batch_size)

## Model Training

In [None]:
from tensorflow.keras.applications.inception_v3 import InceptionV3

In [None]:
# InceptionV3 with ImageNet weights and without its top classification layers,
# to be used as the convolutional base for the image size configured above.
pre_trained_model = InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=(image_height, image_width, 3),
)

In [None]:
# Freeze every layer of the pre-trained base so its weights are not updated
# during training; only layers added on top of it remain trainable.
for layer in pre_trained_model.layers:
    layer.trainable = False

In [None]:
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.callbacks import Callback

In [None]:
# Classification head stacked on the frozen base: flatten the feature maps,
# one hidden dense layer with dropout, then an 11-way softmax.
head_layers = [
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(11, activation='softmax'),
]
x = pre_trained_model.output
for head_layer in head_layers:
    x = head_layer(x)

model = Model(inputs=pre_trained_model.input, outputs=x)
# categorical_crossentropy matches the one-hot labels produced by the input pipeline.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)
# model.summary()

In [None]:
# Smoke test: materialise exactly one batch to verify that batching works;
# `i` keeps that batch for inspection.
for i in train_batched_dataset.take(1):
    pass  # e.g. print(i)

In [None]:
# Model.fit accepts tf.data datasets directly; fit_generator is deprecated and
# no longer available in recent Keras/TensorFlow releases.
# The training dataset is repeated because steps_per_epoch * epochs batches are
# requested in total; a single finite pass would otherwise run out of data and
# interrupt training early.
# NOTE(review): validation uses the first validation_steps batches each epoch;
# confirm the validation set holds at least 50 batches.
history = model.fit(
    train_batched_dataset.repeat(),
    validation_data=eval_batched_dataset,
    steps_per_epoch=100,
    epochs=100,
    validation_steps=50,
)


In [None]:
# 1. Use Adam instead of RMSprop

In [None]:
# Use EfficientNet
# Set seeds - numpy, tf, random
# Tune the learning rate; try learning-rate schedulers
# Input pipeline: prefetch; tf.function --> do the /255 scaling in one tf function and decoding etc. in another
