## Overview
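* Trains a character classifier on the cropped images produced by the "crop right noise" script.
* Enlarging the dataset through augmentation is not yet implemented; the `ImageDataGenerator` below only applies on-the-fly shear, zoom and flip transforms to the existing images.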

# Import Libraries

In [14]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

#Deep Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers as L
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import os
import cv2
import glob
import skimage.io as io
from skimage.color import rgb2gray

#Data Visualizations
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [None]:
# Load every cropped character image into memory together with its label.
# The label of an image is the name of the directory that directly contains it.
directory="/kaggle/input/cropped-data/cropped_characters/*/*"

# glob returns matches in arbitrary order; sort so re-runs see the same order.
paths = sorted(glob.glob(directory))
if not paths:
    # Fail here with a clear message instead of an IndexError on trainData[0] below.
    raise FileNotFoundError(f"No images matched the pattern {directory!r}")

trainData = [io.imread(path) for path in paths]
# os.path keeps the label extraction independent of the path separator.
trainLabels = [os.path.basename(os.path.dirname(path)) for path in paths]

# NOTE(review): np.array only builds a regular (N, H, W[, C]) array when all
# images share one shape -- confirm the cropped images are uniformly sized.
trainData = np.array(trainData)
trainLabels = np.array(trainLabels)

print(trainLabels.shape)
print(trainData.shape)
print(trainData[0].shape)

In [15]:
from tensorflow.keras.preprocessing.image import load_img, img_to_array, array_to_img

def change_size(image, size=(75, 75)):
    """Resize an image array and return the result as a float64 array.

    Args:
        image: Image array accepted by ``array_to_img``.
        size: Target size handed to ``PIL.Image.resize``. Defaults to
            ``(75, 75)``, the input size used by the model in this notebook.

    Returns:
        The resized image converted back to an array with dtype ``np.float64``.
    """
    # scale=False: convert to a PIL image without rescaling the pixel values.
    pil_image = array_to_img(image, scale=False)
    resized = pil_image.resize(tuple(size))
    return img_to_array(resized).astype(np.float64)

In [16]:
# Training and validation images come from the same directory tree and are
# separated by `validation_split`. Two generators are used so that only the
# training subset is augmented; validation images are just rescaled.
DATA_DIR = '/kaggle/input/cropped-data/cropped_characters'
VALIDATION_SPLIT = 0.2  # must be identical for both generators

# NOTE(review): horizontal_flip mirrors the characters; confirm that mirrored
# glyphs are still valid examples of their class for this dataset.
train_datagen = ImageDataGenerator(rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=VALIDATION_SPLIT) # set validation split

# No random transforms here: validation metrics should be measured on
# unmodified images.
valid_datagen = ImageDataGenerator(rescale=1./255,
    validation_split=VALIDATION_SPLIT)

train_generator = train_datagen.flow_from_directory(
    DATA_DIR,
    target_size=(75, 75),
    batch_size=20,
    class_mode='categorical',
    subset='training') # set as training data

validation_generator = valid_datagen.flow_from_directory(
    DATA_DIR, # same directory as training data
    target_size=(75, 75),
    batch_size=20,
    class_mode='categorical',
    subset='validation') # set as validation data

The data is split into training and validation subsets (`validation_split=0.2`). The bar plot below shows how many images ended up in each subset.

In [18]:
# Number of images in each subset. x/y are passed by keyword because current
# seaborn releases no longer accept them positionally.
sns.barplot(x=['train', 'valid'], y=[train_generator.n, validation_generator.n])

In [19]:
# Transfer-learning classifier: an ImageNet-pretrained ResNet50 backbone
# (without its classification head) followed by a small dense head.
model = Sequential()

# Input shape matches the 75x75 target_size produced by the generators.
model.add(tf.keras.applications.resnet50.ResNet50(input_shape = (75, 75, 3),
                                include_top = False,
                                weights = 'imagenet'))

model.add(L.Flatten())
model.add(L.Dense(128, activation='relu'))
# 38 output units -- presumably one per class directory; must equal the number
# of classes reported by the generators.
model.add(L.Dense(38, activation='softmax'))

# `learning_rate` replaces the deprecated `lr` argument.
# Do not use default learning rate since it is too high!
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [20]:
# Freeze every backbone layer that precedes FIRST_TRAINABLE_LAYER (in the order
# of `backbone.layers`); that layer and all later ones remain trainable.
FIRST_TRAINABLE_LAYER = 'conv5_block1_0_conv'

backbone = model.layers[0]
backbone_layer_names = [layer.name for layer in backbone.layers]
if FIRST_TRAINABLE_LAYER not in backbone_layer_names:
    # Without this check a wrong name would silently freeze the whole backbone.
    raise ValueError(f"Layer {FIRST_TRAINABLE_LAYER!r} not found in the backbone")

# NOTE(review): confirm that every layer meant to be fine-tuned actually comes
# after FIRST_TRAINABLE_LAYER in `backbone.layers`.
cut = backbone_layer_names.index(FIRST_TRAINABLE_LAYER)
for layer in backbone.layers[:cut]:
    layer.trainable=False

# `trainable` was changed after the model was compiled; compile again so the
# change is taken into account. Keep loss/metrics in sync with the first compile.
model.compile(optimizer=model.optimizer,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [21]:
# Train for 5 epochs. Step counts are derived from the generators instead of
# being hard-coded, so they follow the dataset size.
history = model.fit(
    train_generator,
    epochs=5,
    # Full batches only; max(1, ...) guards against a zero step count when the
    # training subset is smaller than one batch.
    steps_per_epoch=max(1, train_generator.n // train_generator.batch_size),
    validation_data=validation_generator,
    # One pass over the whole validation subset per epoch.
    validation_steps=len(validation_generator),
)

In [25]:
#-----------------------------------------------------------
# Retrieve the per-epoch metrics recorded by model.fit for the
# training and validation data
#-----------------------------------------------------------
acc      = history.history['accuracy']
val_acc  = history.history['val_accuracy']
loss     = history.history['loss']
val_loss = history.history['val_loss']

epochs   = range(len(acc)) # one entry per completed epoch, starting at 0

#------------------------------------------------
# Plot training and validation accuracy per epoch
#------------------------------------------------
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(epochs, acc, label='train')
ax_acc.plot(epochs, val_acc, label='validation')
ax_acc.set_title('Training and validation accuracy')
ax_acc.set_xlabel('epoch')
ax_acc.set_ylabel('accuracy')
ax_acc.legend()

#------------------------------------------------
# Plot training and validation loss per epoch
#------------------------------------------------
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(epochs, loss, label='train')
ax_loss.plot(epochs, val_loss, label='validation')
ax_loss.set_title('Training and validation loss')
ax_loss.set_xlabel('epoch')
ax_loss.set_ylabel('loss')
ax_loss.legend()