<a href="https://colab.research.google.com/github/IqbalLx/Hanacaraka-AI/blob/master/Hanacaraka%20AI%20-%20notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Mount your Google Drive to Colab

In [0]:
# Colab-only step: attach Google Drive under /content/gdrive/ so the later cells
# can read and write the '/content/gdrive/My Drive/...' paths they reference.
from google.colab import drive
drive.mount('/content/gdrive/')

In [0]:
import zipfile

filename = '/content/gdrive/My Drive/HanacarakaAI/datasetv3.zip' # Adjust to your own drive directory

# Unpack the archive into the current working directory. The context manager
# closes the archive handle even if extraction raises part-way through.
with zipfile.ZipFile(filename) as zip_ref:
  zip_ref.extractall()

In [None]:
import os
import time
import math

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as plt_image
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and later
# releases dropped the old names, so pick whichever name this install provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

import tensorflow as tf

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image as keras_image

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, GlobalAveragePooling2D
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.layers import BatchNormalization

from tensorflow.keras.regularizers import l2

from tensorflow.keras.optimizers import Adam

from tensorflow.keras.callbacks import TensorBoard
%load_ext tensorboard

# Helper function for plotting Accuracy and Loss

In [None]:
def plot(hist):
  """Draw loss and accuracy curves (training vs. validation) side by side.

  Args:
    hist: object exposing `.history` (a mapping with the keys 'loss',
      'val_loss', 'accuracy' and 'val_accuracy') and `.epoch` (the x values),
      e.g. the value returned by `model.fit`.

  Returns:
    Whatever `plt.show()` returns.
  """
  history = hist.history
  # Keep the epoch axis in a local: writing it into `hist.history` would leave
  # an extra, non-metric 'epoch' key in the caller's object.
  epochs = hist.epoch

  plt.figure(figsize=(12, 5))

  # Left panel: loss
  plt.subplot(121)
  plt.plot(epochs, history['loss'], label='Loss')
  plt.plot(epochs, history['val_loss'], label='Val Loss', color='orange')
  plt.title('Loss')
  plt.xlabel('Epoch')
  plt.ylabel('Loss')
  plt.legend()

  # Right panel: accuracy
  plt.subplot(122)
  plt.plot(epochs, history['accuracy'], label='Acc')
  plt.plot(epochs, history['val_accuracy'], label='Val Acc', color='orange')
  plt.title('Accuracy')
  plt.xlabel('Epoch')
  plt.ylabel('Accuracy')
  plt.legend()

  return plt.show()

# Helper function for testing model

In [None]:
# Label for each output index of the network.
# NOTE(review): assumes this order matches the class indices the training
# generator derives from the dataset folder names - confirm against
# train_generator.class_indices.
classes = ['ba', 'ca', 'da', 'dha', 'ga', 'ha', 'ja', 'ka', 'la', 'ma',
           'na', 'nga', 'nya', 'pa', 'ra', 'sa', 'ta', 'tha', 'wa', 'ya']

def test(model, width, directory='dataset/prediction'):
  """Predict and display every image file found in `directory`.

  For each file the image is loaded as grayscale, resized to `width` x `width`,
  scaled to [0, 1] and passed to `model.predict`; the file path and predicted
  label are printed and the image is shown.

  Args:
    model: object with a `predict(batch, batch_size=...)` method returning one
      score per entry of `classes`.
    width: side length (pixels) the image is resized to before prediction.
    directory: folder holding the images to classify.

  Returns:
    None.
  """
  # Sorted so the output order is stable across runs and file systems.
  for file_name in sorted(os.listdir(directory)):
    image_path = os.path.join(directory, file_name)
    if not os.path.isfile(image_path):
      # Sub-directories cannot be loaded as images; skip them.
      continue

    image = keras_image.load_img(image_path,
                                 color_mode='grayscale',
                                 target_size=(width, width))
    x = keras_image.img_to_array(image)
    x /= 255                                    # same scaling as the data generators
    test_image = np.expand_dims(x, axis=0)      # add the batch dimension

    result = model.predict(test_image, batch_size=8)

    print(image_path)
    print(classes[np.argmax(result)])

    preview = plt_image.imread(image_path)
    plt.imshow(preview)
    plt.show()

  print('Prediction Done')

# Image Data Generator

In [0]:
SIZE = 100 #Image width
BATCH_SIZE = 32 #Images per batch, shared by both generators

# Training images are rescaled to [0, 1] and randomly rotated, zoomed and
# sheared; validation images are only rescaled.
train_datagen = ImageDataGenerator(rescale=1/255,
                                   rotation_range=20,
                                   zoom_range=0.2,
                                   shear_range=0.1)

validation_datagen = ImageDataGenerator(rescale=1/255)

# Both generators read one sub-folder per class and yield SIZE x SIZE
# single-channel images with one-hot ('categorical') labels.
train_generator = train_datagen.flow_from_directory(
    'dataset/training',
    target_size=(SIZE, SIZE),
    batch_size=BATCH_SIZE,
    color_mode='grayscale',
    class_mode = 'categorical'
)

val_generator = validation_datagen.flow_from_directory(
    'dataset/testing',
    target_size=(SIZE, SIZE),
    batch_size=BATCH_SIZE,
    color_mode='grayscale',
    class_mode = 'categorical'
)

Found 1762 images belonging to 20 classes.
Found 979 images belonging to 20 classes.


# Build baseline model

In [0]:
# Learning-rate schedule callback driven by validation loss; passed to fit().
reduce_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',   # quantity being watched
    mode='auto',
    factor=0.1,
    patience=5,
    min_delta=0.0001,
    cooldown=3,
    min_lr=1e-8,
    verbose=0,
)

## Custom callbacks for Early Stopping

We wrote our own early-stopping callback for extra flexibility. Training stops once validation accuracy has exceeded 80% and then fails to improve significantly for a set number of consecutive epochs.

In [0]:
class early_stop(tf.keras.callbacks.Callback):
  """Early stopping that only arms itself after a validation-accuracy threshold.

  Until `val_accuracy` exceeds `threshold` the callback does nothing. From that
  epoch on it records the best weights; every later epoch must beat the
  recorded accuracy by more than `significance_val`, otherwise a wait counter
  is incremented. When the counter reaches `patience`, training is stopped and
  the recorded best weights are loaded back into the model.

  Args:
    threshold: validation accuracy that must be exceeded before tracking starts.
    patience: number of consecutive non-improving epochs tolerated.
    significance_val: minimum gain over the best accuracy that counts as an
      improvement.
  """

  def __init__(self, threshold=0.8, patience=3, significance_val=0.001):
    # Zero-argument super() keeps working even if the module-level name
    # `early_stop` is later rebound to something else.
    super().__init__()

    self.threshold = threshold                  #Minimum accuracy
    self.patience = patience                    #Limit epoch to wait if accuracy no longer increased
    self.significance_val = significance_val    #Significant value to mark improvement in accuracy
    self.best_weights = None                    #Best weights record
    self.is_reach_threshold = False             #Track if already reach threshold value or not
    self.best_epoch = 0                         #Track best epoch that produce best weights

  def on_train_begin(self, logs=None):
    """Reset all per-run state so one instance can serve several fit() calls."""
    self.previous_acc = 0                       #Best accuracy recorded so far
    self.wait = 0                               #Epochs since the last improvement
    self.best_weights = None
    self.is_reach_threshold = False
    self.best_epoch = 0

  def _record_best(self, epoch):
    """Remember the current accuracy, weights and epoch as the best so far."""
    self.previous_acc = self.current_acc
    self.best_weights = self.model.get_weights()
    self.best_epoch = epoch

  def on_epoch_end(self, epoch, logs=None):
    """Update the tracking state from this epoch's `val_accuracy`."""
    self.current_acc = (logs or {}).get('val_accuracy')   #Current epoch's validation accuracy
    if self.current_acc is None:
      # No validation metric this epoch: nothing to compare against.
      print("\n[WARN] 'val_accuracy' not found in logs, early stopping check skipped")
      return

    if not self.is_reach_threshold:
      # Not armed yet: only check whether the threshold has just been crossed.
      if self.current_acc > self.threshold:
        self.is_reach_threshold = True
        self._record_best(epoch)
        print('\n[INFO] Threshold reached! Best weights start here...')
      return

    if self.current_acc - self.previous_acc > self.significance_val:
      self.wait = 0
      self._record_best(epoch)
      print('\n[INFO] Best weights updated!')
      return

    self.wait += 1
    print(f'\n[INFO] Current wait = {self.wait}')
    if self.wait >= self.patience:
      self.model.stop_training = True
      print(f'\n[INFO] Validation Accuracy didnt increased after {self.patience} epochs, training stopped...')
      self.model.set_weights(self.best_weights)
      print(f'\n[INFO] Restoring best weights from epoch {self.best_epoch + 1} val_acc')

In [None]:
# Baseline CNN: five Conv2D + MaxPool2D stages, then dropout and a dense head
# with one softmax output per class (20).
# The input shape is tied to SIZE because the data generators resize every
# image to SIZE x SIZE grayscale; a different hard-coded size here makes fit()
# fail on a shape mismatch.
base_model = Sequential([Conv2D(16, (3, 3), activation='relu', input_shape=(SIZE, SIZE, 1)),
                         MaxPool2D(2, 2),
                         Conv2D(32, (3, 3), activation='relu'),
                         MaxPool2D(2, 2),
                         Conv2D(32, (3, 3), activation='relu'),
                         MaxPool2D(2, 2),
                         Conv2D(64, (3, 3), activation='relu'),
                         MaxPool2D(2, 2),
                         Conv2D(64, (3, 3), activation='relu'),
                         MaxPool2D(2, 2),
                         Flatten(),
                         Dropout(0.5),
                         Dense(128, activation='relu'),
                         Dense(20, activation='softmax')])

# categorical_crossentropy matches the generators' class_mode='categorical'.
base_model.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=['accuracy']
)

base_model.summary()

In [None]:
# NOTE(review): this assertion always fails, so "Run All" halts here before the
# training cells - presumably a deliberate stop; confirm or remove.
assert False

In [None]:
LOGS = '/content/gdrive/My Drive/HanacarakaAI/logs/'
NAME = f'improved_model-{time.time()}'
tensorboard = TensorBoard(log_dir=os.path.join(LOGS, NAME), histogram_freq=1)

early_stop = early_stop(patience=20)

%tensorboard --logdir='/content/gdrive/My Drive/HanacarakaAI/logs/'

In [None]:
base_model_hist = base_model.fit(
    train_generator,
    epochs = 100,
    steps_per_epoch = int(1762/32),
    validation_data = val_generator,
    validation_steps= int(979/32),
    callbacks=[reduce_on_plateau, early_stop, tensorboard],
    verbose=1
)

In [None]:
# Loss and accuracy curves for the baseline training run.
plot(base_model_hist)

In [None]:
MODEL_PATH = '/content/gdrive/My Drive/HanacarakaAI/model/baseline'
# Export the trained baseline network in SavedModel format. The model built and
# trained in this notebook is `base_model`; no `improved_model` is defined here.
tf.saved_model.save(base_model, os.path.join(MODEL_PATH, 'saved_model'))

In [None]:
# Also export the trained baseline network as a single HDF5 file. The model
# defined in this notebook is `base_model`; the target folder is created first
# so this cell does not depend on the SavedModel export having run.
os.makedirs(MODEL_PATH, exist_ok=True)
tf.keras.models.save_model(base_model, os.path.join(MODEL_PATH, 'improved_model.h5'))