## Importing Libraries

In [None]:
# Python libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import glob

# Preprocessing libraries
from PIL import Image, ImageEnhance
from sklearn.preprocessing import LabelEncoder

# TF libraries
import tensorflow as tf
from tensorflow import keras
import keras.backend as K
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.metrics import Precision, Recall

# Fine tuning libraries
import keras_tuner

## Enabling GPU Memory Growth

In [None]:
# Ask TensorFlow to allocate GPU memory on demand instead of reserving it
# all up front. With no GPU present the list is empty and the loop is a no-op.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as growth_error:
    # Report the failure and carry on rather than aborting the notebook.
    print(growth_error)

## Importing Data

### Importing Labels

In [None]:
# Root folder of each dataset split.
training_path = "./Dataset/Training"
validation_path = "./Dataset/Validation"
testing_path = "./Dataset/Testing"

# Each split ships a comma-separated label file next to its images.
training_labels = pd.read_csv(os.path.join(training_path, "training_labels.csv"), sep=",")
validation_labels = pd.read_csv(os.path.join(validation_path, "validation_labels.csv"), sep=",")
testing_labels = pd.read_csv(os.path.join(testing_path, "testing_labels.csv"), sep=",")

### Encoding Labels

In [None]:
# Fit the encoder ONCE, on the training labels, and reuse that mapping for
# the other splits. Re-fitting per split would silently assign different
# integer ids to the same medicine name whenever a split is missing a class.
# `transform` raises ValueError if a split contains a name unseen in training.
encoder = LabelEncoder()
encoder.fit(training_labels["MEDICINE_NAME"])

# num_classes is fixed at 78 to match the width of the model's softmax output.
training_labels_enc = to_categorical(
    encoder.transform(training_labels["MEDICINE_NAME"]),
    num_classes=78
)

validation_labels_enc = to_categorical(
    encoder.transform(validation_labels["MEDICINE_NAME"]),
    num_classes=78
)

testing_labels_enc = to_categorical(
    encoder.transform(testing_labels["MEDICINE_NAME"]),
    num_classes=78
)

### Importing Images

#### Config

In [None]:
# Target size and padding settings for a pad-then-resize preprocessing step.
# NOTE(review): the active image loading code resizes directly to a fixed
# size and does not read these four values -- confirm whether they are still needed.
image_size = (224, 224)
padding_horizontal = 200
padding_vertical = 75
padding_color = (255, 0, 0)

# Number of (image, label) pairs per batch when building the datasets.
batch_size = 32

#### Import Function

In [None]:
def get_images(path, labels_enc, filenames=None):
    """Load the PNG images under `path` and pair them with `labels_enc`.

    Images and labels are paired purely by position, so the load order must
    match the row order of the labels.

    Args:
        path: Directory containing the `*.png` images.
        labels_enc: One encoded label per image, in load order.
        filenames: Optional iterable of file names (relative to `path`) giving
            the exact load order, e.g. the image-name column of the labels
            CSV. When omitted, the PNG files found in `path` are loaded in
            natural order ("2.png" before "10.png").

    Returns:
        A `tf.data.Dataset` of (image, label) pairs batched by the
        module-level `batch_size`. Images are float32 arrays scaled to [0, 1].

    Raises:
        ValueError: If the number of images differs from the number of labels.
    """
    import re

    def _natural_key(file_path):
        # re.split with a capture group alternates text / digits / text ...,
        # so odd positions are always digit runs and compare as integers.
        parts = re.split(r"(\d+)", os.path.basename(file_path))
        return [int(tok) if idx % 2 else tok.lower() for idx, tok in enumerate(parts)]

    if filenames is not None:
        files = [os.path.join(path, str(name)) for name in filenames]
    else:
        # glob returns files in arbitrary filesystem order; sort so the
        # pairing with labels is deterministic.
        # NOTE(review): assumes label rows follow the natural order of the
        # file names -- confirm against the CSV or pass `filenames` explicitly.
        files = sorted(glob.glob(os.path.join(path, "*.png")), key=_natural_key)

    images = []
    for file in files:
        # The context manager releases the file handle once pixels are copied.
        with Image.open(file) as raw_image:
            # PIL sizes are (width, height), so the array is (300, 100, 3).
            # The image is stretched to this size; no padding is applied.
            resized = raw_image.convert("RGB").resize((100, 300))
        images.append(np.asarray(resized).astype("float32") / 255.0)

    images = np.array(images)
    np_labels_enc = np.array(labels_enc)
    if len(images) != len(np_labels_enc):
        raise ValueError(
            f"Found {len(images)} images in {path!r} but {len(np_labels_enc)} labels"
        )

    dataset = tf.data.Dataset.from_tensor_slices((images, np_labels_enc))
    dataset = dataset.batch(batch_size)

    return dataset

In [None]:
training_dataset = get_images(training_path + "/training_words", training_labels_enc)
validation_dataset = get_images(validation_path + "/validation_words", validation_labels_enc)
testing_dataset = get_images(testing_path + "/testing_words", testing_labels_enc)

# Shuffling Training Dataset
# get_images returns an already batched dataset. Shuffling it directly would
# only permute whole batches and keep the same samples grouped every epoch.
# Unbatch first so individual samples are shuffled, then batch again.
BUFFER_SIZE = 3120
training_dataset = (
    training_dataset
    .unbatch()
    .shuffle(BUFFER_SIZE, seed = 42)
    .batch(batch_size)
)

In [None]:
# plt.imshow(training_images[0])

## Creating Model

In [None]:
def create_model(hp):
    """Build and compile a CNN classifier from a KerasTuner hyperparameter set.

    Architecture: one input convolution + max-pool, a tunable stack of
    convolutions, a flatten, a tunable stack of dense layers, and a 78-way
    softmax output.

    Args:
        hp: `keras_tuner.HyperParameters` used to sample filters, kernel
            sizes, activations, layer counts, optimizer and learning rate.

    Returns:
        A compiled `Sequential` model using categorical cross-entropy loss and
        reporting accuracy, precision and recall.
    """
    input_shape = (300, 100, 3)  # (height, width, channels)
    metrics = [
        'accuracy',
        Precision(name = 'precision'),
        Recall(name = 'recall'),
    ]

    model = Sequential()

    # Input convolution; filter count, kernel size (3, 5 or 7) and activation are tuned.
    model.add(layers.Conv2D(filters = hp.Choice("input_filter", [32, 64, 128]),
                            kernel_size = hp.Choice("input_kernel_size", [3, 5, 7]),
                            activation = hp.Choice("input_activation", ["relu", "leaky_relu"]),
                            input_shape = input_shape))
    model.add(layers.MaxPooling2D(pool_size=2))

    # Convolutional stack.
    # NOTE(review): hp.Choice([1, 4]) picks either 1 or 4 layers, not a range
    # 1..4 -- confirm this is intended (hp.Int would give a range).
    for i in range(hp.Choice("n_conv_layers", [1, 4])):
        model.add(layers.Conv2D(filters = hp.Choice(f"conv_filter_{i}", [32, 64, 128]),
                                kernel_size = hp.Choice(f"conv_kernel_size_{i}", [3, 5, 7]),
                                activation = hp.Choice(f"conv_activation_{i}", ["relu", "leaky_relu"])))
        # Pool only after the first layer of the stack; further pooling
        # would shrink the narrow (100 px) axis even more.
        if i < 1:
            model.add(layers.MaxPooling2D(pool_size=2))

    # Flatten the feature maps so the dense layers can classify them.
    model.add(layers.Flatten())

    # Dense stack: 1 to 5 layers, each with tuned width and activation.
    for i in range(hp.Int("n_dense_layer", 1, 5, step = 1, default = 1)):
        model.add(layers.Dense(hp.Choice(f"n_neurons_dense_{i}", [256, 512, 1024]),
                               activation = hp.Choice(f"dense_activation{i}", ["relu", "leaky_relu"])))

    # Output layer: one neuron per class (78 classes).
    model.add(layers.Dense(78, activation = "softmax"))

    optimizer_name = hp.Choice('optimizer', ['adam', 'rmsprop'])

    # Each optimizer gets its own learning-rate hyperparameter. Registering
    # 'learning_rate' twice under one name keeps only the first definition,
    # so RMSprop could be given 0.01 despite its narrower list. The
    # conditional scopes mark each rate as active only for its optimizer.
    with hp.conditional_scope('optimizer', ['adam']):
        if optimizer_name == 'adam':
            optimizer = tf.keras.optimizers.Adam(
                learning_rate=hp.Choice('adam_learning_rate', [0.0001, 0.001, 0.01]))
    with hp.conditional_scope('optimizer', ['rmsprop']):
        if optimizer_name == 'rmsprop':
            optimizer = tf.keras.optimizers.RMSprop(
                learning_rate=hp.Choice('rmsprop_learning_rate', [0.0001, 0.001]), rho=0.9)

    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=metrics)

    return model

## Tuning

In [None]:
# Hyperband search over the models produced by create_model.
# The objective is validation accuracy: ranking trials by training accuracy
# would favour configurations that simply memorise the training set.
tuner = keras_tuner.Hyperband(
    hypermodel = create_model,
    objective = keras_tuner.Objective("val_accuracy", "max"),
    max_epochs = 500,          # upper bound on epochs for a single trial
    factor = 5,                # reduction factor between Hyperband brackets
    hyperband_iterations = 1,  # run the full Hyperband schedule once
    seed = 42,
    # distribution_strategy = tf.distribute.MirroredStrategy(), # this is to speed up the process by distributing the load for computation (only for gpu)
    project_name = "medzy-train"
)

: 

In [None]:
# Validate on the whole validation set each epoch. The validation dataset is
# finite and already batched, so a hard-coded step count larger than its
# number of batches makes Keras run out of data; None means "until exhausted".
validation_steps = None
accumulation_steps = 4  # NOTE(review): not used in this cell -- confirm whether later cells need it

tuner.search(
    training_dataset,
    epochs = 500,
    validation_data = validation_dataset, # data used to evaluate the model after each epoch
    validation_steps = validation_steps,  # number of validation batches per epoch (None = all)
    callbacks = [
        # Stop a trial as soon as either monitored metric stops improving.
        keras.callbacks.EarlyStopping("accuracy", mode="max"),
        keras.callbacks.EarlyStopping("val_recall", mode="max")
    ]
)

