## Import Libraries

In [None]:
# Dataset Processing
from PIL import Image
import numpy as np
import pandas as pd

# Machine Learning
from sklearn.model_selection import train_test_split

from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, BatchNormalization

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns
import os

%matplotlib inline

## Define Constants

In [None]:
# Target image geometry: every image is resized to IMAGE_SIZE before training.
IMAGE_WIDTH, IMAGE_HEIGHT = 256, 256
IMAGE_SIZE = (IMAGE_WIDTH, IMAGE_HEIGHT)
IMAGE_CHANNELS = 3

# Prefix of the dataset location. NOTE: paths are built as f"{ROOT_DIR}/dataset",
# so an empty ROOT_DIR points at "/dataset" (filesystem root), not at the
# current working directory.
ROOT_DIR = ""

# Class letter -> integer label, in alphabetical order ("A" -> 0 ... "Z" -> 25).
LABEL2INT = {
    letter: index
    for index, letter in enumerate("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
}

## Load Dataset
### 2.1 Extract filenames

In [None]:
dataset_dir = f"{ROOT_DIR}/dataset"

# os.listdir() returns entries in arbitrary order; sorting makes the row order
# of `df` (and therefore the seeded train/test split) reproducible.
dataset_entries = sorted(os.listdir(dataset_dir))

# filename format: [category].[id].jpg
# Keep only entries whose leading component is a known label, so stray entries
# (e.g. ".DS_Store", ".ipynb_checkpoints") do not raise KeyError below.
filenames = [name for name in dataset_entries if name.split('.')[0] in LABEL2INT]

skipped_count = len(dataset_entries) - len(filenames)
if skipped_count:
    print(f"Skipped {skipped_count} entries in {dataset_dir} without a known category prefix")

categories = [LABEL2INT[name.split('.')[0]] for name in filenames]

# One row per image: the file name and its integer class label.
df = pd.DataFrame({
    'filename': filenames,
    'category': categories
})

### 2.2 Convert filenames to images

In [None]:
def filename_to_image(filename):
    """Load one dataset image as a fixed-size RGB pixel array.

    Args:
        filename: Name of a file inside the ``{ROOT_DIR}/dataset`` directory.

    Returns:
        numpy.ndarray: Pixel data of the image after conversion to RGB and
        resizing to ``IMAGE_SIZE``.
    """
    # The context manager closes the underlying file handle once the pixels
    # have been read, instead of leaving one open file per image.
    with Image.open(f"{ROOT_DIR}/dataset/{filename}") as img:
        # Force three channels so grayscale, palette and RGBA files all yield
        # arrays of the same shape.
        resized = img.convert("RGB").resize(IMAGE_SIZE)
    return np.array(resized)

# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable.
train_df, test_df = train_test_split(df, test_size=0.20, random_state=42)

# Stack the per-image arrays into a single ndarray with a leading sample axis.
# A pandas Series of arrays (what Series.apply would return) has object dtype
# and cannot be converted to a tensor by model.fit().
X_train = np.stack([filename_to_image(name) for name in train_df['filename']])
Y_train = train_df['category']

X_test = np.stack([filename_to_image(name) for name in test_df['filename']])
Y_test = test_df['category']

# Class distribution over the whole dataset. Pass the column as `x=` explicitly:
# current seaborn treats the first positional argument as `data`, not `x`.
g = sns.countplot(x=df['category'])

### 2.3 Label encoding

In [None]:
# Encode labels to one hot vectors
# (ex : "B" -> 1 -> [0,1,0,0,...,0], a vector with one entry per class)
#
# The integer labels are read from train_df / test_df rather than from
# Y_train / Y_test, so re-running this cell does not one-hot encode arrays
# that are already encoded.
NUM_CLASSES = len(LABEL2INT)
Y_train = to_categorical(train_df['category'], num_classes=NUM_CLASSES)
Y_test = to_categorical(test_df['category'], num_classes=NUM_CLASSES)

### 2.4 Example Sample

In [None]:
# Show the first training image.
# np.asarray(...)[0] always selects the first sample by position. On a pandas
# Series, X_train[0] would instead look up index label 0, which the shuffled
# train/test split may have placed in the test set (KeyError).
g = plt.imshow(np.asarray(X_train)[0])

## CNN
### 3.1 Define the model

In [None]:
# Set the CNN model
# Architecture: In -> [Conv2D(relu) -> BatchNormalization -> MaxPooling2D] * 3
#               -> Flatten -> Dense(relu) -> BatchNormalization -> Dense(softmax) -> Out

model = Sequential()

# Convolution block 1. Image arrays are laid out as (rows, columns, channels),
# i.e. (height, width, channels), so the input shape is given in that order.
model.add(Conv2D(32, (3, 3), activation='relu',
                 input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Convolution block 2
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Convolution block 3
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head: one softmax output per class in LABEL2INT.
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dense(len(LABEL2INT), activation='softmax'))

### 3.2 Set the Optimizer

In [None]:
# Configure training: RMSprop optimizer, categorical cross-entropy loss
# (labels are one-hot vectors), and accuracy reported as the metric.
model.compile(
    loss="categorical_crossentropy",
    optimizer='rmsprop',
    metrics=["accuracy"],
)

## Training

In [None]:
# Training hyperparameters
BATCH_SIZE = 64
EPOCHS = 10

# Keep the History object returned by fit(): it holds the per-epoch loss and
# accuracy for the training and validation data, which would otherwise be
# lost once the progress output scrolls away.
history = model.fit(
    X_train, Y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_test, Y_test),
)

## Evaluation

# Advanced Concepts
## Preprocessing 
Histogram Equalization, Mean Normalization, Standard Deviation Normalization
## Augmentation
Rotating, Mirroring, Blurring, Adding Noise
## Regularization
Handling Overfitting
## Model Designing
## Optimizers
## Fine-tuning Existing Models