In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score
import cv2
import joblib as jb

# Raise pandas' display limits (up to 5000 rows/columns, 10000 characters
# per line) so wide frames are rendered without early truncation or wrapping.
pd.options.display.max_rows = 5000
pd.options.display.max_columns = 5000
pd.options.display.width = 10000

# Suppress all warnings for the rest of the session.
warnings.filterwarnings(action='ignore')

# Read Datasets

In [None]:
# Training data: one sample per row; the file has no header row.
df = pd.read_csv('emnist-balanced-train.csv', header=None)

# Label mapping: space-separated pairs, presumably "<class id> <code point>".
# The first column becomes the index and the single remaining column is
# squeezed into a Series. The `squeeze=True` keyword of read_csv was removed
# in pandas 2.0, so the squeeze is applied to the parsed result instead.
char_map = pd.read_csv(
    'emnist-balanced-mapping.txt', delimiter=' ', header=None, index_col=0
).squeeze('columns')

# Preview the first rows rather than rendering the whole frame.
df.head()

# Get Labels

In [None]:
# Class id -> character lookup. Keys are taken from the mapping's own index
# (the first column of the mapping file) rather than from the enumeration
# position, so the lookup stays correct even if the file is not listed in
# 0..n-1 order. Values are the characters for the stored code points.
mappers = {
    int(class_id): chr(int(code_point))
    for class_id, code_point in char_map.items()
}

# Several later cells read the lookup under the name `label_dictionary`;
# publish it under that name too so they work on a fresh kernel.
label_dictionary = mappers

In [None]:
# Display the class-id -> character lookup built in the previous cell.
mappers

# Separate Features and Labels

In [None]:
# Column 0 is used as the target; every remaining column is a pixel feature.
y_df = df.iloc[:, 0]
X_df = df.iloc[:, 1:]

# Rotate Images

In [None]:
def rotate(image):
    """Lay a 784-value array out as a 28x28 grid and swap its rows and columns.

    The original formulation mirrors the grid left-to-right and then turns it
    90 degrees counter-clockwise; those two steps together are exactly a
    transpose, which is what is returned here.

    Parameters
    ----------
    image : numpy.ndarray
        Array holding exactly 28 * 28 = 784 values (any shape that can be
        reshaped to 28x28).

    Returns
    -------
    numpy.ndarray
        The 28x28 grid with rows and columns exchanged.
    """
    grid = image.reshape(28, 28)
    # fliplr + rot90 == transpose
    return grid.T

In [None]:
# Convert the pixel table to a NumPy array and turn every 784-value row into
# a rotated 28x28 image (result shape: n_samples x 28 x 28).
#
# The type guard makes the cell safe to re-run: once X_df is already an
# array, pushing it through `rotate` again would either fail (3-D input) or
# silently undo the rotation (flattened input).
if isinstance(X_df, pd.DataFrame):
    X_df = np.apply_along_axis(rotate, 1, np.asarray(X_df))

# Preview a Rotated Sample

In [None]:
index = 3333

# Look the class id up in `mappers`, the class-id -> character dict built
# earlier. `.iloc` keeps the label lookup positional, matching the positional
# indexing into the image array below.
print("Target is:" , mappers[y_df.iloc[index]])

plt.imshow(X_df[index])

In [None]:
# Inspect the current shape of the image array.
X_df.shape

In [None]:
# Flatten each 2-D image back into a single feature row for the estimators.
# Guarded on the dimensionality so that re-running the cell is a no-op
# instead of failing to unpack an already-flattened 2-D shape.
if X_df.ndim == 3:
    nsamples, nx, ny = X_df.shape
    X_df = X_df.reshape(nsamples, nx * ny)

# Train/Test Split

In [None]:
# Hold out 15% of the samples for evaluation; the fixed seed keeps the
# partition the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X_df,
    y_df,
    test_size=0.15,
    random_state=22,
)

# Decision Tree

In [None]:
# Seed the tree so repeated runs produce the same model: scikit-learn
# permutes the candidate features at every split, so an unseeded tree can
# differ between runs even with identical data.
DTC = DecisionTreeClassifier(max_depth=35, min_samples_split=80, random_state=22)
DTC.fit(X_train, y_train)

In [None]:
# Predict class ids for the held-out samples with the fitted decision tree.
y_predDT = DTC.predict(X_test)

In [None]:
# scikit-learn metrics take (y_true, y_pred). With average='weighted' the
# per-class scores are weighted by the support of the *true* labels, so the
# argument order affects the result.
f1_score(y_test, y_predDT, average='weighted')

In [None]:
# Fraction of held-out samples the decision tree labels correctly.
accuracy_score(y_true=y_test, y_pred=y_predDT)

# Random Forest

In [None]:
# Random forest configuration. `random_state` is fixed so that bootstrap
# sampling and per-split feature selection are repeatable across runs;
# n_jobs=-1 lets scikit-learn use every available core.
RF_model = RandomForestClassifier(
    n_estimators=350,
    criterion='gini',
    max_depth=30,
    min_samples_split=40,
    min_samples_leaf=24,
    n_jobs=-1,
    random_state=22,
)

In [None]:
# Train the random forest on the training partition.
RF_model.fit(X_train , y_train)

In [None]:
# Predict class ids for the held-out samples with the fitted random forest.
y_pred = RF_model.predict(X_test)

In [None]:
# scikit-learn metrics take (y_true, y_pred). With average='weighted' the
# per-class scores are weighted by the support of the *true* labels, so the
# argument order affects the result.
f1_score(y_test, y_pred, average='weighted')

In [None]:
# Fraction of held-out samples the random forest labels correctly.
accuracy_score(y_true=y_test, y_pred=y_pred)

# Visualize Results

In [None]:
# Confusion matrix of the random-forest predictions on the held-out samples
# (true labels passed first, predictions second).
cf = confusion_matrix(y_test, y_pred)

In [None]:
# Render the confusion matrix as a table whose cells are shaded with the
# "inferno" colour map.
pd.DataFrame(cf).style.background_gradient(cmap = "inferno")

In [None]:
# Show held-out samples 100..117 in a 3x6 grid, each titled with the
# character the random forest predicted for it.
plt.figure(figsize=(10, 8), dpi=80)
for position, i in enumerate(range(100, 118), start=1):
    plt.subplot(3, 6, position)
    plt.imshow(X_test[i].reshape(28, 28), cmap=plt.cm.gray)
    # `mappers` is the class-id -> character dict built earlier.
    plt.title(mappers[y_pred[i]])

# Test external sample


In [None]:
img = cv2.imread('Untitled.png')
# cv2.imread signals an unreadable/missing file by returning None rather than
# raising, which would otherwise surface later as an obscure cvtColor error.
if img is None:
    raise FileNotFoundError("Could not read image file 'Untitled.png'")

# OpenCV stores colour channels in BGR order while matplotlib expects RGB;
# convert for display only and keep `img` itself in BGR for the next cell.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

In [None]:
# Collapse the BGR image loaded by cv2.imread into a single grayscale channel.
gray = cv2.cvtColor(img , cv2.COLOR_BGR2GRAY)

In [None]:
# Shrink the grayscale image to 28x28 and flatten it into the 784-value
# feature row the classifiers were fitted on.
# NOTE(review): no intensity inversion is applied here -- confirm the sample
# uses the same foreground/background polarity as the training images.
resized_img = cv2.resize(gray, dsize=(28, 28), interpolation=cv2.INTER_AREA)
final_img = resized_img.ravel()

In [None]:
# Classify the external sample; it is wrapped in a list so the estimator
# receives a batch containing a single row.
y_pred_samp = RF_model.predict([final_img])

In [None]:
# `mappers` is the class-id -> character dict built earlier.
print("Predicted " + mappers[y_pred_samp[0]])

# Two successive `rotate` calls cancel each other out, so the image to show is
# simply the flat sample laid out as a 28x28 grid.
im = final_img.reshape(28, 28)
plt.imshow(im)

# Saving The Model

In [None]:
# Persist the fitted random forest to disk with joblib.
jb.dump(RF_model , 'rf_model.sav')

In [None]:
# Reload the persisted model. joblib files are pickle-based, so only load
# files that come from a trusted source.
rrr = jb.load("rf_model.sav")

# Test the saved Model

In [None]:
# Classify the external sample with the reloaded model; `mappers` is the
# class-id -> character dict built earlier.
print("Predicted " + mappers[rrr.predict([final_img])[0]])

# Two successive `rotate` calls cancel each other out, so the image to show is
# simply the flat sample laid out as a 28x28 grid.
im = final_img.reshape(28, 28)
plt.imshow(im)