## Loading and Preprocessing Images

In [1]:
import cv2
import os
import numpy as np

def preprocess_image(file_path):
    """Load one image as grayscale, denoise it and rescale its pixel values.

    Parameters
    ----------
    file_path : str
        Path of the image file to load.

    Returns
    -------
    numpy.ndarray
        The median-filtered grayscale image divided by 255.0 (values fall in
        [0, 1] for 8-bit input).

    Raises
    ------
    ValueError
        If OpenCV cannot read or decode ``file_path``.
    """
    img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread reports failure by returning None instead of raising, so
    # fail here with the offending path rather than with an opaque error
    # further down the pipeline.
    if img is None:
        raise ValueError(f"Could not read image file: {file_path!r}")

    # Remove noise with a 3x3 median filter.
    img = cv2.medianBlur(img, 3)

    # Normalize
    return img / 255.0
    
base_directory = 'Characters/'
X = []
y = []

# Every sub-folder of base_directory is one class. Stray files at the top
# level (e.g. OS metadata files) are ignored so they neither crash the inner
# os.listdir call nor consume a label index.
class_folders = sorted(
    entry
    for entry in os.listdir(base_directory)
    if os.path.isdir(os.path.join(base_directory, entry))
)

# The integer label of a class is its position in the sorted folder list.
for label, folder in enumerate(class_folders):
    folder_path = os.path.join(base_directory, folder)
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the seeded train/test split) reproducible.
    for file in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file)
        if not os.path.isfile(file_path):
            # Nested directories are not images; skip them.
            continue
        X.append(preprocess_image(file_path))
        y.append(label)

# Stack the per-image arrays and labels into NumPy arrays for scikit-learn / Keras.
X = np.array(X)
y = np.array(y)

In [2]:
# Hold out 30% of the samples as a test set; random_state pins the shuffle.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=2,
)

In [3]:
# Give every sample an explicit single-channel axis: (N, 52, 52, 1),
# matching the input shape declared for the CNN.
X_train = np.reshape(X_train, (-1, 52, 52, 1))
X_test = np.reshape(X_test, (-1, 52, 52, 1))

## CNN Implementation

In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    BatchNormalization,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    Input,
    MaxPooling2D,
)

# The input is declared with an explicit Input object instead of passing
# `input_shape` to the first Conv2D, which Keras 3 warns against.
# Sequential.layers omits the input layer, so index-based lookups such as
# model.get_layer(index=...) see the same layer numbering as before.
model = Sequential([
    Input(shape=(52, 52, 1)),

    # Convolution Layers: three Conv -> BatchNorm -> MaxPool stages.
    Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    # Flatten Layer.
    Flatten(),

    # Hidden Layer.
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),

    # Output Layer: one softmax unit per class.
    # NOTE(review): 657 is hard-coded; presumably the number of class folders
    # under the dataset directory -- confirm it matches the labels in y.
    Dense(657, activation='softmax')
])

# Labels are integer class ids, hence the sparse categorical cross-entropy loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [5]:
# Fit the CNN for 5 epochs; loss and accuracy on (X_test, y_test) are
# reported after each epoch via validation_data.
model.fit(
    x=X_train,
    y=y_train,
    epochs=5,
    validation_data=(X_test, y_test),
)

Epoch 1/5
[1m3206/3206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m476s[0m 147ms/step - accuracy: 0.2623 - loss: 3.9637 - val_accuracy: 0.8735 - val_loss: 0.4521
Epoch 2/5
[1m3206/3206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m421s[0m 131ms/step - accuracy: 0.7926 - loss: 0.7007 - val_accuracy: 0.9155 - val_loss: 0.2829
Epoch 3/5
[1m3206/3206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m372s[0m 116ms/step - accuracy: 0.8504 - loss: 0.4816 - val_accuracy: 0.9348 - val_loss: 0.2075
Epoch 4/5
[1m3206/3206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m350s[0m 109ms/step - accuracy: 0.8741 - loss: 0.3937 - val_accuracy: 0.9549 - val_loss: 0.1407
Epoch 5/5
[1m3206/3206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m300s[0m 94ms/step - accuracy: 0.8916 - loss: 0.3404 - val_accuracy: 0.9615 - val_loss: 0.1227


<keras.src.callbacks.history.History at 0x266f4b37fe0>

In [6]:
from sklearn.metrics import accuracy_score

# The softmax output has one score per class; argmax over axis 1 picks the
# predicted class id for each test sample.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# accuracy_score takes (y_true, y_pred): ground truth goes first, matching
# the argument order used in the KNN and SVM cells.
print(accuracy_score(y_test, y_pred_classes))

[1m1374/1374[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 21ms/step
0.9615174672489083


In [7]:
# Persist the trained CNN to 'cnn.h5' in the current working directory.
model.save(filepath='cnn.h5')



## Feature Extraction

In [8]:
from tensorflow.keras.models import Model

# Reuse the trained CNN as a fixed feature extractor. In the Sequential
# definition, layer index 9 is the Flatten layer, so the extractor outputs
# the flattened activations of the last convolution stage.
# `model.inputs` is a one-element list; passing the tensor itself keeps the
# declared input structure identical to the single array given to predict(),
# which avoids Keras' "structure of `inputs` doesn't match" warning.
feature_extractor = Model(
    inputs=model.inputs[0],
    outputs=model.get_layer(index=9).output,
)
X_train_features = feature_extractor.predict(X_train)
X_test_features = feature_extractor.predict(X_test)

Expected: ['keras_tensor']
Received: inputs=Tensor(shape=(32, 52, 52, 1))


[1m3206/3206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 21ms/step


Expected: ['keras_tensor']
Received: inputs=Tensor(shape=(None, 52, 52, 1))


[1m1374/1374[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 22ms/step


## KNN Implementation

In [9]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# k-nearest-neighbours with scikit-learn's default settings, trained on the
# CNN-extracted features (fit returns the estimator itself).
knn = KNeighborsClassifier().fit(X_train_features, y_train)

# Score the classifier on the held-out features.
y_pred_knn = knn.predict(X_test_features)
accuracy_knn = accuracy_score(y_true=y_test, y_pred=y_pred_knn)
print(f"KNN Accuracy: {accuracy_knn}")

KNN Accuracy: 0.9150973435225619


## SVM Implementation

In [10]:
from sklearn.decomposition import PCA

# Reduce the CNN features to 50 principal components. The projection is
# learned from the training features only and then applied to the test set.
pca = PCA(n_components=50)
X_train_pca = pca.fit_transform(X=X_train_features)
X_test_pca = pca.transform(X=X_test_features)

In [11]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Linear-kernel SVM trained on the PCA-reduced CNN features
# (fit returns the estimator itself).
svm = SVC(kernel='linear').fit(X_train_pca, y_train)

# Score the classifier on the held-out, PCA-projected features.
y_pred_svm = svm.predict(X_test_pca)
accuracy_svm = accuracy_score(y_true=y_test, y_pred=y_pred_svm)
print(f"SVM Accuracy: {accuracy_svm}")

SVM Accuracy: 0.9815547671033479
