In [None]:
# Libraries
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
import os
import cv2
import numpy as np
import warnings

In [None]:
# Loading Data & PreProcessing
warnings.filterwarnings('ignore')

try:
    import tensorflow as tf
    from sklearn.model_selection import train_test_split

    print("Libraries Loaded Successfully")
except ImportError:
    print("Failed to Load Libraries!")


class DataLoader:
    """Load a folder-per-class image dataset and split it into train/test arrays.

    Every sub-folder of ``path`` is one class. Each image is read as grayscale,
    padded, resized, optionally zoomed, centred on its largest contour and
    optionally binarised before being collected into NumPy arrays.
    """

    def __init__(self, path=r'C:\Users\NoteBook\Desktop\alphabet\DS-3', image_size=50, shrink=0, padding=10, threshold=100, invert=False):
        """Store the preprocessing configuration.

        Args:
            path: Root directory that contains one sub-folder per class.
            image_size: Side length, in pixels, of the square output images.
            shrink: Number of pixels trimmed from every side after the first
                resize; the remainder is scaled back up to ``image_size``.
            padding: Width, in pixels, of the constant border added around the
                raw image.
            threshold: Binarisation cut-off passed to ``cv2.threshold``, or
                ``None`` to skip binarisation.
            invert: When true, pixel values are flipped (``255 - value``).
        """
        self.PATH = path
        self.IMAGE_SIZE = image_size
        self.PADDING = padding
        self.INVERT = invert
        self.THRESHOLD = threshold
        self.SLICE = shrink

        self.x_data = []
        self.y_data = []
        self.labels = []
        self.CATEGORIES = []
        self.list_categories = []

    def get_categories(self):
        """Return the class folder names found under ``self.PATH``, sorted.

        Folders are ordered by the integer before the first ``-`` in their
        name; if any prefix is not an integer the order falls back to plain
        string sorting. ``self.labels`` holds those prefixes in the same order
        as the returned list.

        Returns:
            list[str]: The sorted folder names (also kept in
            ``self.list_categories``).
        """
        # Rebuild from scratch so repeated calls do not append duplicates, and
        # keep directories only: a stray file in the dataset root is not a class.
        folders = [
            entry for entry in os.listdir(self.PATH)
            if os.path.isdir(os.path.join(self.PATH, entry))
        ]

        try:
            folders = sorted(folders, key=lambda name: int(name.split("-")[0]))
        except ValueError:
            folders = sorted(folders)

        self.list_categories = folders
        self.labels = [name.split("-")[0] for name in folders]

        print("Found Categories:", self.list_categories, '\n')
        return self.list_categories

    def centerize(self, image):
        """Centre the largest external contour of ``image`` on a blank canvas.

        Args:
            image: Single-channel ``uint8`` image no larger than
                ``IMAGE_SIZE`` on either side (``preprocess_image`` always
                passes an ``IMAGE_SIZE`` x ``IMAGE_SIZE`` image).

        Returns:
            A zero-filled ``IMAGE_SIZE`` x ``IMAGE_SIZE`` ``uint8`` image with
            the bounding box of the largest contour pasted in the middle, or
            ``image`` unchanged when no contour is found.
        """
        contours, _ = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        if len(contours) == 0:
            return image

        # Only the contour with the largest area is kept.
        x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))
        cropped_image = image[y:y + h, x:x + w]

        centered_image = np.zeros((self.IMAGE_SIZE, self.IMAGE_SIZE), dtype=np.uint8)
        start_x = (self.IMAGE_SIZE - w) // 2
        start_y = (self.IMAGE_SIZE - h) // 2
        centered_image[start_y:start_y + h, start_x:start_x + w] = cropped_image

        return centered_image

    def enhance(self, image):
        """Binarise ``image``: pixels above ``self.THRESHOLD`` become 255, the rest 0."""
        _, enhanced_image = cv2.threshold(image, self.THRESHOLD, 255, cv2.THRESH_BINARY)
        return enhanced_image

    def preprocess_image(self, image_path):
        """Read one image and run the full preprocessing pipeline on it.

        Steps: grayscale read, optional inversion, constant zero border,
        resize to ``IMAGE_SIZE``, trim ``SLICE`` pixels per side and scale back
        up (zoom), centre the largest contour, optional binarisation.

        Args:
            image_path: Path of the image file to read.

        Returns:
            The processed ``IMAGE_SIZE`` x ``IMAGE_SIZE`` image, or ``None``
            when ``cv2.imread`` cannot read the file.
        """
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            return None

        # Invert BEFORE padding: the border is filled with 0, so inverting
        # afterwards would turn it into a white frame that then becomes the
        # largest contour and defeats centerize().
        if self.INVERT:
            image = 255 - image

        padded_image = cv2.copyMakeBorder(image, self.PADDING, self.PADDING, self.PADDING, self.PADDING,
                                          cv2.BORDER_CONSTANT, value=0)

        side = self.IMAGE_SIZE
        resized_image = cv2.resize(padded_image, (side, side))

        # Trim SLICE pixels from every edge, then scale back up to zoom in.
        cropped_image = resized_image[self.SLICE:side - self.SLICE, self.SLICE:side - self.SLICE]
        zoomed_image = cv2.resize(cropped_image, (side, side))

        centered_image = self.centerize(zoomed_image)

        if self.THRESHOLD is not None:
            return self.enhance(centered_image)
        return centered_image

    def process_images(self):
        """Preprocess every image of every category.

        Files that cannot be read are skipped; files whose preprocessing
        raises are reported on stdout and skipped.

        Returns:
            tuple: ``(X_Data, Y_Data)`` where ``X_Data`` has shape
            ``(n_images, IMAGE_SIZE, IMAGE_SIZE)`` with values divided by 255
            and ``Y_Data`` holds the index of each image's category in
            ``self.CATEGORIES``.
        """
        self.CATEGORIES = self.get_categories()

        # Start from empty buffers so a second call does not duplicate samples.
        self.x_data = []
        self.y_data = []

        for class_index, category in enumerate(self.CATEGORIES):
            category_path = os.path.join(self.PATH, category)

            for img_name in os.listdir(category_path):
                img_path = os.path.join(category_path, img_name)

                try:
                    image = self.preprocess_image(img_path)
                    if image is not None:
                        self.x_data.append(image)
                        self.y_data.append(class_index)
                except Exception as e:
                    # Best effort: one bad file must not abort the whole load.
                    print(f"Error processing {img_path}: {e}")

        X_Data = np.asarray(self.x_data) / 255.0
        Y_Data = np.asarray(self.y_data)
        X_Data = X_Data.reshape(-1, self.IMAGE_SIZE, self.IMAGE_SIZE)

        return X_Data, Y_Data

    def load_data(self):
        """Process the dataset and split it 80/20 into train and test sets.

        The split is stratified on the class index and uses ``random_state=0``.

        Returns:
            tuple: ``(X_train, y_train, X_test, y_test)``.
        """
        print('Loading Files and Dataset ...')

        X_Data, Y_Data = self.process_images()

        X_train, X_test, y_train, y_test = train_test_split(X_Data, Y_Data, train_size=0.8, stratify=Y_Data,
                                                            random_state=0)

        return X_train, y_train, X_test, y_test


# Class index -> display name. The position of each name in the list below is
# its class index: 0-31 are letter names, 32-41 are the digits Zero-Nine.
# NOTE(review): index 42 repeats 'Five' (also at index 37) -- confirm whether
# this is a deliberate second class or a mislabelled entry.
LABELS = dict(enumerate([
    'Alef', 'Be', 'Pe', 'Te', 'Se', 'Jim', 'Che', 'H',
    'Khe', 'Dal', 'Zal', 'Re', 'Ze', 'Zhe', 'Sin', 'Shin',
    'Sad', 'Zad', 'Ta', 'Za', 'Ayin', 'Ghayin', 'Fe', 'Ghaf',
    'Kaf', 'Gaf', 'Lam', 'Mim', 'Noon', 'Vav', 'He', 'Ye',
    'Zero', 'One', 'Two', 'Three', 'Four',
    'Five', 'Six', 'Seven', 'Eight', 'Nine',
    'Five',
]))

if __name__ == "__main__":
    # Dataset root, given relative to the current working directory.
    DATASET_PATH = "Datasets/DS-2 changed"

    # 64x64 output, 15 px border, no zoom, no binarisation, no inversion.
    dataset_loader = DataLoader(
        path=DATASET_PATH,
        image_size=64,
        shrink=0,
        padding=15,
        threshold=None,
        invert=False,
    )
    X_train, y_train, X_test, y_test = dataset_loader.load_data()

In [None]:
# Resnet 50
def _to_resnet_input(gray_images):
    """Convert (N, H, W) grayscale images in [0, 1] into ResNet50-ready (N, H, W, 3) batches.

    DataLoader returns single-channel images divided by 255, while
    ``preprocess_input`` expects 3-channel input in the 0-255 range. The
    channel is therefore replicated three times and rescaled. A fresh array is
    built so the caller's data is never modified in place.
    """
    stacked = np.repeat(np.asarray(gray_images, dtype=np.float32)[..., np.newaxis], 3, axis=-1)
    return preprocess_input(stacked * 255.0)


X_train_resnet = _to_resnet_input(X_train)
X_test_resnet = _to_resnet_input(X_test)

# Derive the input shape from the data so it follows DataLoader's image_size.
resnet_model = ResNet50(weights='imagenet', include_top=False, input_shape=X_train_resnet.shape[1:])

model = Model(inputs=resnet_model.input, outputs=resnet_model.output)
X_train_features = model.predict(X_train_resnet)
X_test_features = model.predict(X_test_resnet)

# Flatten the features
X_train_features_flat = X_train_features.reshape(X_train_features.shape[0], -1)
X_test_features_flat = X_test_features.reshape(X_test_features.shape[0], -1)

# Standard Scaler (fitted on the training features only, then applied to the test features)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_features_flat)
X_test_scaled = scaler.transform(X_test_features_flat)

In [None]:
# Logistic Regression GridSearch
# >>>>> Accuracy of Logistic before gridsearch: 91%
lg_params = {
    "C": [0.1, 1, 10, 100],
    "solver": ["lbfgs"],
    # None (not the string "none") selects "no penalty" in current scikit-learn.
    "penalty": ["l2", None],
    "max_iter": [100, 200],
}
# Search over a fresh estimator: the tuned model is only created in a later
# cell, so referring to it here fails on a clean Restart & Run All.
lg_grid_srch = GridSearchCV(LogisticRegression(), param_grid=lg_params, verbose=2)
lg_grid_srch.fit(X_train_scaled, y_train)
print("Best Parameters:", lg_grid_srch.best_params_)
# >>>>> Accuracy of Logistic after gridsearch: 91%

In [None]:
# Logistic Regression Model (With Best Parameters)
logistic_model = LogisticRegression(C=1, max_iter=100, penalty='l2', solver='lbfgs')
# The split returned by DataLoader.load_data() is named y_train / y_test (lower-case y).
logistic_model.fit(X_train_scaled, y_train)
y_pred = logistic_model.predict(X_test_scaled)
print(classification_report(y_test, y_pred))

In [None]:
# KNN GridSearch
# >>>>> Accuracy of KNN before gridsearch: 77%
# NOTE(review): `p` is documented as the Minkowski power parameter, so the
# 'euclidean' / 'manhattan' candidates are presumably refit redundantly for
# every `p` value -- confirm and trim the grid if so.
knn_params = {
    'n_neighbors': [3, 5, 7, 9, 11],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan', 'minkowski'],
    'p': [0.5, 1, 2]
}
# Search over a fresh estimator: the tuned model is only created in a later
# cell, so referring to it here fails on a clean Restart & Run All.
knn_grid_srch = GridSearchCV(KNeighborsClassifier(), param_grid=knn_params, verbose=2)
knn_grid_srch.fit(X_train_scaled, y_train)
print("Best Parameters:", knn_grid_srch.best_params_)
# >>>>> Accuracy of KNN after gridsearch: 81%

In [None]:
# KNN Model (With Best Parameters)
knn_model = KNeighborsClassifier(n_neighbors=5, metric='manhattan', p=0.5, weights='distance')
# The split returned by DataLoader.load_data() is named y_train / y_test (lower-case y).
knn_model.fit(X_train_scaled, y_train)
y_pred = knn_model.predict(X_test_scaled)
print(classification_report(y_test, y_pred))

In [None]:
# SVM GridSearch
# >>>>> Accuracy of SVM before gridsearch: 86%
svm_params = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto'],
    'degree': [2, 3, 4]
}
# Search over a fresh estimator: the tuned model is only created in a later
# cell, so referring to it here fails on a clean Restart & Run All.
svm_grid_srch = GridSearchCV(SVC(), param_grid=svm_params, verbose=2)
svm_grid_srch.fit(X_train_scaled, y_train)
print("Best Parameters:", svm_grid_srch.best_params_)
# >>>>> Accuracy of SVM after gridsearch: 91%

In [None]:
# SVM Model (With Best Parameters)
svm_model = SVC(kernel='linear', C=0.1, gamma='scale')
# The split returned by DataLoader.load_data() is named y_train / y_test (lower-case y).
svm_model.fit(X_train_scaled, y_train)
y_pred = svm_model.predict(X_test_scaled)
print(classification_report(y_test, y_pred))

In [None]:
# Decision Tree GridSearch
# >>>>> Accuracy of Tree before gridsearch: 75%
tree_params = {
    'criterion': ['gini', 'entropy'],
    # None (the object, not the string 'None') means "no depth limit".
    'max_depth': [None, 5, 10, 20],
    'min_samples_split': [2, 10, 20],
    'min_samples_leaf': [1, 5, 10],
}
# Search over a fresh estimator: the tuned model is only created in a later
# cell, so referring to it here fails on a clean Restart & Run All.
# random_state is pinned so the search gives the same result on every run.
tree_grid_srch = GridSearchCV(DecisionTreeClassifier(random_state=0), param_grid=tree_params, verbose=2)
tree_grid_srch.fit(X_train_scaled, y_train)
print("Best Parameters:", tree_grid_srch.best_params_)
# >>>>> Accuracy of Tree after gridsearch: 77%

In [None]:
# Decision Tree (With Best Parameters)
# random_state is pinned so the reported scores are reproducible across runs.
tree_model = DecisionTreeClassifier(criterion='entropy', max_depth=20, min_samples_leaf=1, min_samples_split=2,
                                    random_state=0)
# The split returned by DataLoader.load_data() is named y_train / y_test (lower-case y).
tree_model.fit(X_train_scaled, y_train)
y_pred = tree_model.predict(X_test_scaled)
print(classification_report(y_test, y_pred))