In [1]:
import os

import cv2
import joblib
import numpy as np
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
train_folder = './train'
test_folder = './test'

# (width, height) every image is resized to before being flattened.
IMAGE_SIZE = (64, 64)

# Maps the name of each class sub-folder to its integer label.
digit_mapping = {
    'zero': 0,
    'one': 1,
    'two': 2,
    'three': 3,
    'four': 4,
    'five': 5,
    'six': 6,
    'seven': 7,
    'eight': 8,
    'nine': 9
}


def load_digit_images(folder, label_by_name=digit_mapping, size=IMAGE_SIZE):
    """Load every readable image below ``folder`` as a flattened grayscale vector.

    ``folder`` is expected to contain one sub-folder per class, named after the
    keys of ``label_by_name``. Entries that are not directories, directories
    whose name is not a known class (e.g. ``.ipynb_checkpoints``) and files
    that OpenCV cannot decode are skipped with a printed notice instead of
    aborting the whole load.

    Parameters
    ----------
    folder : str
        Root directory holding the per-class sub-folders.
    label_by_name : dict
        Mapping from sub-folder name to integer label.
    size : tuple of int
        ``(width, height)`` passed to ``cv2.resize``.

    Returns
    -------
    features : list of numpy.ndarray
        One flattened image per sample (raw pixel values, not normalised).
    labels : list of int
        Label of each sample, aligned with ``features``.

    Raises
    ------
    ValueError
        If no image could be loaded from ``folder``.
    """
    features = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any unshuffled CV split) reproducible.
    for digit_folder in sorted(os.listdir(folder)):
        digit_path = os.path.join(folder, digit_folder)
        if digit_folder not in label_by_name or not os.path.isdir(digit_path):
            print(f"Skipping unexpected entry: {digit_path}")
            continue
        label = label_by_name[digit_folder]

        for image_name in sorted(os.listdir(digit_path)):
            image_path = os.path.join(digit_path, image_name)
            img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread signals an unreadable/non-image file by returning None.
                print(f"Skipping unreadable image: {image_path}")
                continue
            img = cv2.resize(img, size)
            features.append(img.flatten())
            labels.append(label)

    if not features:
        raise ValueError(f"No images could be loaded from {folder!r}")
    return features, labels


# Load training data
train_data, train_labels = load_digit_images(train_folder)

# Convert to numpy arrays and normalize pixel values to the range [0, 1]
X_train = np.array(train_data, dtype=np.float32) / 255.0
y_train = np.array(train_labels, dtype=np.int32)

# Load test data
test_data, test_labels = load_digit_images(test_folder)

# Convert to numpy arrays and normalize pixel values to the range [0, 1]
X_test = np.array(test_data, dtype=np.float32) / 255.0
y_test = np.array(test_labels, dtype=np.int32)


In [3]:

# StandardScaler, GridSearchCV and make_pipeline are imported in the notebook's
# top import cell (StandardScaler from its public home, sklearn.preprocessing).

# Display names of the SVM kernels to compare; the lower-cased name is the
# value passed to SVC(kernel=...).
KERNEL_NAMES = ('Linear', 'Poly', 'RBF', 'Sigmoid')

# Candidate values of the SVC regularisation parameter C for the grid search.
C_GRID = [1, 10, 100, 1000]

# One scaler + SVC pipeline per kernel, keyed by display name.
models = {
    name: make_pipeline(StandardScaler(), svm.SVC(kernel=name.lower()))
    for name in KERNEL_NAMES
}

# Baseline: fit each pipeline with default hyper-parameters and report the
# accuracy on the held-out test set.
for kernel, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy with {kernel} kernel: {accuracy}\n*********************")

# Tuning: 5-fold cross-validated search over C for each kernel. GridSearchCV
# is fitted on the training data only; the test set is used solely to report
# the accuracy of the refitted best estimator.
for kernel, model in models.items():
    param_grid = {'svc__C': C_GRID, 'svc__kernel': [kernel.lower()]}
    grid = GridSearchCV(model, param_grid, cv=5)
    grid.fit(X_train, y_train)

    print(f"Best parameters found for {kernel} kernel: {grid.best_params_}")

    # Use the best model from hyperparameter tuning
    best_model = grid.best_estimator_
    y_pred_best = best_model.predict(X_test)
    accuracy_best = accuracy_score(y_test, y_pred_best)
    print(f"Accuracy with best {kernel} model: {accuracy_best}")
    print("*********************")



Accuracy with Linear kernel: 0.9282511210762332
*********************
Accuracy with Poly kernel: 0.9451918285999004
*********************
Accuracy with RBF kernel: 0.9471848530144494
*********************
Accuracy with Sigmoid kernel: 0.8784255107125062
*********************
Best parameters found for Linear kernel: {'svc__C': 1, 'svc__kernel': 'linear'}
Accuracy with best Linear model: 0.9282511210762332
*********************
Best parameters found for Poly kernel: {'svc__C': 100, 'svc__kernel': 'poly'}
Accuracy with best Poly model: 0.9506726457399103
*********************
Best parameters found for RBF kernel: {'svc__C': 10, 'svc__kernel': 'rbf'}
Accuracy with best RBF model: 0.9516691579471849
*********************
Best parameters found for Sigmoid kernel: {'svc__C': 1, 'svc__kernel': 'sigmoid'}
Accuracy with best Sigmoid model: 0.8784255107125062
*********************
