In [None]:
import os
import cv2
import numpy as np
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import pandas as pd


In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so files stored
# on Drive can be read through ordinary filesystem paths.
from google.colab import drive
# force_remount=True is passed so that re-running this cell remounts the drive.
drive.mount("/content/drive",force_remount=True)   #in case of remounting...

Mounted at /content/drive


In [None]:
# Root folder of the digit image dataset on the mounted Drive.
data_dir = '/content/drive/MyDrive/Dataset'
# Sanity check: list the entries directly under the dataset root.
print(os.listdir(data_dir))

['8', '6', '5', '9', '4', '3', '7', '1', '0', '2']


In [None]:
#Image preprocessing
# Reads every image under data_dir/<digit>/ for digits 0-9, converts it to a
# HOG feature vector, and splits the result into stratified train/test sets.
from skimage.feature import hog
from sklearn.model_selection import train_test_split

data = []      # one HOG feature vector per readable image
labels = []    # digit label (0-9) taken from the sub-folder name
skipped = []   # paths that cv2 could not decode as an image


for label in range(10):
    folder = os.path.join(data_dir, str(label))

    # os.listdir returns entries in arbitrary order. Sorting fixes the sample
    # order, which is required for train_test_split(random_state=42) to yield
    # the same split on every run.
    for file in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, file)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread returns None for files it cannot read; record them
            # instead of dropping them silently.
            skipped.append(img_path)
            continue

        # Normalise geometry and intensity: 28x28 pixels, values in [0, 1].
        img = cv2.resize(img, (28, 28))
        img = img / 255.0

        # HOG feature extraction (4x4-pixel cells -> 7x7 cells per image)
        features = hog(img, orientations=9, pixels_per_cell=(4, 4),
                       cells_per_block=(2, 2), block_norm='L2-Hys')

        data.append(features)
        labels.append(label)

if skipped:
    print(f"Skipped {len(skipped)} unreadable file(s), e.g. {skipped[0]}")

X = np.array(data)
y = np.array(labels)

# Stratified 70/30 split so every digit keeps its share in both subsets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

print("Training samples:", len(X_train))
print("Testing samples:", len(X_test))


Training samples: 688
Testing samples: 296


In [None]:
#training
# Baseline RBF-kernel SVM on the HOG training features.
# SVC.fit returns the fitted estimator, so construction and fitting are chained.
clf = SVC(kernel='rbf', C=10, gamma=0.01).fit(X_train, y_train)

print("SVM trained successfully!")


SVM trained successfully!


In [None]:
#Checking the most suitable parameters
# Exhaustive 3-fold cross-validated search over C and gamma for an RBF SVM.

from sklearn.model_selection import GridSearchCV

param_grid = {
    'C': [1, 10, 100],
    'gamma': [0.001, 0.01, 0.1],
    'kernel': ['rbf'],
}

# GridSearchCV.fit returns the fitted search object itself.
grid = GridSearchCV(SVC(), param_grid, cv=3).fit(X_train, y_train)
print(grid.best_params_)

{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}


'clf.fit(X_train, y_train)\n\nprint("SVM trained successfully!")'

In [None]:

#Testing
# Predict the held-out split with the grid-search object, compute every
# metric first, then print them in a fixed order.
y_pred = grid.predict(X_test)

# Metrics
acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred, output_dict=True)
df_report = pd.DataFrame(report).T   # transposed so each report entry is a row

# Accuracy
print("Accuracy:", acc)

# Confusion Matrix
print("Confusion Matrix:\n", cm)

# Per-digit performance
print(df_report)


Accuracy: 0.6216216216216216
Confusion Matrix:
 [[17  1  2  2  0  2  1  3  0  1]
 [ 0 20  1  0  1  1  2  4  0  0]
 [ 0  4 19  1  1  1  0  2  1  1]
 [ 0  3  1 18  0  4  0  1  2  0]
 [ 0  5  1  2 14  2  0  2  1  2]
 [ 1  0  0  3  1 18  2  2  2  1]
 [ 0  0  4  0  0  4 19  1  1  1]
 [ 0  4  1  1  2  0  0 21  0  2]
 [ 0  1  1  1  1  1  1  3 20  1]
 [ 1  0  3  1  1  1  0  3  1 18]]
              precision    recall  f1-score     support
0              0.894737  0.586207  0.708333   29.000000
1              0.526316  0.689655  0.597015   29.000000
2              0.575758  0.633333  0.603175   30.000000
3              0.620690  0.620690  0.620690   29.000000
4              0.666667  0.482759  0.560000   29.000000
5              0.529412  0.600000  0.562500   30.000000
6              0.760000  0.633333  0.690909   30.000000
7              0.500000  0.677419  0.575342   31.000000
8              0.714286  0.666667  0.689655   30.000000
9              0.666667  0.620690  0.642857   29.000000
accur

In [None]:
#Excel sheet of Confusion matrix and Performance table
# Both tables go into one workbook, each on its own sheet.
cm_frame = pd.DataFrame(cm)

with pd.ExcelWriter("Digit_Recognition_Results.xlsx") as writer:
    # Confusion matrix is written without the row index column.
    cm_frame.to_excel(writer, sheet_name="Confusion_Matrix", index=False)
    # The report keeps its index, which carries the row labels.
    df_report.to_excel(writer, sheet_name="Performance_Table")

print("Excel file saved: Digit_Recognition_Results.xlsx")


Excel file saved: Digit_Recognition_Results.xlsx


In [None]:
# Shell escape: list /content to confirm which files exist in the runtime.
!ls /content


Batch_Predictions.xlsx		drive	     svm_digit_model.pkl
Digit_Recognition_Results.xlsx	sample_data


In [None]:
# Hand the results workbook to Colab's download helper so it can be saved
# outside the runtime.
from google.colab import files
files.download("Digit_Recognition_Results.xlsx")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
#Saving model
import joblib

# Persist the estimator that produced the reported metrics.
# grid.best_estimator_ is the SVC refit on the full training set with
# grid.best_params_ (GridSearchCV's default refit=True), and it is the model
# grid.predict() used during testing. The separately trained `clf`
# (gamma=0.01) was never evaluated, so saving it would ship a different model
# than the one the accuracy and confusion matrix describe.
joblib.dump(grid.best_estimator_, "svm_digit_model.pkl")
print("Model saved!")


Model saved!


In [None]:
#Loading model
# Rebinds `clf` to the estimator stored in svm_digit_model.pkl.
# NOTE(review): joblib files are pickle-based; only load files this notebook
# (or another trusted source) produced.
import joblib
clf = joblib.load("svm_digit_model.pkl")
print("Model loaded successfully!")


Model loaded successfully!


In [None]:
import cv2
import numpy as np
from skimage.feature import hog

def predict_digit(img_path):
    """Predict the digit shown in a single image file.

    The image goes through the same preprocessing as the training images:
    grayscale read, resize to 28x28, scaling to [0, 1], then HOG features
    with identical parameters. The module-level ``clf`` performs the
    prediction.

    Parameters
    ----------
    img_path : str
        Path of the image to classify.

    Returns
    -------
    The predicted label (first element of ``clf.predict``), or ``None`` when
    the image cannot be read.
    """
    # images to grayscale
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print("Error: Image not found at", img_path)
        return None

    # Resizing to same dimensions as training images
    img = cv2.resize(img, (28, 28))

    # Scale intensities to [0, 1] exactly as the training loop does, so the
    # classifier sees features computed from identically prepared input.
    img = img / 255.0

    # Extracting HOG features (parameters must match training)
    features = hog(img, orientations=9, pixels_per_cell=(4, 4),
                   cells_per_block=(2, 2), block_norm='L2-Hys')

    # predict() expects a 2-D array: one row per sample
    pred = clf.predict(features.reshape(1, -1))

    print(f"Predicted digit: {pred[0]}")
    return pred[0]


In [None]:
# Spot-check the classifier on one image from the Drive test folder.
img_path = "/content/drive/MyDrive/Digit_test/og-3-1.jpg"
predict_digit(img_path)


3

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import hog

# Loading model
# Rebinds `clf` to the estimator stored on disk so this cell does not depend
# on the training cells having been run in the current session.
# NOTE(review): joblib files are pickle-based; only load trusted files.
import joblib
clf = joblib.load("svm_digit_model.pkl")

# Test images
# Folder whose image files are classified in the loop below.
folder = "/content/drive/MyDrive/Digit_test/"

#result list
# Collects one {"Filename", "Predicted Digit"} record per processed image.
results = []

def predict_digit(img_path):
    """Return the predicted digit for one image file as an ``int``.

    Applies the training-time preprocessing (grayscale read, resize to
    28x28, scaling to [0, 1], HOG with the same parameters) and classifies
    the result with the module-level ``clf``.

    Returns ``None`` (after printing a message) when the image cannot be
    read.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print("Error loading the image:", img_path)
        return None

    img = cv2.resize(img, (28, 28))
    # Training images were divided by 255.0 before HOG; do the same here so
    # inference features are computed from identically prepared input.
    img = img / 255.0

    features = hog(img, orientations=9, pixels_per_cell=(4, 4),
                   cells_per_block=(2, 2), block_norm='L2-Hys')

    # predict() expects a 2-D array (one row per sample); cast the numpy
    # label to a plain int so it serialises cleanly in the results table.
    return int(clf.predict(features.reshape(1, -1))[0])

# Looping all images
for file in os.listdir(folder):
    if file.lower().endswith((".jpg", ".png", ".jpeg")):
        path = os.path.join(folder, file)
        predicted_digit = predict_digit(path)
        results.append({"Filename": file, "Predicted Digit": predicted_digit})


df_results = pd.DataFrame(results)


print(df_results)


df_results.to_excel("Batch_Predictions.xlsx", index=False)



          Filename  Predicted Digit
0       og-1-1.jpg                1
1       og-2-1.jpg                2
2       og-3-2.jpg                8
3       og-7-1.jpg                7
4       og-0-2.jpg                9
5       og-8-1.jpg                8
6       og-9-1.jpg                9
7       og-4-2.jpg                6
8       og-6-1.jpg                6
9       og-6-2.jpg                5
10  og-5-2 (1).jpg                0
11      og-4-1.jpg                4
12      og-3-1.jpg                3
13      og-5-1.jpg                6
14      og-0-1.jpg                0
15      og-9-2.jpg                9
16      og-5-3.jpg                2
