In [45]:
import os
import cv2
import numpy as np
from skimage.feature import hog
from skimage import exposure
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
import joblib
import random
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier  # MLP is an NN
from sklearn import svm
#TODO:
#test prediciton

In [1]:
#paper extraction
import cv2
import numpy as np
from utils import *


def reorder_points(points):
    points = points.reshape((4, 2))
    newPoints = np.zeros((4, 1, 2), dtype=np.int32)
    add = points.sum(1)
    diff = np.diff(points, axis=1)
    minSumIndex = np.argmin(add)
    maxSumIndex = np.argmax(add)
    minDiffIndex = np.argmin(diff)
    maxDiffIndex = np.argmax(diff)
    newPoints[0] = points[minSumIndex]
    newPoints[3] = points[maxSumIndex]
    newPoints[1] = points[minDiffIndex]
    newPoints[2] = points[maxDiffIndex]
    return newPoints


def extract_paper_region(input_img, iterations=1):
    output_image = input_img

    for i in range(iterations):
        input_img = output_image
        img_RGB = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)
        img_gray = cv2.cvtColor(img_RGB, cv2.COLOR_BGR2GRAY)

        canny_edged_image = cv2.Canny(img_gray, 100, 255)

        win = np.ones((5, 5), np.uint8)
        canny_edged_image = cv2.dilate(canny_edged_image, win, iterations=2)
        contours, _ = cv2.findContours(
            canny_edged_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

        image_with_contours = np.copy(img_RGB)
        cv2.drawContours(image_with_contours, contours, -1, (0, 255, 0), 20)

        largest_contour = np.array([])
        mxArea = 0

        for contour in contours:
            area = cv2.contourArea(contour)
            perimeter = cv2.arcLength(contour, True)
            approximated_edge = cv2.approxPolyDP(
                contour, 0.02 * perimeter, True)
            if area > mxArea and len(approximated_edge) == 4:
                mxArea = area
                largest_contour = approximated_edge

        biggest_contour = reorder_points(largest_contour)

        y, x = img_gray.shape[:2]

        pts1 = np.array(biggest_contour, np.float32)
        pts2 = np.array([[0, 0], [x, 0], [0, y], [x, y]], np.float32)

        matrix = cv2.getPerspectiveTransform(pts1, pts2)

        output_image = cv2.warpPerspective(img_gray, matrix, (x, y))

    return output_image


ModuleNotFoundError: No module named 'utils'

In [42]:
target_img_size = (32, 32)
random_seed = 42  
random.seed(random_seed)
np.random.seed(random_seed)

def extract_hog_features(img):
    img = cv2.resize(img, dsize=target_img_size)
    win_size = (32, 32)
    cell_size = (4, 4)
    block_size_in_cells = (2, 2)
    block_size = (block_size_in_cells[1] * cell_size[1], block_size_in_cells[0] * cell_size[0])
    block_stride = (cell_size[1], cell_size[0])
    nbins = 9  # Number of orientation bins
    hog = cv2.HOGDescriptor(win_size, block_size, block_stride, cell_size, nbins)
    h = hog.compute(img)
    return h.flatten()

# Function to load images and labels from the dataset
def load_dataset_digits(root_folder):
    features = []
    labels = []
    for digit in range(10):
        digit_folder = os.path.join(root_folder, str(digit))
        for filename in os.listdir(digit_folder):
            if filename.endswith(".jpg"):
                img_path = os.path.join(digit_folder, filename)
                img = cv2.imread(img_path, 0) 
                features.append(extract_hog_features(img))
                labels.append(digit)
    return np.array(features), np.array(labels)
def load_dataset_symbols(root_folder):
    features = []
    labels = []
    label_names = []  # To store the label names corresponding to each label

    label_index = 0
    for label_name in os.listdir(root_folder):
        label_folder = os.path.join(root_folder, label_name)
        if os.path.isdir(label_folder):
            for filename in os.listdir(label_folder):
                if filename.endswith(".jpg"):
                    img_path = os.path.join(label_folder, filename)
                    img = cv2.imread(img_path, 0)
                    features.append(extract_hog_features(img))
                    labels.append(label_index)
                    label_names.append(label_name)
            label_index += 1

    return np.array(features), np.array(labels), label_names
classifiers = {
    'SVM': svm.LinearSVC(random_state=random_seed),
    # 'KNN': KNeighborsClassifier(n_neighbors=7),
    # 'NN': MLPClassifier(solver='sgd', random_state=random_seed, hidden_layer_sizes=(500,), max_iter=20, verbose=1)
}

In [43]:
# train svm model for digit detection
dataset_root_digits = "./dataset/digits"
features, labels = load_dataset_digits(dataset_root_digits)
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.2, random_state=random_seed)
for model_name, model in classifiers.items():
    model.fit(train_features, train_labels)
    accuracy = model.score(test_features, test_labels)
    print(model_name, 'accuracy:', accuracy*100, '%')
    model_filename = "digits_model.joblib"
    joblib.dump(model, model_filename)
    print("Model saved")


SVM accuracy: 90.54054054054053 %
Model saved


In [44]:
# train svm model for symbol detection

dataset_root_symbols = "./dataset/symbols"
features, labels, label_names = load_dataset_symbols(dataset_root_symbols)
train_features, test_features, train_labels, test_labels = train_test_split(
    features, label_names, test_size=0.2, random_state=random_seed)

for model_name, model in classifiers.items():
    model.fit(train_features, train_labels)
    accuracy = model.score(test_features, test_labels)
    print(model_name, 'accuracy:', accuracy*100, '%')
    model_filename = "symbols_model.joblib"
    joblib.dump(model, model_filename)
    print("Model saved")



SVM accuracy: 98.05825242718447 %
Model saved
