In [3]:
import os
import numpy as np
from keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from skimage.feature import hog
from sympy import sympify
import cv2
import matplotlib.pyplot as plt
import joblib
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint
import joblib
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import log_loss, brier_score_loss
from sklearn.preprocessing import label_binarize


In [4]:
data_dir = "data"
img_size = 45

# Collect labels: every sub-directory of data_dir is treated as one class.
# Sorting keeps the label -> index mapping stable between runs.
symbol_labels = sorted(
    d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, d))
)

# Label mappings (class name -> integer index, and the inverse)
symbol_to_index = {label: idx for idx, label in enumerate(symbol_labels)}
index_to_label = {v: k for k, v in symbol_to_index.items()}

X_symbols, y_symbols = [], []

# Load dataset images
for label in symbol_labels:
    folder_path = os.path.join(data_dir, label)
    # os.listdir returns entries in arbitrary order; sort them so the sample
    # order -- and therefore any seeded split made from it -- is reproducible.
    for file in sorted(os.listdir(folder_path)):
        if not file.endswith(".jpg"):
            continue
        # Build the path before the try block so the error message below
        # always refers to the file that actually failed.
        img_path = os.path.join(folder_path, file)
        try:
            img = load_img(img_path, color_mode="grayscale", target_size=(img_size, img_size))
            img = img_to_array(img) / 255.0  # scale pixel values by 1/255
        except Exception as e:
            # Best effort: report the unreadable file and keep loading the rest.
            print(f"Error loading {img_path}: {e}")
            continue
        # Append image and label together, only after a successful load,
        # so the two lists can never get out of step.
        X_symbols.append(img)
        y_symbols.append(symbol_to_index[label])

# Convert to numpy arrays
X_all = np.array(X_symbols)
y_all = np.array(y_symbols)

print("Dataset loaded:", X_all.shape, y_all.shape)
print("Classes:", index_to_label)

Dataset loaded: (186134, 45, 45, 1) (186134,)
Classes: {0: '(', 1: ')', 2: '+', 3: '-', 4: '0', 5: '1', 6: '2', 7: '3', 8: '4', 9: '5', 10: '6', 11: '7', 12: '8', 13: '9', 14: 'div', 15: 'times'}


In [5]:
# Step 1: hold out 30 % of the samples (stratified by class) as a temporary pool.
X_train, X_temp, y_train, y_temp = train_test_split(
    X_all,
    y_all,
    test_size=0.3,
    stratify=y_all,
    random_state=42,
)

# Step 2: cut the temporary pool in half (again stratified) into validation and test.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    stratify=y_temp,
    random_state=42,
)

# Report the shape of every partition.
for _caption, _features, _targets in (
    ("Train:", X_train, y_train),
    ("Val:", X_val, y_val),
    ("Test:", X_test, y_test),
):
    print(_caption, _features.shape, _targets.shape)

Train: (130293, 45, 45, 1) (130293,)
Val: (27920, 45, 45, 1) (27920,)
Test: (27921, 45, 45, 1) (27921,)


In [6]:
def extract_hog_features(
    images,
    orientations=9,
    pixels_per_cell=(4, 4),
    cells_per_block=(2, 2),
    block_norm="L2-Hys",
):
    """Compute one HOG descriptor per image.

    Parameters
    ----------
    images : iterable of array-like
        Images to describe. Each image is squeezed first, so singleton
        dimensions (such as a trailing channel axis of length 1) are dropped
        before it is handed to ``hog``.
    orientations, pixels_per_cell, cells_per_block, block_norm
        Forwarded unchanged to ``skimage.feature.hog``. The defaults are the
        values this function previously hard-coded, so existing calls that
        pass only ``images`` behave exactly as before.

    Returns
    -------
    numpy.ndarray
        The per-image descriptors collected with ``np.array``, in input order.
    """
    features = [
        hog(
            img.squeeze(),
            orientations=orientations,
            pixels_per_cell=pixels_per_cell,
            cells_per_block=cells_per_block,
            block_norm=block_norm,
        )
        for img in images
    ]
    return np.array(features)

# HOG descriptors for the train, validation and test partitions, computed in that order.
X_train_hog, X_val_hog, X_test_hog = (
    extract_hog_features(partition) for partition in (X_train, X_val, X_test)
)

In [7]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

# Random forest with default parameters + n_jobs=-1
rf_hog = RandomForestClassifier(random_state=42, n_jobs=-1)

# Extra trees with default parameters + n_jobs=-1
et_hog = ExtraTreesClassifier(random_state=42, n_jobs=-1)

# SVM with a linear kernel (defaults apart from kernel and random_state)
svm_hog = SVC(random_state=42, kernel="linear")

# KNN with default parameters + n_jobs=-1
knn_hog = KNeighborsClassifier(n_jobs=-1)

# Train the models on the HOG features, in the order RF, ET, SVM, KNN
for _estimator in (rf_hog, et_hog, svm_hog, knn_hog):
    _estimator.fit(X_train_hog, y_train)

# Evaluate on the validation data
for _caption, _estimator in (
    ("RF val acc:", rf_hog),
    ("ET val acc:", et_hog),
    ("SVM val acc:", svm_hog),
    ("KNN val acc:", knn_hog),
):
    print(_caption, _estimator.score(X_val_hog, y_val))


RF val acc: 0.997528653295129
ET val acc: 0.9974928366762178
SVM val acc: 0.994878223495702
KNN val acc: 0.9685530085959886


NameError: name 'x_img_hog' is not defined