**Libraries**

In [2]:
import pandas as pd
import numpy as np
from skimage.feature import hog
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import joblib  # for saving model

**Load CSVs**

In [3]:
# Read both Sign-MNIST splits; each CSV carries a 'label' column plus the
# remaining feature columns.
train_df = pd.read_csv('../data/sign_mnist_train.csv')
test_df = pd.read_csv('../data/sign_mnist_test.csv')

# Features: every column except 'label', as plain NumPy arrays.
X_train_raw = train_df.drop(columns='label').values
X_test_raw = test_df.drop(columns='label').values

# Targets: the 'label' column of each split.
y_train = train_df['label'].values
y_test = test_df['label'].values


**Reshape into 2D images**

In [4]:
# Turn each flat row into a 28x28 grid; -1 lets NumPy infer the image count.
IMAGE_SHAPE = (-1, 28, 28)
X_train_images = np.reshape(X_train_raw, IMAGE_SHAPE)
X_test_images = np.reshape(X_test_raw, IMAGE_SHAPE)


**Compute HOG features**

In [64]:
def compute_hog_features(
    images,
    orientations=12,
    pixels_per_cell=(4, 4),
    cells_per_block=(2, 2),
    block_norm='L2-Hys',
):
    """Compute one HOG descriptor per image and stack them into an array.

    The keyword defaults reproduce the settings this notebook previously
    hard-coded, so ``compute_hog_features(images)`` behaves as before.
    Exposing them lets a saved parameter dict drive feature extraction,
    e.g. ``compute_hog_features(images, **hog_params)``, which keeps the
    extraction settings in sync with a persisted model.

    Parameters
    ----------
    images : iterable of 2-D arrays
        Images to describe; each item is passed unchanged to ``hog``.
    orientations : int, default 12
        Forwarded to ``hog`` as ``orientations``.
    pixels_per_cell : tuple of int, default (4, 4)
        Forwarded to ``hog`` as ``pixels_per_cell``.
    cells_per_block : tuple of int, default (2, 2)
        Forwarded to ``hog`` as ``cells_per_block``.
    block_norm : str, default 'L2-Hys'
        Forwarded to ``hog`` as ``block_norm``.

    Returns
    -------
    numpy.ndarray
        ``np.array`` built from the per-image descriptors, in input order
        (one row per image when all descriptors have the same length).
    """
    features = [
        hog(
            img,
            orientations=orientations,
            pixels_per_cell=pixels_per_cell,
            cells_per_block=cells_per_block,
            block_norm=block_norm,
        )
        for img in images
    ]
    return np.array(features)

# Describe both splits with the same extractor (train first, then test).
X_train_hog, X_test_hog = (
    compute_hog_features(split) for split in (X_train_images, X_test_images)
)


**Feature Scaling**

In [52]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training descriptors only, then apply that same
# fitted transform to both splits so the test set never influences it.
# NOTE(review): the training and evaluation cells in this notebook use
# X_train_hog / X_test_hog, not the *_scaled arrays produced here.
scaler = StandardScaler().fit(X_train_hog)
X_train_hog_scaled = scaler.transform(X_train_hog)
X_test_hog_scaled = scaler.transform(X_test_hog)


**Train the Random Forest classifier**

In [53]:
# Hyperparameters collected in one mapping so they are easy to read and tweak.
rf_params = {
    'n_estimators': 500,     # number of trees
    'max_depth': 30,
    'min_samples_split': 2,  # default
    'min_samples_leaf': 1,
    'random_state': 42,      # fixed seed for reproducible forests
}
model = RandomForestClassifier(**rf_params)

# Fit on the unscaled HOG descriptors; left as the last expression so the
# fitted estimator is displayed as the cell output.
model.fit(X_train_hog, y_train)


0,1,2
,n_estimators,700
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,2
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


**Evaluate**

In [54]:
# Predict the held-out split and report the fraction of correct labels.
y_pred = model.predict(X_test_hog)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", test_accuracy)


Accuracy: 0.9053262688232013


**Save the trained model**

In [55]:
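# Experiment log: each line is the save call used for one trained configuration;
# the trailing number is the accuracy (%) noted for that run.
# Uncomment the line that matches the current `model` to persist it.

# joblib.dump(model, '../models/sign_model.pkl')  # 89.3

# joblib.dump(model, '../models/sign_model_500_30_HOG12.pkl')  # 93.3

# joblib.dump(model, '../models/sign_model_200_35_HOG16.pkl')  # 90.68

# joblib.dump(model, '../models/sign_model_200_30_HOG12.pkl')  # 91.06

# joblib.dump(model, '../models/sign_model_700_HOG15.pkl')  # 90.53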

['../models/sign_model_700_HOG15.pkl']

In [75]:
import joblib

# NOTE: joblib artifacts are pickle-based; only load files you created or trust.
loaded = joblib.load('../models/sign_model_500_30_HOG12.pkl')

# The file may hold either a bundle dict ({'model': ..., 'hog_params': ...})
# or a bare estimator saved directly with joblib.dump(model, ...).
# Handle both so this cell works on a fresh kernel regardless of which
# format is currently on disk.
if isinstance(loaded, dict):
    model = loaded['model']
    hog_params = loaded['hog_params']
else:
    model = loaded
    hog_params = None  # bare estimator: no HOG settings were stored with it

# Sanity check: the descriptor width must match what the model was fitted on.
print("Expected feature count:", model.n_features_in_)

print("X_test_hog shape:", X_test_hog.shape)


Expected feature count: 1728
X_test_hog shape: (7172, 1728)


In [62]:
# Score the reloaded model on the test descriptors and report it as a percentage.
y_pred = model.predict(X_test_hog)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)

acc_percent = acc * 100
print("Accuracy of the model:", acc_percent, "%")

Accuracy of the model: 93.36307863915226 %


In [70]:
import os

model_path = '../models/sign_model_500_30_HOG12.pkl'

# Make sure the folder we actually write to exists (the parent of model_path,
# i.e. '../models'), rather than a 'models' folder next to the notebook.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# NOTE: joblib artifacts are pickle-based; only load files you created or trust.
loaded = joblib.load(model_path)

# If this cell already ran, the file holds the bundle dict rather than a bare
# estimator. Unwrap it so re-running never nests a bundle inside a bundle.
model = loaded['model'] if isinstance(loaded, dict) else loaded

# Bundle the estimator with the HOG settings its features were computed with,
# so inference code can rebuild matching descriptors.
# NOTE(review): block_norm is not recorded here; the extractor in this
# notebook uses 'L2-Hys'.
model_data = {
    'model': model,
    'hog_params': {
        'orientations': 12,
        'pixels_per_cell': (4,4),
        'cells_per_block': (2,2)
    }
}

# Overwrites the original file in place with the bundled format.
joblib.dump(model_data, model_path)

['../models/sign_model_500_30_HOG12.pkl']

In [73]:
# Reload the bundle and unpack the estimator together with its HOG settings.
data = joblib.load('../models/sign_model_500_30_HOG12.pkl')
model, hog_params = data['model'], data['hog_params']
print(hog_params)


{'orientations': 12, 'pixels_per_cell': (4, 4), 'cells_per_block': (2, 2)}
