# Training

In [138]:
import os

DATASET_NAME = "drsprg"
DATA_BASE_DIR = f"../data/processed/{DATASET_NAME}/"
IMAGES_DIR = os.path.join(DATA_BASE_DIR, "jpgs/")
LBP_DATASET = os.path.join(DATA_BASE_DIR, "artifacts/lbp_dataset.pkl")

# Traning related parameters
SEED = 42
TEST_SIZE = 0.2
RS_N_ITER = 40
CV = 10

In [139]:
import joblib
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_predict
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.svm import SVC

In [140]:
np.random.seed(SEED)

## LBP

In [141]:
lbp_dataset = joblib.load(LBP_DATASET)

In [142]:
X = np.array([sample[0] for sample in lbp_dataset])
y = np.array([sample[1] for sample in lbp_dataset])

In [143]:
X.shape, y.shape

((102, 1620), (102,))

In [144]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=SEED, stratify=y)
len(X_train), len(X_test), len(y_train), len(y_test)

(81, 21, 81, 21)

In [145]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

### SVM

#### Radial Basis Function (RBF)

In [146]:
svc = SVC()

In [147]:
param_grid = {
    "C": np.logspace(-3, 3, 7),
    "gamma": np.logspace(-3, 3, 7),
    "kernel": ["rbf"],
}

random_search = RandomizedSearchCV(
    estimator=svc,
    param_distributions=param_grid,
    n_iter=RS_N_ITER,
    cv=CV,
    random_state=SEED,
)
random_search.fit(X_train_scaled, y_train)

print("Best hyperparameters:", random_search.best_params_)

test_accuracy = random_search.score(X_test_scaled, y_test)
print("Test set accuracy:", test_accuracy)

Best hyperparameters: {'kernel': 'rbf', 'gamma': 0.01, 'C': 0.09999999999999999}
Test set accuracy: 0.7619047619047619


In [148]:
cv_preds = cross_val_predict(svc, X_train_scaled, y_train, cv=CV)
report = classification_report(y_train, cv_preds)
print(report)

              precision    recall  f1-score   support

           0       0.71      0.71      0.71        35
           1       0.78      0.78      0.78        46

    accuracy                           0.75        81
   macro avg       0.75      0.75      0.75        81
weighted avg       0.75      0.75      0.75        81



In [149]:
svc = SVC(**random_search.best_params_)
svc.fit(X_train_scaled, y_train)
y_pred = svc.predict(X_test_scaled)
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.83      0.56      0.67         9
           1       0.73      0.92      0.81        12

    accuracy                           0.76        21
   macro avg       0.78      0.74      0.74        21
weighted avg       0.78      0.76      0.75        21



#### Kernels Linear

In [150]:
svc = SVC()

param_grid = {
    "C": np.logspace(-3, 3, 7),
    "gamma": np.logspace(-3, 3, 7),
    "kernel": ["linear"],
}

random_search = RandomizedSearchCV(
    estimator=svc,
    param_distributions=param_grid,
    n_iter=RS_N_ITER,
    cv=CV,
    random_state=SEED,
)
random_search.fit(X_train_scaled, y_train)

print("Best hyperparameters:", random_search.best_params_)

test_accuracy = random_search.score(X_test_scaled, y_test)
print("Test set accuracy:", test_accuracy)

Best hyperparameters: {'kernel': 'linear', 'gamma': 10.0, 'C': 0.001}
Test set accuracy: 0.6666666666666666


In [151]:
cv_preds = cross_val_predict(svc, X_train_scaled, y_train, cv=CV)
report = classification_report(y_train, cv_preds)
print(report)

              precision    recall  f1-score   support

           0       0.71      0.71      0.71        35
           1       0.78      0.78      0.78        46

    accuracy                           0.75        81
   macro avg       0.75      0.75      0.75        81
weighted avg       0.75      0.75      0.75        81



In [152]:
svc = SVC(**random_search.best_params_)
svc.fit(X_train_scaled, y_train)
y_pred = svc.predict(X_test_scaled)
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.58      0.78      0.67         9
           1       0.78      0.58      0.67        12

    accuracy                           0.67        21
   macro avg       0.68      0.68      0.67        21
weighted avg       0.69      0.67      0.67        21

