# Training

In [1]:
import os

DATASET_NAME = "drsprg"
DATA_BASE_DIR = f"../data/processed/{DATASET_NAME}/"
IMAGES_DIR = os.path.join(DATA_BASE_DIR, "jpgs/")
LBP_DATASET = os.path.join(DATA_BASE_DIR, "artifacts/lbp_dataset.pkl")

# Training related parameters
SEED = 42
TEST_SIZE = 0.2
RS_N_ITER = 40
CV = 20

In [2]:
import joblib
import numpy as np
from scipy.stats import randint
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import (
    RandomizedSearchCV,
    cross_val_score,
    train_test_split,
)
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from xgboost import XGBClassifier

In [3]:
np.random.seed(SEED)

## LBP

In [4]:
lbp_dataset = joblib.load(LBP_DATASET)

In [5]:
X = np.array([sample[0] for sample in lbp_dataset])
y = np.array([sample[1] for sample in lbp_dataset])

In [6]:
X.shape, y.shape

((102, 1620), (102,))

In [7]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

### SVM

In [8]:
def run(model, X, y, cv):
    metrics = ["precision", "recall", "f1", "accuracy"]
    _ = [
        print(f"{metric}: {cross_val_score(model, X, y, scoring=metric, cv=cv).mean()}")
        for metric in metrics
    ]

#### Radial Basis Function (RBF)

In [9]:
rbf_svc = SVC(kernel="rbf")
run(rbf_svc, X_scaled, y, CV)

precision: 0.8058333333333334
recall: 0.8833333333333334
f1: 0.8254761904761905
accuracy: 0.785


#### Kernels Linear

In [10]:
linear_svc = SVC(kernel="linear")
run(linear_svc, X_scaled, y, CV)

precision: 0.8125
recall: 0.7416666666666666
f1: 0.7502380952380951
accuracy: 0.7433333333333333


### Random Forest

In [11]:
rf = RandomForestClassifier()

run(rf, X_scaled, y, CV)

precision: 0.7966666666666666
recall: 0.9
f1: 0.8479761904761904
accuracy: 0.8233333333333333


### XGBoost

In [12]:
xgbc = XGBClassifier()
run(xgbc, X_scaled, y, CV)

precision: 0.8458333333333332
recall: 0.875
f1: 0.8490476190476193
accuracy: 0.8216666666666667
