# Train and Apply Models

In [1]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from scipy.stats import pearsonr
import numpy as np
import math
from ML.model_training import omit_patient_video, train_random_forest, train_knn

## Split Training and Testing Data

In [2]:
# Build the train/test partitions for the "arousal" target. The helper's four
# return values are unpacked as train features, test features, train targets
# and test targets.
# NOTE(review): presumably a whole patient/video is held out as the test set —
# confirm against ML.model_training.omit_patient_video.
(
    X_train,
    X_test,
    arousal_train,
    arousal_test,
) = omit_patient_video(target="arousal")

Remove desired columns and bands: Shannon's entropy.

## KNN Model

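Train a KNN regressor for each candidate number of neighbors and keep the model with the lowest mean squared error.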

In [3]:
# Search over candidate neighbor counts for the KNN regressor and keep the
# model with the lowest mean squared error.
#
# NOTE(review): the MSE used for selection is measured on the same test data
# that the evaluation cell reports on, so the reported score is optimistically
# biased. Consider selecting the neighbor count on a validation fold of the
# training data instead.
best_model = None
best_mse = math.inf
best_n = 1
# Test features/targets exactly as train_knn returned them for the best model.
best_X_test = None
best_y_test = None

for n in [1, 3, 5, 7, 11, 21]:
    # Bind the returned test data to per-iteration names. Rebinding X_test here
    # would feed the previous iteration's returned data back into train_knn, so
    # candidates after the first would not start from the same input.
    knn, X_test_n, y_test_n = train_knn(X_train, X_test, arousal_train, arousal_test, neighbors=n)

    arousal_pred = knn.predict(X_test_n)
    mse = mean_squared_error(y_test_n, arousal_pred)

    if mse < best_mse:
        best_mse = mse
        best_model = knn
        best_n = n
        best_X_test, best_y_test = X_test_n, y_test_n

# Publish the test data that belongs to best_model under the names the
# following cells read (X_test, y_test). This is done once, after the search,
# so the loop above always starts from the unmodified split.
if best_model is not None:
    X_test, y_test = best_X_test, best_y_test

In [4]:
# Score the selected KNN model on the test data.
arousal_pred = best_model.predict(X_test)

print("N_Neighbors used:",  best_n)

# Regression metrics comparing y_test with the predictions.
r2 = r2_score(y_test, arousal_pred)
mae = mean_absolute_error(y_test, arousal_pred)
mse = mean_squared_error(y_test, arousal_pred)
rmse = np.sqrt(mse)

# Pearson correlation and its p-value are computed and kept in `r` / `pval`,
# but are not part of the printed report below.
r, pval = pearsonr(y_test, arousal_pred)

# One print call with a newline separator emits the same five report lines.
print(
    "KNN Regression Performance:",
    f"R²:   {r2:.4f}",
    f"MAE:  {mae:.4f}",
    f"MSE:  {mse:.4f}",
    f"RMSE: {rmse:.4f}",
    sep="\n",
)

# Only models exposing `feature_importances_` get the ranking below; when the
# attribute is missing, `importances` is None and this section prints nothing.
importances = getattr(best_model, "feature_importances_", None)
if importances is not None:
    top = 10
    # Indices sorted by descending importance, truncated to the first `top`.
    order = np.argsort(importances)[::-1][:top]
    print("\nTop feature importances:")
    for i in order:
        # Column names are looked up on X_test, so this expects X_test to
        # expose `.columns` (e.g. a DataFrame).
        print(f"{X_test.columns[i]}: {importances[i]:.4f}")


N_Neighbors used: 21
KNN Regression Performance:
R²:   0.3318
MAE:  0.4292
MSE:  0.3749
RMSE: 0.6123


### Train RF Regressor

In [None]:
# Fit the random-forest regressor through the project helper. Its three return
# values are bound to `rf`, `X_test` and `y_test`, replacing any earlier
# `X_test` / `y_test` in the notebook namespace.
# NOTE(review): the X_test passed in here is whatever an earlier cell last
# bound to that name — confirm it is the input train_random_forest expects.
rf, X_test, y_test = train_random_forest(
    X_train,
    X_test,
    arousal_train,
    arousal_test,
)