In [1]:
import numpy as np
import pandas as pd
import os
import joblib
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification

from sklearn.ensemble import (
    RandomForestClassifier, 
    ExtraTreesClassifier,
    AdaBoostClassifier,
    GradientBoostingClassifier,
)

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

from xgboost import XGBClassifier

In [2]:
df_no_hr_mean = pd.read_csv("../data/train/bigboy_no_hr_mean.csv")
df_with_hr_mean = pd.read_csv("../data/train/bigboy_with_hr_mean.csv")

x_no_hr_mean = df_no_hr_mean[['cosine_feature', 'count_feature', 'hr_std', 'time_feature']]
x_with_hr_mean = df_with_hr_mean[['cosine_feature', 'count_feature', 'hr_std', 'hr_mean', 'time_feature']]

y_no_hr_mean = df_no_hr_mean['psg_label']
y_with_hr_mean = df_with_hr_mean['psg_label']

y_no_hr_mean.value_counts()

psg_label
2    11203
5     5030
3     3252
0     1907
1     1475
Name: count, dtype: int64

In [3]:
y_no_hr_mean.replace({5: 4}, inplace=True)
y_with_hr_mean.replace({5: 4}, inplace=True)

In [4]:
x_no_hr_mean_train, x_no_hr_mean_test, y_no_hr_mean_train, y_no_hr_mean_test = train_test_split(x_no_hr_mean, y_no_hr_mean, test_size=0.2)
print(x_no_hr_mean_train.shape, x_no_hr_mean_test.shape, y_no_hr_mean_train.shape, y_no_hr_mean_test.shape)

x_with_hr_mean_train, x_with_hr_mean_test, y_with_hr_mean_train, y_with_hr_mean_test = train_test_split(x_with_hr_mean, y_with_hr_mean, test_size=0.2)
print(x_with_hr_mean_train.shape, x_with_hr_mean_test.shape, y_with_hr_mean_train.shape, y_with_hr_mean_test.shape)

(18293, 4) (4574, 4) (18293,) (4574,)
(18293, 5) (4574, 5) (18293,) (4574,)


In [5]:
models_dir = "../saved_models/no_hr_mean"

class_weights = {
    0: 1,  # Wake
    1: 1,  # N1
    2: 1,  # N2
    3: 1,  # N3 (Deep Sleep) - Make this "heavier" so the model is scared to misclassify it
    4: 1   # REM
}

algorithms = {
    'Random Forest': RandomForestClassifier(class_weight=class_weights, n_estimators=200, min_samples_leaf=5),
    'Extra Trees': ExtraTreesClassifier(class_weight=class_weights, n_estimators=200, min_samples_leaf=5),
    'Gradient Boosting': GradientBoostingClassifier(),
    'Logistic Regression': LogisticRegression(class_weight=class_weights),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Neural Network': MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=1000),
    'XGBoost': XGBClassifier()
}

for name, clf in algorithms.items():
    clf.fit(x_no_hr_mean_train, y_no_hr_mean_train)

    y_pred = clf.predict(x_no_hr_mean_test)
    exact_score = clf.score(x_no_hr_mean_test, y_no_hr_mean_test)
    print(f"{name:<20} | {exact_score:.4f}")    
    # print(f"{name}: {clf.features_}")
    # print(f"{name}: {clf.feature_importances_}")

    model_filename = os.path.join(models_dir, f"{name.replace(' ', '_')}.joblib")
    joblib.dump(clf, model_filename)
    print(f"Saved {name} to {model_filename}")

Random Forest        | 0.6839
Saved Random Forest to ../saved_models/no_hr_mean\Random_Forest.joblib
Extra Trees          | 0.6572
Saved Extra Trees to ../saved_models/no_hr_mean\Extra_Trees.joblib
Gradient Boosting    | 0.6135
Saved Gradient Boosting to ../saved_models/no_hr_mean\Gradient_Boosting.joblib


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression  | 0.5179
Saved Logistic Regression to ../saved_models/no_hr_mean\Logistic_Regression.joblib
K-Nearest Neighbors  | 0.6334
Saved K-Nearest Neighbors to ../saved_models/no_hr_mean\K-Nearest_Neighbors.joblib
Neural Network       | 0.5990
Saved Neural Network to ../saved_models/no_hr_mean\Neural_Network.joblib
XGBoost              | 0.6707
Saved XGBoost to ../saved_models/no_hr_mean\XGBoost.joblib


In [6]:
models_dir = "../saved_models/with_hr_mean"

class_weights = {
    0: 1,  # Wake
    1: 1,  # N1
    2: 1,  # N2
    3: 1,  # N3 (Deep Sleep) - Make this "heavier" so the model is scared to misclassify it
    4: 1   # REM
}

algorithms = {
    'Random Forest': RandomForestClassifier(class_weight=class_weights, n_estimators=200, min_samples_leaf=5),
    'Extra Trees': ExtraTreesClassifier(class_weight=class_weights, n_estimators=200, min_samples_leaf=5),
    'Gradient Boosting': GradientBoostingClassifier(),
    'Logistic Regression': LogisticRegression(class_weight=class_weights),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Neural Network': MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=1000),
    'XGBoost': XGBClassifier()
}

for name, clf in algorithms.items():
    clf.fit(x_with_hr_mean_train, y_with_hr_mean_train)

    y_pred = clf.predict(x_with_hr_mean_test)
    exact_score = clf.score(x_with_hr_mean_test, y_with_hr_mean_test)
    print(f"{name:<20} | {exact_score:.4f}")    
    # print(f"{name}: {clf.features_}")
    # print(f"{name}: {clf.feature_importances_}")

    model_filename = os.path.join(models_dir, f"{name.replace(' ', '_')}.joblib")
    joblib.dump(clf, model_filename)
    print(f"Saved {name} to {model_filename}")

Random Forest        | 0.7801
Saved Random Forest to ../saved_models/with_hr_mean\Random_Forest.joblib
Extra Trees          | 0.7440
Saved Extra Trees to ../saved_models/with_hr_mean\Extra_Trees.joblib
Gradient Boosting    | 0.6587
Saved Gradient Boosting to ../saved_models/with_hr_mean\Gradient_Boosting.joblib


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression  | 0.5813
Saved Logistic Regression to ../saved_models/with_hr_mean\Logistic_Regression.joblib
K-Nearest Neighbors  | 0.7484
Saved K-Nearest Neighbors to ../saved_models/with_hr_mean\K-Nearest_Neighbors.joblib
Neural Network       | 0.6469
Saved Neural Network to ../saved_models/with_hr_mean\Neural_Network.joblib
XGBoost              | 0.7538
Saved XGBoost to ../saved_models/with_hr_mean\XGBoost.joblib
