In [36]:
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
# from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

from sklearn.ensemble import (
    RandomForestClassifier, 
    ExtraTreesClassifier,
    AdaBoostClassifier,
    GradientBoostingClassifier
)
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

In [37]:
df = pd.read_csv("data/bigboy.csv")

In [38]:
x = df[['cosine_feature', 'count_feature', 'hr_feature', 'time_feature']]
y = df['psg_label']

In [39]:
x, y

(       cosine_feature  count_feature  hr_feature  time_feature
 0           -0.258819       0.860988    0.278078      0.000000
 1           -0.260926       0.618550    0.270274      0.008333
 2           -0.263031       0.608050    0.260905      0.016667
 3           -0.265135       0.891078    0.252579      0.025000
 4           -0.267238       1.552097    0.244514      0.033333
 ...               ...            ...         ...           ...
 25476       -0.751840       0.000000    0.130491      7.750000
 25477       -0.750400       0.000000    0.134184      7.758333
 25478       -0.748956       0.000000    0.138695      7.766667
 25479       -0.747508       0.000000    0.142484      7.775000
 25480       -0.746057       0.000000    0.148858      7.783333
 
 [25481 rows x 4 columns],
 0        0
 1        0
 2        0
 3        0
 4        0
         ..
 25476    5
 25477    5
 25478    5
 25479    5
 25480    5
 Name: psg_label, Length: 25481, dtype: int64)

In [40]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

In [41]:
x_train.shape, y_train.shape

((20384, 4), (20384,))

In [42]:
x_test.shape, y_test.shape

((5097, 4), (5097,))

In [43]:
clf = RandomForestClassifier()
clf.fit(x_train, y_train)

In [44]:
print(clf.score(x_test, y_test))

0.6754953894447714


In [45]:
clf.feature_importances_

array([0.214207  , 0.15771797, 0.38818354, 0.23989149])

In [46]:
import os
import joblib

# Create a directory to save the models
models_dir = "saved_models"
if not os.path.exists(models_dir):
    os.makedirs(models_dir)

algorithms = {
    'Random Forest': RandomForestClassifier(),
    'Extra Trees': ExtraTreesClassifier(),
    'AdaBoost': AdaBoostClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'Logistic Regression': LogisticRegression(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Neural Network': MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=1000)
}

for name, clf in algorithms.items():
    clf.fit(x_train, y_train)
    score = clf.score(x_test, y_test)
    print(f"{name}: {score:.4f}")
    
    # Save the model
    model_filename = os.path.join(models_dir, f"{name.replace(' ', '_')}.joblib")
    joblib.dump(clf, model_filename)
    print(f"Saved {name} to {model_filename}")
    # print(f"{name}: {clf.features_}")
    # print(f"{name}: {clf.feature_importances_}")

Random Forest: 0.6733
Saved Random Forest to saved_models/Random_Forest.joblib
Extra Trees: 0.6314
Saved Extra Trees to saved_models/Extra_Trees.joblib
Extra Trees: 0.6314
Saved Extra Trees to saved_models/Extra_Trees.joblib




AdaBoost: 0.5152
Saved AdaBoost to saved_models/AdaBoost.joblib
Gradient Boosting: 0.5837
Saved Gradient Boosting to saved_models/Gradient_Boosting.joblib
Gradient Boosting: 0.5837
Saved Gradient Boosting to saved_models/Gradient_Boosting.joblib


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression: 0.5072
Saved Logistic Regression to saved_models/Logistic_Regression.joblib
K-Nearest Neighbors: 0.6037
Saved K-Nearest Neighbors to saved_models/K-Nearest_Neighbors.joblib
Neural Network: 0.5774
Saved Neural Network to saved_models/Neural_Network.joblib
Neural Network: 0.5774
Saved Neural Network to saved_models/Neural_Network.joblib


In [47]:
import os
import joblib

# Create a directory to save the models
models_dir = "saved_models"
if not os.path.exists(models_dir):
    os.makedirs(models_dir)

In [48]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler

In [49]:
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

model = Sequential([
    Dense(64, activation='relu', input_shape=(4,)),  
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(len(y.unique()), activation='softmax')  
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


history = model.fit(x_train_scaled, y_train, 
                    epochs=100, 
                    batch_size=32, 
                    validation_split=0.2,
                    verbose=0)

# Evaluate
test_loss, test_accuracy = model.evaluate(x_test_scaled, y_test, verbose=0)
print(f"Neural Network (TensorFlow): {test_accuracy:.4f}")

# Save the TensorFlow model
tf_model_filename = os.path.join(models_dir, "tensorflow_model.h5")
model.save(tf_model_filename)
print(f"Saved TensorFlow model to {tf_model_filename}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Neural Network (TensorFlow): 0.5799
Saved TensorFlow model to saved_models/tensorflow_model.h5


In [50]:
import os
import joblib

# Create a directory to save the models
models_dir = "saved_models"
if not os.path.exists(models_dir):
    os.makedirs(models_dir)

In [51]:
# Example of loading a saved model
import joblib

# Load the Random Forest model
loaded_rf_model = joblib.load(os.path.join(models_dir, "Random_Forest.joblib"))

# You can now use the loaded model to make predictions
# For example, on the test set
predictions = loaded_rf_model.predict(x_test)
print("Predictions from loaded Random Forest model:")
print(predictions)

# Example of loading the TensorFlow model
from tensorflow.keras.models import load_model

loaded_tf_model = load_model(os.path.join(models_dir, "tensorflow_model.h5"))

# You can now use the loaded model to make predictions
# Make sure to scale the input data just like you did for training
predictions_tf = loaded_tf_model.predict(x_test_scaled)
print("Predictions from loaded TensorFlow model:")
print(predictions_tf)



Predictions from loaded Random Forest model:
[5 2 5 ... 0 3 5]
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Predictions from loaded TensorFlow model:
[[1.32003110e-02 3.21003608e-02 2.71945208e-01 1.97523739e-02
  2.77294275e-05 6.62974000e-01]
 [1.21707484e-01 2.54908293e-01 3.78104687e-01 1.11087384e-02
  7.78951562e-06 2.34162986e-01]
 [4.24829312e-02 8.03820267e-02 3.28396618e-01 3.24286819e-02
  1.23167294e-04 5.16186595e-01]
 ...
 [5.81069946e-01 1.14821881e-01 2.26763651e-01 6.96958452e-02
  1.64595058e-08 7.64873531e-03]
 [2.93840450e-04 4.80718166e-03 5.28900981e-01 4.04038370e-01
  1.91154312e-02 4.28441875e-02]
 [1.57309957e-02 5.12093417e-02 4.66600329e-01 4.03574854e-02
  3.26775759e-03 4.22834128e-01]]
Predictions from loaded TensorFlow model:
[[1.32003110e-02 3.21003608e-02 2.71945208e-01 1.97523739e-02
  2.77294275e-05 6.62974000e-01]
 [1.21707484e-01 2.54908293e-01 