In [16]:
# let's try using linear regression to predict the label using the irregularity score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import time

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
# Create a random forest classifier
from sklearn.ensemble import RandomForestClassifier

# Create a gradient boosting classifier
from sklearn.ensemble import GradientBoostingClassifier

# Create a support vector classifier
from sklearn.svm import SVC

In [19]:
#load the results
results = pd.read_csv('data/hrv_results.csv')
results = results.dropna().drop('record_id', axis=1)
print('results shape:', results.shape)

X, y = results.drop('label', axis=1), results['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)

results shape: (182, 15)
X_train shape: (145, 14)
X_test shape: (37, 14)
y_train shape: (145,)
y_test shape: (37,)


In [20]:

# Let's use the following models to predict the label
# 1. Linear Regression
# 2. Random Forest
# 3. Gradient Boosting
# 4. Support Vector Machine

result = pd.DataFrame(columns=['Model', 'Train Score', 'Test Score', 'Runtime', 'Top Predictor'])

start_time = time.time()
lr = LogisticRegression(random_state=42)
lr.fit(X_train, y_train)
lr_train_score = accuracy_score(y_train, lr.predict(X_train))
lr_test_score = accuracy_score(y_test, lr.predict(X_test))
lr_runtime = time.time() - start_time
top_predictor = X.columns[np.argmax(np.abs(lr.coef_[0]))]
result = pd.DataFrame({
    'Model': ['Logistic Regression'],
    'Train Score': [lr_train_score],
    'Test Score': [lr_test_score],
    'Runtime': [lr_runtime],
    'Top Predictor': [top_predictor]
})
print(result)
result = pd.concat([result], ignore_index=True)


start_time = time.time()
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
rf_train_score = accuracy_score(y_train, rf.predict(X_train))
rf_test_score = accuracy_score(y_test, rf.predict(X_test))
rf_runtime = time.time() - start_time
top_predictor = X.columns[np.argmax(rf.feature_importances_)]
result_rf = pd.DataFrame({
    'Model': ['Random Forest'],
    'Train Score': [rf_train_score],
    'Test Score': [rf_test_score],
    'Runtime': [rf_runtime],
    'Top Predictor': [top_predictor]
})
print(result_rf)
result = pd.concat([result, result_rf], ignore_index=True)


start_time = time.time()
gb = GradientBoostingClassifier(random_state=42)
gb.fit(X_train, y_train)
gb_train_score = accuracy_score(y_train, gb.predict(X_train))
gb_test_score = accuracy_score(y_test, gb.predict(X_test))
gb_runtime = time.time() - start_time
top_predictor = X.columns[np.argmax(gb.feature_importances_)]
result_gb = pd.DataFrame({
    'Model': ['Gradient Boosting'],
    'Train Score': [gb_train_score],
    'Test Score': [gb_test_score],
    'Runtime': [gb_runtime],
    'Top Predictor': [top_predictor]
})
print(result_gb)
result = pd.concat([result, result_gb], ignore_index=True)


start_time = time.time()
svm = SVC(kernel='linear', random_state=42)
svm.fit(X_train, y_train)
svm_train_score = accuracy_score(y_train, svm.predict(X_train))
svm_test_score = accuracy_score(y_test, svm.predict(X_test))
svm_runtime = time.time() - start_time
top_predictor = X.columns[np.argmax(np.abs(svm.coef_[0]))]
result_svm = pd.DataFrame({
    'Model': ['Support Vector Machine'],
    'Train Score': [svm_train_score],
    'Test Score': [svm_test_score],
    'Runtime': [svm_runtime],
    'Top Predictor': [top_predictor]
})
print(result_svm)
result = pd.concat([result, result_svm], ignore_index=True)

# Display the results
display(result)

# Optional: Display detailed classification report for best model
best_model_idx = result['Test Score'].idxmax()
best_model_name = result.loc[best_model_idx, 'Model']
print(f"\nDetailed Classification Report for {best_model_name}:")
if best_model_name == 'Logistic Regression':
    y_pred = lr.predict(X_test)
elif best_model_name == 'Random Forest':
    y_pred = rf.predict(X_test)
elif best_model_name == 'Gradient Boosting':
    y_pred = gb.predict(X_test)
else:
    y_pred = svm.predict(X_test)
print(classification_report(y_test, y_pred))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


                 Model  Train Score  Test Score   Runtime Top Predictor
0  Logistic Regression     0.675862    0.567568  0.014606         pnn50
           Model  Train Score  Test Score   Runtime Top Predictor
0  Random Forest          1.0    0.675676  0.115502          nn50
               Model  Train Score  Test Score   Runtime Top Predictor
0  Gradient Boosting          1.0    0.756757  0.257548         pnn50
                    Model  Train Score  Test Score    Runtime Top Predictor
0  Support Vector Machine     0.731034    0.594595  21.463787   lf_hf_ratio


Unnamed: 0,Model,Train Score,Test Score,Runtime,Top Predictor
0,Logistic Regression,0.675862,0.567568,0.014606,pnn50
1,Random Forest,1.0,0.675676,0.115502,nn50
2,Gradient Boosting,1.0,0.756757,0.257548,pnn50
3,Support Vector Machine,0.731034,0.594595,21.463787,lf_hf_ratio



Detailed Classification Report for Gradient Boosting:
              precision    recall  f1-score   support

           A       0.00      0.00      0.00         0
           N       0.86      0.79      0.83        24
           O       0.75      0.75      0.75        12
           ~       0.00      0.00      0.00         1

    accuracy                           0.76        37
   macro avg       0.40      0.39      0.39        37
weighted avg       0.80      0.76      0.78        37



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
