# In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import StratifiedKFold
from mlxtend.feature_selection import SequentialFeatureSelector
from mlxtend.plotting import plot_sequential_feature_selection as plot_sfs
from sklearn.metrics import roc_auc_score
# Import grid search
from sklearn.model_selection import GridSearchCV

# Read the full dataset from the CSV file into a DataFrame.
data = pd.read_csv('C:/Users/mathi/OneDrive/Skrivebord/2_Sem_Kand/Projekt_Kandidat2/Projekt_Kandidat2/DataPreb/DataPSGandDetektion.csv')

# Columns used as candidate predictors; the target is the SLEEPAPNEA column.
feature_columns = [
    'AVGHR', 'AVGRR', 'AVGACT', 'SLEEPSCORE',
    'DURATIONINBED', 'DURATIONAWAKE', 'DURATIONINSLEEP', 'DURATIONINREM',
    'DURATIONINLIGHT', 'DURATIONINDEEP', 'DURATIONSLEEPONSET',
    'BEDEXITDURATION', 'BEDEXITCOUNT', 'TOSSNTURNCOUNT', 'FROMGMTOFFSET',
    'MINHR', 'MAXHR', 'MINRR', 'MAXRR', 'FMCOUNT', 'AWAKENINGS',
    'HRVSCORE', 'HRVLF', 'HRVHF', 'HRVRMSSDEVENING', 'HRVRMSSDMORNING',
    'BPSYS', 'BPDIA', 'HR', 'WEIGHT', 'STEPS',
    'NYHABL', 'AGEBL', 'WEIGHTBL', 'HEIGHTBL', 'BPSYSBL', 'BPDIABL',
    'HRB', 'EFTITBL', 'GENDER',
]
X = data[feature_columns]
y = data['SLEEPAPNEA']

patient_ids = data['patient_ID']

# Distinct patient IDs present in the data.
unique_patients = np.unique(patient_ids)

# Split the patient IDs (not the rows) 80/20, so every row of a given patient
# ends up on exactly one side of the train/test boundary.
train_patients, test_patients = train_test_split(
    unique_patients,
    test_size=0.2,
    random_state=42,
)

# Boolean row masks selecting the rows that belong to each patient group.
in_train = patient_ids.isin(train_patients)
in_test = patient_ids.isin(test_patients)

X_train, y_train = X[in_train], y[in_train]
X_test, y_test = X[in_test], y[in_test]

# Random-forest hyper-parameter grid explored by the grid search.
param_grid = {
    'n_estimators': [10, 50, 100, 200, 500],
    'max_depth': list(range(1, 10)),
    'min_samples_split': list(range(2, 10)),
    'min_samples_leaf': list(range(1, 10)),
}
# NOTE(review): the forest has no random_state, so runs are not reproducible.
gridModel = GridSearchCV(RandomForestClassifier(), param_grid, cv=5)

# Forward sequential selection of 15 features, scored by cross-validated
# ROC-AUC with the grid-search model as the estimator.
# NOTE(review): both cv=5 settings presumably split rows without regard to
# patient_ID, unlike the train/test split -- confirm whether grouped folds
# are wanted here.
sfs = SequentialFeatureSelector(
    estimator=gridModel,
    k_features=15,
    forward=True,
    scoring='roc_auc',
    cv=5,
)

# Run the selection on the training rows and keep only the chosen columns.
X_train_selected = sfs.fit_transform(X_train, y_train)

# Names of the features in the final selected subset.
selected_features = [*sfs.k_feature_names_]

# Compute the ROC-AUC score on the held-out test patients.
#
# The selector only cross-validates candidate subsets; it does not expose a
# fitted `estimator_`, so a final model has to be trained explicitly. Fit the
# grid-search model on the selected training columns, reduce the test rows to
# the same columns, and score the predicted positive-class probabilities.
gridModel.fit(X_train_selected, y_train)
X_test_selected = sfs.transform(X_test)
y_test_pred = gridModel.predict_proba(X_test_selected)[:, 1]
roc_auc = roc_auc_score(y_test, y_test_pred)


# Plot the cross-validated score (with standard deviation) per subset size.
metric_dict = sfs.get_metric_dict()
fig1 = plot_sfs(metric_dict, kind='std_dev')
ax = plt.gca()
plt.title('Sequential Forward Selection (w. StdDev)')

# Label each tick with the feature that was added at that step. The final
# subset's name tuple is not guaranteed to be in selection order, so the
# order is reconstructed by diffing consecutive subsets.
subset_sizes = sorted(metric_dict)
step_labels = []
already_selected = set()
for size in subset_sizes:
    names = metric_dict[size]['feature_names']
    newly_added = [str(name) for name in names if name not in already_selected]
    step_labels.append(', '.join(newly_added))
    already_selected.update(names)

# Pin the tick positions explicitly so labels and ticks always line up.
ax.set_xticks(subset_sizes)
ax.set_xticklabels(step_labels, rotation=90, fontsize=8)
plt.ylabel('ROC-AUC')
plt.xlabel('Features')
# Horizontal reference line showing the ROC-AUC obtained on the test data.
plt.axhline(y=roc_auc, color='r', linestyle='--', label='Test ROC-AUC')
plt.legend(loc='lower right')
plt.show()
