In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

In [6]:
normal_eeg_file_path = "normal_eeg.csv"
normal_ecg_file_path = "normal_ecg.csv"
normal_eog_file_path = "normal_eog.csv"
normal_emg_file_path = "normal_emg.csv"

insomniac_eeg_file_path = "insomnia_eeg.csv"
insomniac_ecg_file_path = "insomnia_ecg.csv"
insomniac_eog_file_path = "insomnia_eog.csv"
insomniac_emg_file_path = "insomnia_emg.csv"

# Load data from each file and label each as normal or insomniac
def load_and_label_data(eeg_file, ecg_file, eog_file, emg_file, condition_label):
    eeg_df = pd.read_csv(eeg_file)
    ecg_df = pd.read_csv(ecg_file)
    eog_df = pd.read_csv(eog_file)
    emg_df = pd.read_csv(emg_file)
    
    # Combine all features into a single DataFrame
    combined_df = pd.concat([eeg_df, ecg_df, eog_df, emg_df], axis=1)
    combined_df['Condition'] = condition_label  # Add condition label
    return combined_df

In [7]:
# Load and label normal and insomniac data
normal_data = load_and_label_data(normal_eeg_file_path, normal_ecg_file_path, normal_eog_file_path, normal_emg_file_path, 0)  # Label 0 for Normal
insomniac_data = load_and_label_data(insomniac_eeg_file_path, insomniac_ecg_file_path, insomniac_eog_file_path, insomniac_emg_file_path, 1)  # Label 1 for Insomniac


In [9]:
data = pd.concat([normal_data, insomniac_data], axis=0).reset_index(drop=True)


In [11]:
X = data.drop(columns=['Condition'])
y = data['Condition']

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [15]:
model_list = {
    'SVM': SVC(probability=True),
    'Random Forest': RandomForestClassifier(random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42),
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'AdaBoost': AdaBoostClassifier(random_state=42)
}

In [16]:
for model_name, model in model_list.items():
    print(f"\nTraining {model_name}...")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{model_name} Accuracy: {accuracy:.2f}")
    print(f"\n{model_name} Classification Report:\n", classification_report(y_test, y_pred))



Training SVM...
SVM Accuracy: 0.62

SVM Classification Report:
               precision    recall  f1-score   support

           0       0.59      0.83      0.69      1293
           1       0.70      0.41      0.51      1245

    accuracy                           0.62      2538
   macro avg       0.65      0.62      0.60      2538
weighted avg       0.64      0.62      0.60      2538


Training Random Forest...
Random Forest Accuracy: 0.95

Random Forest Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.93      0.95      1293
           1       0.93      0.96      0.95      1245

    accuracy                           0.95      2538
   macro avg       0.95      0.95      0.95      2538
weighted avg       0.95      0.95      0.95      2538


Training Gradient Boosting...
Gradient Boosting Accuracy: 0.87

Gradient Boosting Classification Report:
               precision    recall  f1-score   support

           0       0.89 



AdaBoost Accuracy: 0.78

AdaBoost Classification Report:
               precision    recall  f1-score   support

           0       0.79      0.78      0.78      1293
           1       0.77      0.79      0.78      1245

    accuracy                           0.78      2538
   macro avg       0.78      0.78      0.78      2538
weighted avg       0.78      0.78      0.78      2538



In [17]:
data.to_csv("MAIN_DATA.csv", index=False)
