In [2]:
# 1. Import necessary libraries
import os

import joblib
import numpy as np
import pandas as pd
from PyEMD import EMD
from scipy.stats import skew
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

In [3]:
# 2. Define constants
sampling_rate = 256  # Hz
window_size = 768    # samples per segment (768 / 256 Hz = 3 s)
max_imf = 5          # forwarded to EMD as its max_imf argument

# Dataset root. Defaults to the original local path; set the PPG_DATASET_DIR
# environment variable to run the notebook against a copy stored elsewhere.
dataset_dir = os.environ.get("PPG_DATASET_DIR", r"D:\PPG Dataset")
low_dir = os.path.join(dataset_dir, "Low_MWL", "Low_MWL")     # processed with label 0
high_dir = os.path.join(dataset_dir, "High_MWL", "High_MWL")  # processed with label 1

In [4]:
# 3. Define function to extract features from IMF1
def extract_features_from_segment(segment):
    """Summarise the first EMD component of one signal segment.

    The segment is decomposed with PyEMD's ``EMD`` (constructed with
    ``spline_kind='cubic'`` and ``MAX_ITERATION=100``; ``max_imf`` comes from
    the module-level constant) and four statistics of the first returned row
    are computed.

    Args:
        segment: 1-D array of signal samples.

    Returns:
        list: ``[mean, min, max, skewness]`` of the first component, or
        ``[0, 0, 0, 0]`` when the decomposition returns no rows or when any
        of the statistics is not finite.
    """
    fallback = [0, 0, 0, 0]

    emd = EMD(spline_kind='cubic', MAX_ITERATION=100)
    imfs = emd(segment, max_imf=max_imf)
    if imfs.shape[0] == 0:
        return fallback

    imf1 = imfs[0]
    feats = [np.mean(imf1), np.min(imf1), np.max(imf1), skew(imf1)]

    # skew() can yield NaN (e.g. for a zero-variance component). A NaN row
    # would end up in the feature table and break estimators that reject
    # missing values, so treat it like the "no component" case.
    if not np.all(np.isfinite(feats)):
        return fallback
    return feats


In [5]:
# 4. Loop through all files and extract features
def process_directory(directory_path, label):
    """Extract labelled feature rows from every CSV file in a directory.

    Each column of each ``.csv`` file is treated as one signal: missing
    values are dropped, the signal is cut into consecutive non-overlapping
    windows of ``window_size`` samples (a trailing partial window is
    discarded), and every window is passed to
    ``extract_features_from_segment``.

    Args:
        directory_path: Directory to scan (not recursive).
        label: Class label appended to every feature row from this directory.

    Returns:
        list: One list per window, holding the extracted features followed
        by ``label``. Empty when the directory has no ``.csv`` files or no
        signal is at least ``window_size`` samples long.
    """
    features = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the row
    # order (and therefore any seeded train/test split) reproducible.
    for file in sorted(os.listdir(directory_path)):
        if not file.endswith(".csv"):
            continue
        df = pd.read_csv(os.path.join(directory_path, file))
        for col in df.columns:
            signal = df[col].dropna().values
            # Floor division guarantees every slice below is a full window.
            num_segments = len(signal) // window_size
            for i in range(num_segments):
                segment = signal[i * window_size:(i + 1) * window_size]
                feats = extract_features_from_segment(segment)
                features.append([*feats, label])
    return features


In [6]:
# 5. Process both drowsy and burst sets
low_features = process_directory(low_dir, label=0)   # drowsy
high_features = process_directory(high_dir, label=1) # burst


In [7]:
# 6. Combine and create DataFrame
# Four IMF1 statistics followed by the class label, matching the row layout
# built by process_directory.
column_names = ["Imf1_Mean", "Imf1_Min", "Imf1_Max", "Imf1_Skew", "Label"]
all_rows = low_features + high_features
all_data = pd.DataFrame(all_rows, columns=column_names)


In [8]:
# 7. Save features (optional)
# Written to the current working directory, without the index column.
features_csv = "imf_drowsy_burst_data.csv"
all_data.to_csv(features_csv, index=False)


In [9]:
# 8. Split and train model
# Target column on one side, every remaining column as features on the other.
y = all_data["Label"]
X = all_data.drop(columns="Label")

# Hold out 20% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Random forest with 100 trees, seeded for repeatability.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)


RandomForestClassifier(random_state=42)

In [17]:
# Train an MLP classifier on the same split and report its test performance.
# NOTE(review): the features are passed in unscaled; MLPs are usually
# sensitive to feature scale, so consider standardising them — TODO confirm.
mlp = MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=300, random_state=42)
mlp.fit(X_train, y_train)
mlp_preds = mlp.predict(X_test)
print("\n🧠 MLP Classifier")
# labels=[0, 1] pins the display names to the label encoding (0 -> "Drowsy",
# 1 -> "Burst") and keeps the report from failing if one class is absent
# from the test split.
print(classification_report(y_test, mlp_preds, labels=[0, 1], target_names=["Drowsy", "Burst"]))
print("Accuracy:", accuracy_score(y_test, mlp_preds))


🧠 MLP Classifier
              precision    recall  f1-score   support

      Drowsy       0.59      0.57      0.58       871
       Burst       0.59      0.61      0.60       889

    accuracy                           0.59      1760
   macro avg       0.59      0.59      0.59      1760
weighted avg       0.59      0.59      0.59      1760

Accuracy: 0.5903409090909091


In [19]:
# Persist the fitted MLP classifier to the current working directory.
model_path = 'drowsy_burst_mlp_model.joblib'
joblib.dump(mlp, model_path)
print("✅ MLP model saved successfully as 'drowsy_burst_mlp_model.joblib'")


✅ MLP model saved successfully as 'drowsy_burst_mlp_model.joblib'
