In [1]:
import os
import mne
import numpy as np
import joblib
from scipy import stats
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler

In [2]:
from scipy.stats import skew, kurtosis, entropy

def extract_features_multi(data, sfreq):                                    # Sinyallere ait ozellikleri burada cikartiyoruz. data: np.array, shape (n_epochs, n_channels, n_times), sfreq: ornekleme hizi (Hz)
                                                                            # Orijinal kodla ayni
    feature_list = []   
    count=0                                                                 # Islenen toplam epoch sayisi.

    for epoch in data:                                                      # Her epoch icin dongu donuyoruz ve featurelarini cikartiyoruz.
        feats = []
        for ch_signal in epoch:                                             # Her kanal icin dongu

            mean = np.mean(ch_signal)                                       # Ortalama
            median = np.median(ch_signal)                                   # Medyan
            variance = np.var(ch_signal)                                    # Varyans
            min_val = np.min(ch_signal)                                     # Minimum deger
            max_val = np.max(ch_signal)                                     # Maksimum deger
            ptp = max_val - min_val                                         # Tepe araligi
            
            feats.extend([mean, median, variance, min_val, max_val, ptp])
            
            autocorr = np.correlate(ch_signal, ch_signal, mode='full')[len(ch_signal)-1:len(ch_signal)+5]       # Ilk 5 gecikme icin otokorelasyon degerleri
            feats.extend(autocorr[1:6])
            
            t = np.arange(len(ch_signal))                                   # Zaman vektoru
            slope = np.polyfit(t, ch_signal, 1)[0]                          # Egim hesabi
            feats.append(slope)
            
            hist, _ = np.histogram(ch_signal, bins=10, density=True)        # Entropi hesabi
            ent = entropy(hist)
            feats.append(ent)
            
            rms = np.sqrt(np.mean(ch_signal**2))                            # Root Mean Square frekans hesabi
            feats.append(rms)
            
            zcr = np.sum(np.diff(np.sign(ch_signal)) != 0) / len(ch_signal)     # Zero Crossing Rate hesabi
            feats.append(zcr)
            
            s = skew(ch_signal)                                            # Skewness ve kurtosis hesabi
            k = kurtosis(ch_signal)
            feats.extend([s, k])
            
            count+=1
        feature_list.append(feats)

    print (f"Özellik çıkarımı için toplam {count} epoch işleniyor...")
    return np.array(feature_list)

In [3]:
def preprocess_data(psg_file, hyp_file, epoch_duration=30.0):
    raw = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)
    
    wanted = ['EEG Fpz-Cz', 'EEG Pz-Oz', 'EOG horizontal', 'EMG submental']                             # Kullanacagimiz kanallari seciyoruz.
    use_chs = [ch for ch in raw.ch_names if ch in wanted]
    raw.pick_channels(use_chs)
    
    annotations = mne.read_annotations(hyp_file)                                                        # Hypnogram dosyasindan etiketleri cikartiyoruz.
    raw.set_annotations(annotations)
    
    stage_mapping = {                                                                                   # Olaylari mapliyoruz.
        'Sleep stage W': 0,
        'Sleep stage 1': 1,
        'Sleep stage 2': 2,
        'Sleep stage 3': 3,
        'Sleep stage 4': 3,
        'Sleep stage R': 4,
        'Sleep stage ?': -1,
        'Movement time': -1
    }
    
    
    events, event_dict = mne.events_from_annotations(                                                   # Epochlari ve etiketleri olusturuyoruz.
        raw, 
        event_id=stage_mapping,
        chunk_duration=epoch_duration
    )
    
    valid_events = [e for e in events if 0 <= e[2] <= 4]                                                # Gecerli uyku evrelerini filtreliyorum
    
    epochs = mne.Epochs(                                                                                # Epochlari olusturuyorum ve ozelliklerini cikartiyorum.
        raw, 
        valid_events, 
        tmin=0.0, 
        tmax=epoch_duration - 1/raw.info['sfreq'],
        baseline=None,
        preload=True
    )
    
    data = epochs.get_data()
    features = extract_features_multi(data, raw.info['sfreq'])
    labels = [e[2] for e in valid_events]
    
    return features, labels

In [4]:
def my_standard_scaler(X):              # Verilerimi 0-1 arasina standartize ediyorum.
    mean = np.mean(X, axis=0)           # Her sutunun ortalamasi
    std = np.std(X, axis=0)             # Her sutunun standart sapmasi
    X_scaled = (X - mean) / std         # Standardizasyon formulu
    return X_scaled

In [5]:
def predict_sleep_stages(psg_file, hyp_file):
    features, y_true = preprocess_data(psg_file, hyp_file)                                  # Verinin ozelliklerini ve etiketlerini hazirliyorum.
    
    features_scaled = my_standard_scaler(features)                                          # Ozellikleri olceklendiriyorum.
    
    rf_model = joblib.load('random_forest_model.joblib')                                    # Modelleri yukluyorum
    xgb_model = joblib.load('xgboost_model.joblib')
    lstm_model = load_model('lstm_model.h5')
    mlp_model = load_model('mlp_model.h5')
    
    rf_pred = rf_model.predict(features_scaled)                                             # Yuklenen modellerle tahmin yapiyorum.
    xgb_pred = xgb_model.predict(features_scaled)

    timesteps = 1
    features_per_timestep = features_scaled.shape[1] // timesteps
    X_reshaped = features_scaled.reshape(-1, timesteps, features_per_timestep)
    lstm_pred = np.argmax(lstm_model.predict(X_reshaped), axis=1)                           # LSTM icin veriyi yeniden sekillendiriyorum.
    
    mlp_pred = np.argmax(mlp_model.predict(features_scaled), axis=1)
    
    stage_map = {0: 'Wake', 1: 'N1', 2: 'N2', 3: 'N3', 4: 'REM'}                            # Performans ciktisi olusturuyorum.
    
    results = {
        'true': [stage_map[l] for l in y_true],
        'rf': [stage_map[p] for p in rf_pred],
        'xgb': [stage_map[p] for p in xgb_pred],
        'lstm': [stage_map[p] for p in lstm_pred],
        'mlp': [stage_map[p] for p in mlp_pred]
    }
    
    return results

In [6]:
from sklearn.metrics import accuracy_score, f1_score, cohen_kappa_score
def calculate_metrics(y_true, y_pred):                                                      # Performans verilerini elde ediyorum.
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='macro')
    kappa = cohen_kappa_score(y_true, y_pred)
    return acc, f1, kappa

In [7]:
import glob
from sklearn.metrics import classification_report, confusion_matrix
if __name__ == "__main__":

    data_dir = "./Sleep_EDF_Testing_Data"                                                   # Test dosyamizi aliyorum.
    psg_files = glob.glob(os.path.join(data_dir, "*0-PSG.edf"))
    
    all_true = []                                                                           # Tum modeller icin toplu sonuclari tutuyorum.
    all_rf_pred = []
    all_xgb_pred = []
    all_lstm_pred = []
    all_mlp_pred = []
    
    for psg_file in psg_files:                                                              # Dosya ciftlerini seciyorum.
        hyp_file = psg_file.replace('0-PSG.edf', 'C-Hypnogram.edf')
        
        if not os.path.exists(hyp_file):
            print(f"Hipnogram dosyası bulunamadı: {hyp_file}")
            continue
        
        print(f"İşleniyor: {os.path.basename(psg_file)}")
        results = predict_sleep_stages(psg_file, hyp_file)
        
        all_true.extend(results['true'])
        all_rf_pred.extend(results['rf'])
        all_xgb_pred.extend(results['xgb'])
        all_lstm_pred.extend(results['lstm'])
        all_mlp_pred.extend(results['mlp'])
                                                                     # Performans ciktilarini  hesapliyorum.
    print("Random Forest Performansı:")
    print(classification_report(all_true, all_rf_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(all_true, all_rf_pred, labels=['Wake', 'N1', 'N2', 'N3', 'REM']))
    
    print("XGBoost Performansı:")
    print(classification_report(all_true, all_xgb_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(all_true, all_xgb_pred, labels=['Wake', 'N1', 'N2', 'N3', 'REM']))
    
    print("LSTM Performansı:")
    print(classification_report(all_true, all_lstm_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(all_true, all_lstm_pred, labels=['Wake', 'N1', 'N2', 'N3', 'REM']))
    
    print("MLP Performansı:")
    print(classification_report(all_true, all_mlp_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(all_true, all_mlp_pred, labels=['Wake', 'N1', 'N2', 'N3', 'REM']))

    rf_metrics = calculate_metrics(all_true, all_rf_pred)           # Modellerin performansini karsilastiriyorum.
    xgb_metrics = calculate_metrics(all_true, all_xgb_pred)
    lstm_metrics = calculate_metrics(all_true, all_lstm_pred)
    mlp_metrics = calculate_metrics(all_true, all_mlp_pred)

    print("MODEL PERFORMANS KARŞILAŞTIRMASI:")
    print(f"RF:    Accuracy={rf_metrics[0]:.4f}, F1={rf_metrics[1]:.4f}, Kappa={rf_metrics[2]:.4f}")
    print(f"XGB:   Accuracy={xgb_metrics[0]:.4f}, F1={xgb_metrics[1]:.4f}, Kappa={xgb_metrics[2]:.4f}")
    print(f"LSTM:  Accuracy={lstm_metrics[0]:.4f}, F1={lstm_metrics[1]:.4f}, Kappa={lstm_metrics[2]:.4f}")
    print(f"MLP:   Accuracy={mlp_metrics[0]:.4f}, F1={mlp_metrics[1]:.4f}, Kappa={mlp_metrics[2]:.4f}")

İşleniyor: SC4102E0-PSG.edf


  raw = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)
  raw = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)
  raw = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).


  raw.set_annotations(annotations)


Used Annotations descriptions: [np.str_('Movement time'), np.str_('Sleep stage 1'), np.str_('Sleep stage 2'), np.str_('Sleep stage 3'), np.str_('Sleep stage ?'), np.str_('Sleep stage R'), np.str_('Sleep stage W')]
Not setting metadata
2857 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 2857 events and 3000 original time points ...
0 bad epochs dropped
Özellik çıkarımı için toplam 11428 epoch işleniyor...




[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  
İşleniyor: SC4111E0-PSG.edf


  raw = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)
  raw = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)
  raw = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).


  raw.set_annotations(annotations)


Used Annotations descriptions: [np.str_('Movement time'), np.str_('Sleep stage 1'), np.str_('Sleep stage 2'), np.str_('Sleep stage 3'), np.str_('Sleep stage 4'), np.str_('Sleep stage ?'), np.str_('Sleep stage R'), np.str_('Sleep stage W')]
Not setting metadata
2641 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 2641 events and 3000 original time points ...
0 bad epochs dropped




Özellik çıkarımı için toplam 10564 epoch işleniyor...
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  
İşleniyor: SC4112E0-PSG.edf


  raw = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)
  raw = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)
  raw = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).


  raw.set_annotations(annotations)


Used Annotations descriptions: [np.str_('Sleep stage 1'), np.str_('Sleep stage 2'), np.str_('Sleep stage 3'), np.str_('Sleep stage 4'), np.str_('Sleep stage ?'), np.str_('Sleep stage R'), np.str_('Sleep stage W')]
Not setting metadata
2780 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 2780 events and 3000 original time points ...
0 bad epochs dropped
Özellik çıkarımı için toplam 11120 epoch işleniyor...




[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
İşleniyor: SC4121E0-PSG.edf


  raw = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)
  raw = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)
  raw = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).


  raw.set_annotations(annotations)


Used Annotations descriptions: [np.str_('Sleep stage 1'), np.str_('Sleep stage 2'), np.str_('Sleep stage 3'), np.str_('Sleep stage 4'), np.str_('Sleep stage ?'), np.str_('Sleep stage R'), np.str_('Sleep stage W')]
Not setting metadata
2685 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 2685 events and 3000 original time points ...
0 bad epochs dropped




Özellik çıkarımı için toplam 10740 epoch işleniyor...




[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  
İşleniyor: SC4131E0-PSG.edf


  raw = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)
  raw = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)
  raw = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).


  raw.set_annotations(annotations)


Used Annotations descriptions: [np.str_('Sleep stage 1'), np.str_('Sleep stage 2'), np.str_('Sleep stage 3'), np.str_('Sleep stage 4'), np.str_('Sleep stage ?'), np.str_('Sleep stage R'), np.str_('Sleep stage W')]
Not setting metadata
2814 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 2814 events and 3000 original time points ...
0 bad epochs dropped




Özellik çıkarımı için toplam 11256 epoch işleniyor...




[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  
Random Forest Performansı:
              precision    recall  f1-score   support

          N1       0.51      0.36      0.42       253
          N2       0.78      0.79      0.79      2465
          N3       0.72      0.63      0.67       519
         REM       0.53      0.70      0.60       938
        Wake       0.97      0.95      0.96      9602

    accuracy                           0.88     13777
   macro avg       0.70      0.68      0.69     13777
weighted avg       0.89      0.88      0.88     13777

Confusion Matrix:
[[9102   55  134   15  296]
 [  37   90   61    0   65]
 [ 179   19 1950  110  207]
 [  48    0  136  327    8]
 [  49   13  221    0  655]]
XGBoost Performansı:
              precision    recall  f1-score   support

          N1       0.30      0.46      0.36       253
          N2       0.78      0.75      0.77    