In [1]:
import os
import glob
import mne
import numpy as np
import pandas as pd
import joblib
from scipy import stats
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, cohen_kappa_score
import matplotlib.pyplot as plt

In [2]:
from scipy.signal import find_peaks, welch
from scipy.stats import entropy, kurtosis, skew, iqr

def extract_emotion_features(data, sfreq):
    feature_list = []
    count = 0
    
    # Zaman serisinde ozellik cikarimi yapiyorum. Buradaki ozniteliklerin cogunu TSFEL: Time Series Feature Extraction Library makalesinden aldim.

    for epoch in data:
        feats = []
        for ch_signal in epoch:
            t = np.arange(len(ch_signal))
            abs_energy = np.sum(ch_signal**2)                                                           # Sinyalin toplam enerjisi, guc olcusu
            area_curve = np.trapz(ch_signal)                                                            # Egri alti alan, sinyalin integraline benzer toplam degeri
            
            autocorr = np.corrcoef(ch_signal[:-1], ch_signal[1:])[0, 1] if np.std(ch_signal) > 0 else 0 # Sinyalin kendisiyle bir gecikmeyle olan korelasyonu
            
            avg_power = np.mean(ch_signal**2)                                                           # Ortalama guc
            centroid = np.sum(t * np.abs(ch_signal)) / (np.sum(np.abs(ch_signal)) + 1e-12)              # Sinyal agirlik merkezi
            
            hist, _ = np.histogram(ch_signal, bins=10, density=True)
            hist_entropy = entropy(hist + 1e-12)                                                        # Sinyal genlik dagiliminin duzensizligi
            
            freqs, psd = welch(ch_signal, sfreq, nperseg=min(256, len(ch_signal)))
            psd /= np.sum(psd) + 1e-12                                                                  # Frekanslara gore enerji dagilimi
            
            fund_freq = freqs[np.argmax(psd)] if len(psd) > 0 else 0                                    # En yuksek guce sahip frekans bileşeni
            
            hist_vals, hist_bins = np.histogram(ch_signal, bins=10)
            hist_mode = hist_bins[np.argmax(hist_vals)]                                                 # En cok gorulen genlik degeri
            
            human_energy = np.sum(psd[(freqs >= 0.5) & (freqs <= 40)])                                  # Insan EEG bandindaki enerji
            iqr_val = iqr(ch_signal)                                                                    # Sinyalin orta %50'sinin yayilimi
            kurt_val = kurtosis(ch_signal)                                                              # Carpiklik, uc deger yogunlugu
            
            max_val = np.max(ch_signal)                                                                 # Maksimum deger
            max_psd = np.max(psd)                                                                       # Maksimum guc
            max_freq = freqs[-1]                                                                         # En yuksek olculebilir frekans
            
            mean_val = np.mean(ch_signal)                                                               # Ortalama deger
            mad = np.mean(np.abs(ch_signal - mean_val))                                                 # Ortalama mutlak sapma
            mean_abs_diff = np.mean(np.abs(np.diff(ch_signal)))                                         # Komşu ornekler arasindaki ortalama mutlak fark
            mean_diff = np.mean(np.diff(ch_signal))                                                     # Komşu ornekler arasindaki ortalama fark
            
            med_val = np.median(ch_signal)                                                              # Medyan deger
            med_abs_dev = np.median(np.abs(ch_signal - med_val))                                        # Medyan mutlak sapma
            med_abs_diff = np.median(np.abs(np.diff(ch_signal)))                                        # Medyan komşu mutlak farki
            med_diff = np.median(np.diff(ch_signal))                                                    # Medyan komşu farki
            cumulative_power = np.cumsum(psd)
            med_freq = freqs[np.argmax(cumulative_power >= 0.5)] if len(psd) > 0 else 0                 # Medyan frekans
            
            min_val = np.min(ch_signal)                                                                 # Minimum deger
            
            diff_signal = np.diff(ch_signal)
            sign_changes = np.diff(np.sign(diff_signal))
            neg_turning = np.sum(sign_changes > 0)                                                      # Negatif egimden pozitif egime geciş sayisi
            pos_turning = np.sum(sign_changes < 0)                                                      # Pozitif egimden negatif egime geciş sayisi
            peaks, _ = find_peaks(ch_signal)
            n_peaks = len(peaks)                                                                        # Tepe sayisi
            peak_dist = np.mean(np.diff(peaks)) if n_peaks > 1 else 0                                   # Tepeler arasi ortalama mesafe
            
            if len(psd) > 0:
                peak_psd = np.max(psd)
                mask = psd >= (peak_psd / 2)
                try:
                    power_bandwidth = freqs[mask][-1] - freqs[mask][0]                                  # Gucun %50'sinden fazlasini iceren frekans araligi
                except:
                    power_bandwidth = 0
            else:
                power_bandwidth = 0
                
            rms = np.sqrt(avg_power)                                                                    
            signal_dist = np.sum(np.abs(ch_signal))                                                     # Mutlak genlik toplami
            skew_val = skew(ch_signal)                                                                  # Simetri olcusu
            slope = np.polyfit(t, ch_signal, 1)[0]                                                      # Lineer egim, trend
            
            spec_centroid = np.sum(freqs * psd) / (np.sum(psd) + 1e-12)                                 # Spektral agirlik merkezi
            spec_decrease = np.sum((psd[1:] - psd[0]) / np.arange(1, len(psd))) / (np.sum(psd[1:]) + 1e-12) if len(psd) > 1 else 0   # Yuksek frekanslardaki guc kaybi
            spec_ent = entropy(psd + 1e-12)                                                             # Spektral entropi
            spec_kurt = kurtosis(psd)                                                                   # Spektral carpiklik
            diff_psd = np.diff(psd)
            psd_sign_changes = np.diff(np.sign(diff_psd))
            spec_pos_turning = np.sum(psd_sign_changes < 0)                                             # Spektral donuş noktasi sayisi
            spec_roll_off = freqs[np.argmax(cumulative_power >= 0.85)] if len(psd) > 0 else 0           # Enerjinin %85'ine ulaşilan frekans
            spec_roll_on = freqs[np.argmax(cumulative_power >= 0.05)] if len(psd) > 0 else 0            # Enerjinin %5'ine ulaşilan frekans
            spec_skew = skew(psd)                                                                       # Spektral skewness
            spec_slope = np.polyfit(freqs, psd, 1)[0] if len(psd) > 1 else 0                            # Spektral egim
            spec_spread = np.sqrt(np.sum(psd * (freqs - spec_centroid)**2) / (np.sum(psd) + 1e-12))     # Spektral yayilma
            
            std = np.std(ch_signal)                                                                     # Standart sapma
            sum_abs_diff = np.sum(np.abs(np.diff(ch_signal)))                                           # Ornekler arasi toplam fark
            var = np.var(ch_signal)                                                                     # Varyans
            
            zero_crossings = np.sum(np.diff(np.sign(ch_signal)) != 0)
            zcr = zero_crossings / len(ch_signal)                                                       # Sinyalin kac kez sifiri gectigi
            
            features = [
                abs_energy, area_curve, autocorr, avg_power, centroid, hist_entropy,
                fund_freq, hist_mode, human_energy, iqr_val, kurt_val, max_val,
                max_psd, max_freq, mean_val, mad, mean_abs_diff, mean_diff, med_val,
                med_abs_dev, med_abs_diff, med_diff, med_freq, min_val, neg_turning,
                n_peaks, peak_dist, pos_turning, power_bandwidth, rms, signal_dist,
                skew_val, slope, spec_centroid, spec_decrease, spec_ent,
                spec_kurt, spec_pos_turning, spec_roll_off, spec_roll_on, spec_skew,
                spec_slope, spec_spread, std, sum_abs_diff, var, zcr
            ]
            
            feats.extend(features)
            
        feature_list.append(feats)
        count += 1
        
    print(f"ozellik cikarimi icin toplam {count} epoch işlendi.")
    return np.array(feature_list)

In [3]:
label_mapping = {
    'ibeg': 0, 'iend': 1, 'rsrt': 2, 'fixl': 3,
    'quiz': 4, 'qdon': 5, 'base': 6, 'bend': 7, 'trno': 8,
    'fixL': 9, 'stm': 10, 'clic': 11, 'vlnc': 12, 'arsl': 13,
    'dmns': 14, 'lkng': 15, 'fmrt': 16, 'relv': 17, 'cate': 18,
    'IBEG': 0, 'IEND': 1, 'puse': 19, 'boundary': 20, 'stop': 21,
    'baseline': 22,'baseend': 23,'trialno': 24,'fixation loop': 25,
    'neutral_1_1': 26,'valence': 27,'arousal': 13,'dominance': 14,'liking': 15,
    'familiarity': 16,'relevance': 17,'3_2': 28,'click': 29,
    'impedances begin': 30,'emotion_categ': 31,'impedances end': 32,'stop event': 33
}
def preprocess_emotion_data(eeg_file, event_file, epoch_duration=2.0):
    try:
        raw = mne.io.read_raw_eeglab(eeg_file, preload=True, verbose=False)                             # EEG verisini ve olaylari yukluyoruz.
        events_df = pd.read_csv(event_file, sep='\t')
        
        events = []
        for _, row in events_df.iterrows():                                                             # Etiketleri sayisallastiriyorum
            sample = int(row['onset'] * raw.info['sfreq']/1000)
            label_str = str(row['trial_type']).lower().strip()
            label = label_mapping.get(label_str, -1)
            if label >= 0:
                events.append([sample, 0, label])
        
        events_array = np.array(events, dtype=np.int64)
        
        epochs = mne.Epochs(                                                                            # Epochlari olusturuyorum.
            raw,
            events_array,
            tmin=0.0,
            tmax=epoch_duration,
            baseline=None,
            preload=True,
            reject_by_annotation=False
        )
        
        data = epochs.get_data()                                                                        # Epoch verilerinin ozelliklerini cikartiyorum.
        features = extract_emotion_features(data, raw.info['sfreq'])    
        labels = epochs.events[:, -1]
        
        return features, labels
        
    except Exception as e:
        print(f"Error processing {eeg_file}: {str(e)}")
        return np.array([]), np.array([])

In [4]:
def my_standard_scaler(X):              # Verilerimi 0-1 arasina standartize ediyorum.
    mean = np.mean(X, axis=0)           # Her sutunun ortalamasi
    std = np.std(X, axis=0)             # Her sutunun standart sapmasi
    X_scaled = (X - mean) / std         # Standardizasyon formulu
    return X_scaled

In [5]:
import joblib
def predict_emotions(eeg_file, event_file):
    features, true_labels = preprocess_emotion_data(eeg_file, event_file)                               # Veriyi onisleme uyguluyorum.
    if len(features) == 0:
        return {}
    imputer = joblib.load('imputer.joblib')
    selector = joblib.load('selector.joblib')

    features_imputed = imputer.transform(features)
    features_selected = selector.transform(features_imputed)
    features_scaled = my_standard_scaler(features_selected)                                                      # Veriyi standartize ediyorum.
    
    rf_model = joblib.load('random_forest_model.joblib')                                                # Modelleri yukluyorum.
    xgb_model = joblib.load('xgboost_model.joblib')
    mlp_model = load_model('mlp_model.h5')
    
    rf_pred = rf_model.predict(features_scaled)                                                         # Tahmin yaptiriyorum.
    xgb_pred = xgb_model.predict(features_scaled)
    mlp_pred = np.argmax(mlp_model.predict(features_scaled), axis=1)

    mapping_inverse = {v: k for k, v in label_mapping.items()}                                          # Simdi tersten map ediyoruz, hata aliyordum burayi da ChatGPT'den aldim.
    true_labels_str = [mapping_inverse.get(x, 'UNKNOWN') for x in true_labels]
    rf_pred_str = [mapping_inverse.get(x, 'UNKNOWN') for x in rf_pred]
    xgb_pred_str = [mapping_inverse.get(x, 'UNKNOWN') for x in xgb_pred]
    mlp_pred_str = [mapping_inverse.get(x, 'UNKNOWN') for x in mlp_pred]
                                                                      
    results = {                                                                                         # Labellari decode ediyoruz.
        'true': true_labels_str,
        'rf': rf_pred_str,
        'xgb': xgb_pred_str,
        'mlp': mlp_pred_str
    }
    
    return results

In [6]:
def calculate_metrics(y_true, y_pred):                                          # Performans verilerini elde ediyorum.
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='macro')
    kappa = cohen_kappa_score(y_true, y_pred)
    return acc, f1, kappa

In [7]:
def test_emotion_models(data_dir):
    import joblib
    
    le = joblib.load('label_encoder.joblib')                                    # Kaydettigimiz labelleri yukluyorum
    
    eeg_files = glob.glob(os.path.join(data_dir, "*.set"))                      # Dosyadaki EEG dosya ikililerini aliyorum.
    all_true, all_rf, all_xgb, all_mlp = [], [], [], []                         # Tum modeller icin toplu sonuclari tutuyorum.
    for eeg_file in eeg_files:
        event_file = eeg_file.replace('_eeg.set', '_events.tsv')
        if not os.path.exists(event_file):
            print(f"Event file not found: {event_file}")
            continue
            
        print(f"Processing: {os.path.basename(eeg_file)}")
        results = predict_emotions(eeg_file, event_file)
        
        if not results:
            continue
            
        all_true.extend(results['true'])
        all_rf.extend(results['rf'])
        all_xgb.extend(results['xgb'])
        all_mlp.extend(results['mlp'])

    if len(all_true) == 0:
        print("Data işlenemedi.")
        return
    
    classes = le.classes_

    print("LabelEncoder sınıfları:", classes)
    print("LabelEncoder sınıf sayısı:", len(classes))
    print("Model tahmin unique değerleri (rf):", np.unique(all_rf))
    print("Model tahmin unique değerleri (xgb):", np.unique(all_xgb))
    print("True label unique değerleri:", np.unique(all_true))
    all_unique_labels = sorted(list(set(all_true) | set(all_rf) | set(all_xgb) | set(all_mlp)))                     # Tum essiz labellari aliyorum.
    

    print("Random Forest Performance:")                                                                                             # Modellerin performanslarini hesapliyoruz.
    print(classification_report(all_true, all_rf, labels=all_unique_labels, target_names=all_unique_labels, zero_division=0))
    print("Confusion Matrix:")
    print(confusion_matrix(all_true, all_rf, labels=all_unique_labels))
    
    print("XGBoost Performance:")
    print(classification_report(all_true, all_xgb, labels=all_unique_labels, target_names=all_unique_labels, zero_division=0))
    print("Confusion Matrix:")
    print(confusion_matrix(all_true, all_xgb, labels=all_unique_labels))
    
    print("MLP Performance:")
    print(classification_report(all_true, all_mlp, labels=all_unique_labels, target_names=all_unique_labels, zero_division=0))
    print("Confusion Matrix:")
    print(confusion_matrix(all_true, all_mlp, labels=all_unique_labels))
    

    rf_metrics = calculate_metrics(all_true, all_rf)                                                                               # Kullandigimiz modellerin metriklerini hesapliyoruz.
    xgb_metrics = calculate_metrics(all_true, all_xgb)
    mlp_metrics = calculate_metrics(all_true, all_mlp)
    
    print("MODEL PERFORMANS KARŞILAŞTIRMASI:")
    print(f"RF:   Accuracy={rf_metrics[0]:.4f}, F1={rf_metrics[1]:.4f}, Kappa={rf_metrics[2]:.4f}")
    print(f"XGB:  Accuracy={xgb_metrics[0]:.4f}, F1={xgb_metrics[1]:.4f}, Kappa={xgb_metrics[2]:.4f}")
    print(f"MLP:  Accuracy={mlp_metrics[0]:.4f}, F1={mlp_metrics[1]:.4f}, Kappa={mlp_metrics[2]:.4f}")

label_mapping = {
    'ibeg': 0, 'iend': 1, 'rsrt': 2, 'fixl': 3,
    'quiz': 4, 'qdon': 5, 'base': 6, 'bend': 7, 'trno': 8,
    'fixL': 9, 'stm': 10, 'clic': 11, 'vlnc': 12, 'arsl': 13,
    'dmns': 14, 'lkng': 15, 'fmrt': 16, 'relv': 17, 'cate': 18,
    'IBEG': 0, 'IEND': 1, 'puse': 19, 'boundary': 20, 'stop': 21,
    'baseline': 22, 'baseend': 23, 'trialno': 24, 'fixation loop': 25,
    'neutral_1_1': 26, 'valence': 27, 'arousal': 13, 'dominance': 14, 'liking': 15,
    'familiarity': 16, 'relevance': 17, '3_2': 28, 'click': 29,
    'impedances begin': 30, 'emotion_categ': 31, 'impedances end': 32, 'stop event': 33
}

if __name__ == "__main__":
    test_data_dir = "./EmotionData/Emotion_EDF_Testing_Data"
    test_emotion_models(test_data_dir)

Processing: sub-mit081_task-Emotion_eeg.set


pns: ['ECG', 'EMG', 'EMG_2']
  raw = mne.io.read_raw_eeglab(eeg_file, preload=True, verbose=False)                             # EEG verisini ve olaylari yukluyoruz.


Not setting metadata
135 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 135 events and 501 original time points ...
0 bad epochs dropped


  raw = mne.io.read_raw_eeglab(eeg_file, preload=True, verbose=False)                             # EEG verisini ve olaylari yukluyoruz.
  area_curve = np.trapz(ch_signal)                                                            # Egri alti alan, sinyalin integraline benzer toplam degeri


ozellik cikarimi icin toplam 135 epoch işlendi.


  671  718  765  812  859  906  953 1000 1047 1094 1141 1188 1235 1282
 1329 1376 1423 1470 1517 1564 1611 1658 1705 1752 1799 1846 1893 1940
 1987 2034 2081 2128 2175 2222 2269 2316 2363 2410 2457 2504 2551 2598
 2645 2692 2739 2786 2833 2880 2927 2974 3021 3068 3115 3162 3209 3256
 3303 3350 3397 3444 3491 3538 3585 3632 3679 3726 3773 3820 3867 3914
 3961 4008 4055 4102 4149 4196 4243 4290 4337 4384 4431 4478 4525 4572
 4619 4666 4713 4760 4807 4854 4901 4948 4995 5042 5089 5136 5183 5230
 5277 5324 5371 5418 5465 5512 5559 5606 5653 5700 5747 5794 5841 5888
 5935 5982 6016 6017 6018 6019 6020 6022 6023 6024 6025 6026 6027 6028
 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042
 6043 6044 6045 6046 6047 6048 6049 6050 6052 6053 6054 6055 6056 6057
 6058 6059 6060 6061 6062 6069 6076 6085 6101 6116 6123 6132 6148 6163
 6170 6179]. At least one non-missing value is needed for imputation with strategy='mean'.
  X_scaled = (X - mean) / std         # Standardizasyon f

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Processing: sub-mit082_task-Emotion_eeg.set


pns: ['ECG', 'EMG', 'EMG_2']
  raw = mne.io.read_raw_eeglab(eeg_file, preload=True, verbose=False)                             # EEG verisini ve olaylari yukluyoruz.


Not setting metadata
165 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 165 events and 501 original time points ...
0 bad epochs dropped


  raw = mne.io.read_raw_eeglab(eeg_file, preload=True, verbose=False)                             # EEG verisini ve olaylari yukluyoruz.
  raw = mne.io.read_raw_eeglab(eeg_file, preload=True, verbose=False)                             # EEG verisini ve olaylari yukluyoruz.
  area_curve = np.trapz(ch_signal)                                                            # Egri alti alan, sinyalin integraline benzer toplam degeri


ozellik cikarimi icin toplam 165 epoch işlendi.


  671  718  765  812  859  906  953 1000 1047 1094 1141 1188 1235 1282
 1329 1376 1423 1470 1517 1564 1611 1658 1705 1752 1799 1846 1893 1940
 1987 2034 2081 2128 2175 2222 2269 2316 2363 2410 2457 2504 2551 2598
 2645 2692 2739 2786 2833 2880 2927 2974 3021 3068 3115 3162 3209 3256
 3303 3350 3397 3444 3491 3538 3585 3632 3679 3726 3773 3820 3867 3914
 3961 4008 4055 4102 4149 4196 4243 4290 4337 4384 4431 4478 4525 4572
 4619 4666 4713 4760 4807 4854 4901 4948 4995 5042 5089 5136 5183 5230
 5277 5324 5371 5418 5465 5512 5559 5606 5653 5700 5747 5794 5841 5888
 5935 5982 6016 6017 6018 6019 6020 6022 6023 6024 6025 6026 6027 6028
 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042
 6043 6044 6045 6046 6047 6048 6049 6050 6052 6053 6054 6055 6056 6057
 6058 6059 6060 6061 6062 6069 6076 6085 6101 6116 6123 6132 6148 6163
 6170 6179]. At least one non-missing value is needed for imputation with strategy='mean'.
  X_scaled = (X - mean) / std         # Standardizasyon f

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
LabelEncoder sınıfları: [ 0  1  3  8 10 11 12 13 14 15 16 17 18]
LabelEncoder sınıf sayısı: 13
Model tahmin unique değerleri (rf): ['IEND' 'base' 'bend' 'clic' 'fixL' 'fixl' 'qdon' 'quiz' 'rsrt' 'stm'
 'trno' 'vlnc']
Model tahmin unique değerleri (xgb): ['IBEG' 'IEND' 'base' 'bend' 'clic' 'fixL' 'fixl' 'qdon' 'quiz' 'rsrt'
 'stm' 'trno' 'vlnc']
True label unique değerleri: ['IBEG' 'IEND' 'arousal' 'base' 'bend' 'boundary' 'cate' 'clic'
 'dominance' 'familiarity' 'fixl' 'liking' 'puse' 'qdon' 'quiz'
 'relevance' 'rsrt' 'stm' 'stop' 'trno' 'vlnc']
Random Forest Performance:
              precision    recall  f1-score   support

        IBEG       0.00      0.00      0.00         4
        IEND       0.00      0.00      0.00         4
     arousal       0.00      0.00      0.00        22
        base       0.04      0.50      0.07         2
        bend       0.00      0.00      0.00         2
    boundary       0.00 