In [1]:
import os
import glob
import mne
import numpy as np
import pandas as pd
import joblib
from scipy import stats
from tensorflow.keras.models import load_model
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, cohen_kappa_score
import matplotlib.pyplot as plt

In [2]:
from scipy.signal import find_peaks, welch
from scipy.stats import entropy, kurtosis, skew, iqr

def bandpower(psd, freqs, fmin, fmax):
    """Sum the PSD values whose frequency lies in the closed interval [fmin, fmax].

    Parameters
    ----------
    psd : array
        Power values, one per entry of ``freqs``.
    freqs : array
        Frequencies matching ``psd`` element by element.
    fmin, fmax : float
        Band limits; both ends are inclusive, so a bin sitting exactly on a
        shared edge is counted in both neighbouring bands.

    Returns
    -------
    The summed power of the selected bins (0 when no bin falls in the band).
    """
    in_band = (freqs >= fmin) & (freqs <= fmax)
    selected_power = psd[in_band]
    return np.sum(selected_power)

def extract_emotion_features(data, sfreq, n_fft_comp=5):
    """Build one flat feature vector per epoch from multi-channel signal data.

    Every channel of every epoch contributes ``52 + n_fft_comp`` time- and
    frequency-domain features; the per-channel vectors are concatenated in
    channel order. The order of the features inside a channel vector is part
    of the contract with the fitted imputer/scaler/models and must not change.

    Parameters
    ----------
    data : iterable of epochs
        Each epoch is iterated as a sequence of 1-D channel signals, i.e. an
        array shaped (n_epochs, n_channels, n_samples).
    sfreq : float
        Sampling frequency passed to ``scipy.signal.welch``.
    n_fft_comp : int, default 5
        Number of largest FFT magnitudes kept per channel. When the signal has
        fewer FFT bins than requested the vector is zero-padded so every
        channel always yields the same number of features.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_epochs, n_channels * (52 + n_fft_comp)); an empty
        1-D array when ``data`` contains no epochs.

    Notes
    -----
    Constant signals produce NaN for some statistics (e.g. kurtosis/skewness);
    these are left as NaN here for a downstream imputer to handle.
    """
    # np.trapz is deprecated in NumPy 2.x in favour of np.trapezoid; use the new
    # name when it exists and fall back to the old one on older NumPy versions.
    integrate = getattr(np, "trapezoid", None)
    if integrate is None:
        integrate = np.trapz

    feature_list = []

    # Time-series feature extraction. Most of the features below are taken from
    # the TSFEL (Time Series Feature Extraction Library) paper.
    for epoch in data:
        feats = []
        for ch_signal in epoch:
            t = np.arange(len(ch_signal))

            # Intermediates shared by several features (computed once per channel).
            abs_signal = np.abs(ch_signal)
            squared = ch_signal**2
            diff_signal = np.diff(ch_signal)
            abs_diff = np.abs(diff_signal)
            std = np.std(ch_signal)

            # ---- time-domain features -------------------------------------
            abs_energy = np.sum(squared)                      # total signal energy
            area_curve = integrate(ch_signal)                 # area under the curve (trapezoidal rule)

            # Lag-1 autocorrelation; undefined for a constant signal, so report 0 there.
            autocorr = np.corrcoef(ch_signal[:-1], ch_signal[1:])[0, 1] if std > 0 else 0

            avg_power = np.mean(squared)                      # mean power
            centroid = np.sum(t * abs_signal) / (np.sum(abs_signal) + 1e-12)  # temporal centroid

            # Entropy of the 10-bin amplitude distribution.
            hist, _ = np.histogram(ch_signal, bins=10, density=True)
            hist_entropy = entropy(hist + 1e-12)

            # Left edge of the most populated amplitude bin.
            hist_vals, hist_bins = np.histogram(ch_signal, bins=10)
            hist_mode = hist_bins[np.argmax(hist_vals)]

            iqr_val = iqr(ch_signal)                          # spread of the middle 50 %
            kurt_val = kurtosis(ch_signal)                    # kurtosis (tail heaviness)
            skew_val = skew(ch_signal)                        # skewness (asymmetry)

            max_val = np.max(ch_signal)
            min_val = np.min(ch_signal)
            mean_val = np.mean(ch_signal)
            med_val = np.median(ch_signal)
            mad = np.mean(np.abs(ch_signal - mean_val))       # mean absolute deviation
            med_abs_dev = np.median(np.abs(ch_signal - med_val))  # median absolute deviation

            mean_abs_diff = np.mean(abs_diff)                 # mean |x[i+1] - x[i]|
            mean_diff = np.mean(diff_signal)                  # mean (x[i+1] - x[i])
            med_abs_diff = np.median(abs_diff)
            med_diff = np.median(diff_signal)
            sum_abs_diff = np.sum(abs_diff)

            # Turning points: sign changes of the first difference.
            sign_changes = np.diff(np.sign(diff_signal))
            neg_turning = np.sum(sign_changes > 0)            # falling -> rising
            pos_turning = np.sum(sign_changes < 0)            # rising -> falling

            peaks, _ = find_peaks(ch_signal)
            n_peaks = len(peaks)
            peak_dist = np.mean(np.diff(peaks)) if n_peaks > 1 else 0  # mean distance between peaks

            rms = np.sqrt(avg_power)
            signal_dist = np.sum(abs_signal)                  # sum of absolute amplitudes
            slope = np.polyfit(t, ch_signal, 1)[0]            # linear trend
            var = np.var(ch_signal)

            zero_crossings = np.sum(np.diff(np.sign(ch_signal)) != 0)
            zcr = zero_crossings / len(ch_signal)             # zero-crossing rate

            # ---- spectral features (Welch PSD normalised to unit sum) -----
            freqs, psd = welch(ch_signal, sfreq, nperseg=min(256, len(ch_signal)))
            psd /= np.sum(psd) + 1e-12
            cumulative_power = np.cumsum(psd)

            max_psd = np.max(psd)                             # largest normalised power
            max_freq = freqs[-1]                              # highest frequency bin
            fund_freq = freqs[np.argmax(psd)]                 # frequency of the strongest bin
            human_energy = np.sum(psd[(freqs >= 0.5) & (freqs <= 40)])  # share of power in 0.5-40 Hz

            med_freq = freqs[np.argmax(cumulative_power >= 0.5)]        # 50 % cumulative power
            spec_roll_off = freqs[np.argmax(cumulative_power >= 0.85)]  # 85 % cumulative power
            spec_roll_on = freqs[np.argmax(cumulative_power >= 0.05)]   # 5 % cumulative power

            # Width of the frequency range whose power is at least half the peak.
            # The selection is empty only if the PSD contains NaN; report 0 then.
            half_power_freqs = freqs[psd >= (max_psd / 2)]
            power_bandwidth = half_power_freqs[-1] - half_power_freqs[0] if half_power_freqs.size else 0

            spec_centroid = np.sum(freqs * psd) / (np.sum(psd) + 1e-12)
            spec_decrease = (
                np.sum((psd[1:] - psd[0]) / np.arange(1, len(psd))) / (np.sum(psd[1:]) + 1e-12)
                if len(psd) > 1 else 0
            )
            spec_ent = entropy(psd + 1e-12)                   # spectral entropy
            spec_kurt = kurtosis(psd)                         # spectral kurtosis
            spec_skew = skew(psd)                             # spectral skewness
            psd_sign_changes = np.diff(np.sign(np.diff(psd)))
            spec_pos_turning = np.sum(psd_sign_changes < 0)   # local maxima of the PSD
            spec_slope = np.polyfit(freqs, psd, 1)[0] if len(psd) > 1 else 0
            spec_spread = np.sqrt(np.sum(psd * (freqs - spec_centroid)**2) / (np.sum(psd) + 1e-12))

            # Band powers: delta 0.5-4, theta 4-8, alpha 8-13, beta 13-30, gamma 30-40 Hz.
            bp_delta = bandpower(psd, freqs, 0.5, 4)
            bp_theta = bandpower(psd, freqs, 4, 8)
            bp_alpha = bandpower(psd, freqs, 8, 13)
            bp_beta = bandpower(psd, freqs, 13, 30)
            bp_gamma = bandpower(psd, freqs, 30, 40)

            # The n_fft_comp largest FFT magnitudes, strongest first. Slicing the
            # reversed ordering (instead of [-n:]) keeps n_fft_comp=0 empty, and the
            # zero padding keeps the feature count fixed for very short signals.
            fft_power = np.abs(np.fft.rfft(ch_signal))
            top_fft_indices = np.argsort(fft_power)[::-1][:n_fft_comp]
            top_fft_components = fft_power[top_fft_indices]
            if top_fft_components.size < n_fft_comp:
                top_fft_components = np.pad(
                    top_fft_components, (0, n_fft_comp - top_fft_components.size)
                )

            # Order matters: fitted preprocessing objects index into this layout.
            features = [
                abs_energy, area_curve, autocorr, avg_power, centroid, hist_entropy,
                fund_freq, hist_mode, human_energy, iqr_val, kurt_val, max_val,
                bp_delta, bp_theta, bp_alpha, bp_beta, bp_gamma, *top_fft_components,
                max_psd, max_freq, mean_val, mad, mean_abs_diff, mean_diff, med_val,
                med_abs_dev, med_abs_diff, med_diff, med_freq, min_val, neg_turning,
                n_peaks, peak_dist, pos_turning, power_bandwidth, rms, signal_dist,
                skew_val, slope, spec_centroid, spec_decrease, spec_ent,
                spec_kurt, spec_pos_turning, spec_roll_off, spec_roll_on, spec_skew,
                spec_slope, spec_spread, std, sum_abs_diff, var, zcr
            ]

            feats.extend(features)

        feature_list.append(feats)

    count = len(feature_list)
    print(f"Özellik çıkarımı için toplam {count} epoch işlendi.")
    return np.array(feature_list)

In [3]:
# Event name -> integer code used as the MNE event id.
# Several names share one code (e.g. 'arsl' and 'arousal' are both 13). The
# insertion order below is significant: predict_emotions inverts this dict, and
# for a shared code the name listed last is the one that is displayed.
# preprocess_emotion_data lower-cases trial_type before the lookup, so the keys
# containing capitals ('fixL', 'IBEG', 'IEND') are never hit by that lookup;
# they only take part in the inverse mapping.
label_mapping = {
    # short event codes
    'ibeg': 0,
    'iend': 1,
    'rsrt': 2,
    'fixl': 3,
    'quiz': 4,
    'qdon': 5,
    'base': 6,
    'bend': 7,
    'trno': 8,
    'fixL': 9,
    'stm': 10,
    'clic': 11,
    'vlnc': 12,
    'arsl': 13,
    'dmns': 14,
    'lkng': 15,
    'fmrt': 16,
    'relv': 17,
    'cate': 18,
    'IBEG': 0,
    'IEND': 1,
    'puse': 19,
    'boundary': 20,
    'stop': 21,
    # long-form event names
    'baseline': 22,
    'baseend': 23,
    'trialno': 24,
    'fixation loop': 25,
    'neutral_1_1': 26,
    'valence': 27,
    'arousal': 13,
    'dominance': 14,
    'liking': 15,
    'familiarity': 16,
    'relevance': 17,
    '3_2': 28,
    'click': 29,
    'impedances begin': 30,
    'emotion_categ': 31,
    'impedances end': 32,
    'stop event': 33,
}

def create_sequences(data, seq_length=10):
    """Stack every run of ``seq_length`` consecutive rows of ``data``.

    Window ``i`` is ``data[i:i+seq_length]``; windows advance one row at a
    time, giving ``len(data) - seq_length + 1`` windows. When ``data`` has
    fewer than ``seq_length`` rows the result is an empty array.
    """
    n_windows = len(data) - seq_length + 1
    windows = [data[start:start + seq_length] for start in range(n_windows)]
    return np.array(windows)


def preprocess_emotion_data(eeg_file, event_file, epoch_duration=2.0):
    """Epoch one EEGLAB recording around its labelled events and extract features.

    Parameters
    ----------
    eeg_file : str
        Path of the EEGLAB file read with ``mne.io.read_raw_eeglab``.
    event_file : str
        Path of a tab-separated table with ``onset`` and ``trial_type`` columns.
    epoch_duration : float, default 2.0
        Epoch length (``tmax``); every epoch starts at the event itself (``tmin=0``).

    Returns
    -------
    (features, labels)
        ``features`` as returned by ``extract_emotion_features`` and the event
        code of each kept epoch. Any exception is printed and turned into a
        pair of empty arrays, so callers must check for emptiness.
    """
    try:
        # Load the recording into memory and read the event table.
        recording = mne.io.read_raw_eeglab(eeg_file, preload=True, verbose=False)
        event_table = pd.read_csv(event_file, sep='\t')

        # Convert every recognised trial_type into an MNE event row [sample, 0, code].
        event_rows = []
        for _, record in event_table.iterrows():
            # NOTE(review): the onset is scaled by sfreq/1000, i.e. treated as
            # milliseconds - confirm against the convention of the events file.
            onset_sample = int(record['onset'] * recording.info['sfreq']/1000)
            event_name = str(record['trial_type']).lower().strip()
            event_code = label_mapping.get(event_name, -1)
            if event_code < 0:
                # Unknown event names are skipped.
                continue
            event_rows.append([onset_sample, 0, event_code])

        mne_events = np.array(event_rows, dtype=np.int64)

        # Cut fixed-length epochs, without baseline correction or annotation-based rejection.
        segmented = mne.Epochs(
            recording,
            mne_events,
            tmin=0.0,
            tmax=epoch_duration,
            baseline=None,
            preload=True,
            reject_by_annotation=False
        )

        # Labels come from the epochs object so they stay aligned with the epochs
        # that were actually kept.
        epoch_data = segmented.get_data()
        features = extract_emotion_features(epoch_data, recording.info['sfreq'])
        labels = segmented.events[:, -1]

        return features, labels

    except Exception as e:
        print(f"Error processing {eeg_file}: {str(e)}")
        return np.array([]), np.array([])

In [4]:
import joblib
# Deserialized preprocessing objects and Keras models, keyed by file path.
# Filled on first use so that evaluating many recordings does not reload the
# same files from disk for every recording. Re-running this cell resets it.
_ARTIFACT_CACHE = {}


def _cached_artifact(path, loader):
    """Return the object stored at ``path``, calling ``loader(path)`` only the first time."""
    if path not in _ARTIFACT_CACHE:
        _ARTIFACT_CACHE[path] = loader(path)
    return _ARTIFACT_CACHE[path]


def predict_emotions(eeg_file, event_file, seq_length=10):
    """Run the four trained sequence models on one recording.

    Parameters
    ----------
    eeg_file, event_file : str
        Passed unchanged to ``preprocess_emotion_data``.
    seq_length : int, default 10
        Number of consecutive epochs per model input sequence.

    Returns
    -------
    dict
        Keys ``'true'``, ``'rnn'``, ``'lstm'``, ``'gru'``, ``'transformer'``,
        each a list of event names with one entry per sequence. An empty dict
        is returned when preprocessing yields nothing or the recording has
        fewer than ``seq_length`` epochs.
    """
    # Raw features and ground-truth event codes.
    features, true_labels = preprocess_emotion_data(eeg_file, event_file)
    if features.size == 0:
        return {}

    # Fitted preprocessing objects. joblib.load unpickles arbitrary objects, so
    # these paths must only ever point at trusted files.
    imputer = _cached_artifact('imputer.joblib', joblib.load)
    selected_indices = _cached_artifact('selected_feature_indices.joblib', joblib.load)
    scaler = _cached_artifact('scaler.joblib', joblib.load)

    # Trim surplus columns to the width the imputer was fitted on.
    n_imp = imputer.n_features_in_
    if features.shape[1] != n_imp:
        print(f"Özellik sayısı uyumsuz: beklenen {n_imp}, gelen {features.shape[1]}. Kırpılıyor.")
        features = features[:, :n_imp]

    # Impute missing values, then keep only the selected feature columns.
    features_imp = imputer.transform(features)
    features_sel = features_imp[:, selected_indices]

    # Trim surplus columns to the width the scaler was fitted on.
    n_scl = scaler.n_features_in_
    if features_sel.shape[1] != n_scl:
        print(f"Scalerın bekldigi {n_scl} öznitelik, ama elimde {features_sel.shape[1]} var. Kırpılıyor.")
        features_sel = features_sel[:, :n_scl]

    features_scaled = scaler.transform(features_sel)

    # Build overlapping sequences of consecutive epochs.
    if len(features_scaled) < seq_length:
        print(f"Yeterli epoch yok ({len(features_scaled)}) — seq_length={seq_length}")
        return {}
    sequences = create_sequences(features_scaled, seq_length)

    # Each sequence is labelled with the event code of its last epoch.
    true_labels_seq = true_labels[seq_length - 1:]

    # Code -> name; for codes shared by several names the last name in
    # label_mapping wins.
    mapping_inverse = {v: k for k, v in label_mapping.items()}

    def decode(codes):
        return [mapping_inverse.get(x, 'UNKNOWN') for x in codes]

    results = {'true': decode(true_labels_seq)}

    # NOTE(review): the argmax index of each model output is decoded directly
    # through label_mapping. If the models were trained on LabelEncoder-encoded
    # targets (test_emotion_models loads label_encoder.joblib), the indices would
    # have to go through its inverse_transform first - confirm against training.
    model_files = (
        ('rnn', 'rnn_model.h5'),
        ('lstm', 'lstm_model.h5'),
        ('gru', 'gru_model.h5'),
        ('transformer', 'transformer_model.keras'),
    )
    for model_key, model_path in model_files:
        model = _cached_artifact(model_path, load_model)
        class_indices = np.argmax(model.predict(sequences, verbose=0), axis=1)
        results[model_key] = decode(class_indices)

    return results

In [5]:
def calculate_metrics(y_true, y_pred):
    """Return ``(accuracy, macro-averaged F1, Cohen's kappa)`` for the given labels."""
    return (
        accuracy_score(y_true, y_pred),
        f1_score(y_true, y_pred, average='macro'),
        cohen_kappa_score(y_true, y_pred),
    )

In [6]:
def test_emotion_models(data_dir):
    """Evaluate the RNN, LSTM, GRU and Transformer models on every recording in ``data_dir``.

    For each ``*_eeg.set`` file the sibling ``*_events.tsv`` file is looked up,
    predictions are collected with ``predict_emotions`` and, once all files are
    processed, a classification report, a confusion matrix and a summary line
    (accuracy, macro F1, Cohen's kappa) are printed per model.

    Parameters
    ----------
    data_dir : str
        Directory searched (non-recursively) for ``*.set`` files.

    Returns
    -------
    None. All results are printed.
    """
    # Saved label encoder; only its classes are reported here.
    le = joblib.load('label_encoder.joblib')

    # (results key, display name, prefix of the summary line)
    model_specs = (
        ('rnn', 'RNN', 'RNN:   '),
        ('lstm', 'LSTM', 'LSTM:   '),
        ('gru', 'GRU', 'GRU:  '),
        ('transformer', 'Transformer', 'Transformer:  '),
    )

    # Pooled labels over all recordings: ground truth plus one list per model.
    all_true = []
    all_preds = {key: [] for key, _, _ in model_specs}

    # Sorted so the processing order does not depend on the file system.
    for eeg_file in sorted(glob.glob(os.path.join(data_dir, "*.set"))):
        event_file = eeg_file.replace('_eeg.set', '_events.tsv')
        if event_file == eeg_file:
            # The name does not contain '_eeg.set', so no event file can be derived;
            # without this check the recording itself would be parsed as the TSV.
            print(f"Skipping {eeg_file}: file name does not contain '_eeg.set'")
            continue
        if not os.path.exists(event_file):
            print(f"Event file not found: {event_file}")
            continue

        print(f"Processing: {os.path.basename(eeg_file)}")
        results = predict_emotions(eeg_file, event_file)

        if not results:
            continue

        all_true.extend(results['true'])
        for key in all_preds:
            all_preds[key].extend(results[key])

    if len(all_true) == 0:
        print("Data işlenemedi.")
        return

    classes = le.classes_

    print("LabelEncoder sınıfları:", classes)
    print("LabelEncoder sınıf sayısı:", len(classes))
    for key, display_name, _ in model_specs:
        print(f"Model tahmin unique değerleri ({display_name}):", np.unique(all_preds[key]))
    print("True label unique değerleri:", np.unique(all_true))

    # Every label that occurs in the ground truth or in any model's predictions.
    observed = set(all_true)
    for preds in all_preds.values():
        observed.update(preds)
    all_unique_labels = sorted(observed)

    # Per-model classification report and confusion matrix.
    for key, display_name, _ in model_specs:
        print(f"{display_name} Performance:")
        print(classification_report(all_true, all_preds[key], labels=all_unique_labels,
                                    target_names=all_unique_labels, zero_division=0))
        print("Confusion Matrix:")
        print(confusion_matrix(all_true, all_preds[key], labels=all_unique_labels))

    # Summary metrics; each model is scored on its own predictions (the
    # Transformer line previously reused the GRU predictions).
    print("MODEL PERFORMANS KARŞILAŞTIRMASI:")
    for key, _, prefix in model_specs:
        acc, f1, kappa = calculate_metrics(all_true, all_preds[key])
        print(f"{prefix}Accuracy={acc:.4f}, F1={f1:.4f}, Kappa={kappa:.4f}")

# label_mapping is defined in the preprocessing cell (In [3]) and is deliberately
# not redefined here, so there is a single source of truth for the event codes.

if __name__ == "__main__":
    # Directory holding the *_eeg.set / *_events.tsv test recordings.
    test_data_dir = "./EmotionData/Emotion_EDF_Testing_Data"
    test_emotion_models(test_data_dir)

Processing: sub-mit081_task-Emotion_eeg.set


pns: ['ECG', 'EMG', 'EMG_2']
  raw = mne.io.read_raw_eeglab(eeg_file, preload=True, verbose=False)                             # EEG verisini ve olaylari yukluyoruz.


Not setting metadata
135 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 135 events and 501 original time points ...
0 bad epochs dropped


  raw = mne.io.read_raw_eeglab(eeg_file, preload=True, verbose=False)                             # EEG verisini ve olaylari yukluyoruz.
  area_curve = np.trapz(ch_signal)                                                            # Egri alti alan, sinyalin integraline benzer toplam degeri


Özellik çıkarımı için toplam 135 epoch işlendi.












Processing: sub-mit082_task-Emotion_eeg.set


pns: ['ECG', 'EMG', 'EMG_2']
  raw = mne.io.read_raw_eeglab(eeg_file, preload=True, verbose=False)                             # EEG verisini ve olaylari yukluyoruz.


Not setting metadata
165 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 165 events and 501 original time points ...
0 bad epochs dropped


  raw = mne.io.read_raw_eeglab(eeg_file, preload=True, verbose=False)                             # EEG verisini ve olaylari yukluyoruz.
  raw = mne.io.read_raw_eeglab(eeg_file, preload=True, verbose=False)                             # EEG verisini ve olaylari yukluyoruz.
  area_curve = np.trapz(ch_signal)                                                            # Egri alti alan, sinyalin integraline benzer toplam degeri


Özellik çıkarımı için toplam 165 epoch işlendi.




LabelEncoder sınıfları: [ 3  8 10 11 12 13 14 15 16 17]
LabelEncoder sınıf sayısı: 10
Model tahmin unique değerleri (RNN): ['dominance' 'familiarity' 'fixl' 'liking' 'trno' 'vlnc']
Model tahmin unique değerleri (LSTM): ['IBEG' 'IEND' 'bend' 'fixl']
Model tahmin unique değerleri (GRU): ['IBEG' 'base' 'baseline' 'dominance' 'fixL' 'quiz' 'vlnc']
Model tahmin unique değerleri (Transformer): ['IBEG' 'arousal' 'base' 'fixL' 'quiz' 'trno']
True label unique değerleri: ['IBEG' 'IEND' 'arousal' 'boundary' 'cate' 'clic' 'dominance'
 'familiarity' 'fixl' 'liking' 'puse' 'relevance' 'rsrt' 'stm' 'stop'
 'trno' 'vlnc']
RNN Performance:
              precision    recall  f1-score   support

        IBEG       0.00      0.00      0.00         4
        IEND       0.00      0.00      0.00         4
     arousal       0.00      0.00      0.00        21
        base       0.00      0.00      0.00         0
    baseline       0.00      0.00      0.00         0
        bend       0.00      0.00      0.00