In [8]:
# shannon entropy and n-gram extraction from microstate sequences of subject 1
import mne
import neurokit2 as nk
import pandas as pd

eeg_file_raw = r"C:\Users\90537\Desktop\sub-001_task-eyesclosed_eeg.set"
raw_eeg = mne.io.read_raw_eeglab(eeg_file_raw, preload=True)

shannon_entropies = []
ngrams = []
segment_len = 5

for time in range(int(raw_eeg.n_times / raw_eeg.info["sfreq"] / segment_len)):
    start_time = time * segment_len
    end_time = (time + 1) * segment_len

    best_number_of_microstates = 4
    best_gev = 0
    best_microstates = None

    for number_of_microstates in range(4, 8):
        microstates = nk.microstates_segment(raw_eeg.copy().crop(tmin=start_time, tmax=end_time), n_microstates=number_of_microstates, method="kmod", random_state=42)
        gev = microstates['GEV']  

        if gev > best_gev:
            best_gev = gev
            best_number_of_microstates = number_of_microstates
            best_microstates = microstates

    shannon_entropy = nk.microstates_complexity(best_microstates)
    
    sequence = best_microstates["Sequence"]
    all_pairs = [f"{i}{j}" for i in range(7) for j in range(7)]
    pair_counts = {pair: 0 for pair in all_pairs}
    for i in range(len(sequence) - 1):
        pair = f"{int(sequence[i])}{int(sequence[i + 1])}"
        pair_counts[pair] += 1
    n_gram = {str(key): [value] for key, value in sorted(pair_counts.items())}
    n_gram = pd.DataFrame(n_gram)

    shannon_entropies.append(shannon_entropy)
    ngrams.append(n_gram)

shannon_entropy_df = pd.concat(shannon_entropies, ignore_index=True)
ngrams_df = pd.concat(ngrams, ignore_index=True)
subject1_features_df = pd.merge(shannon_entropy_df, ngrams_df, left_index=True, right_index=True, how="inner")
subject1_features_df = subject1_features_df.drop(columns=["00", "11", "22", "33", "44", "55", "66"], axis=1)
columns = ["01", "02", "03", "04", "05", "06", "10", "12", "13", "52", "53", "54", "56", "60", "61", "62", "63", "64", "65"]
mapping = {"0": "A", "1": "B", "2": "C", "3": "D", "4": "E", "5": "F", "6": "G"}
subject1_features_df.columns = ["".join(mapping[digit] for digit in col) if col != 'Microstates_Entropy_Shannon' else col for col in subject1_features_df.columns]
subject1_features_df.rename(columns={"Microstates_Entropy_Shannon": "Shannon Entropy"}, inplace=True)
subject1_features_df

Unnamed: 0,Shannon Entropy,AB,AC,AD,AE,AF,AG,BA,BC,BD,...,FC,FD,FE,FG,GA,GB,GC,GD,GE,GF
0,2.408724,0,20,10,4,33,4,0,13,4,...,25,8,0,0,4,5,0,1,2,2
1,2.660946,5,5,12,4,5,14,4,2,32,...,8,11,4,1,15,50,0,4,13,0
2,2.576888,32,12,9,10,2,2,24,9,20,...,0,1,22,3,4,12,2,1,5,2
3,2.627937,12,1,1,14,16,1,11,5,0,...,6,10,0,0,1,0,1,12,29,4
4,2.720854,1,36,0,6,9,22,1,15,10,...,17,8,0,1,18,5,3,5,9,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,2.504092,15,6,27,0,3,0,17,3,11,...,1,16,6,0,0,0,0,0,0,0
115,2.614569,5,0,0,4,0,11,3,12,1,...,5,23,12,0,6,1,6,21,0,1
116,2.708244,27,7,23,1,0,13,30,12,11,...,11,0,10,1,9,2,2,15,15,2
117,2.712256,1,1,0,9,9,10,0,8,14,...,6,24,0,25,11,0,21,1,0,19
