In [None]:
# shannon entropy, n-gram, mean duration, frequency, percentage time extraction from microstate sequences of subject 1
import itertools

import mne
import neurokit2 as nk
import numpy as np
import pandas as pd

# load the EEGLAB recording fully into memory (preload=True)
eeg_file_raw = r"C:\Users\90537\Desktop\sub-001_task-eyesclosed_eeg.set"
raw_eeg = mne.io.read_raw_eeglab(eeg_file_raw, preload=True)

# per-segment accumulators: each list receives one entry per analysed segment
shannon_entropies, ngrams_2, ngrams_3 = [], [], []
mean_microstates_duration_lst, mean_number_of_microstates_lst, percentage_times_lst = [], [], []

# analysis parameters: segment length (used as the crop window, in seconds)
# and the number of microstate classes requested from the segmentation
segment_len, number_of_microstates = 5, 4

def _ngram_frequencies(sequence, n, n_states):
    """Return a one-row DataFrame of relative n-gram frequencies.

    One column is produced for every possible n-gram over the labels
    ``0 .. n_states - 1`` (zero when the n-gram never occurs), except the pure
    self-repetitions such as "00" or "111", which are dropped.  Counts are
    divided by the number of sliding windows, ``len(sequence) - n + 1``; if the
    sequence is shorter than ``n`` every frequency is 0.0.
    """
    symbols = [str(state) for state in range(n_states)]
    labels = [str(int(state)) for state in sequence]

    counts = {"".join(gram): 0 for gram in itertools.product(symbols, repeat=n)}
    # zip over n shifted copies of the labels yields every sliding window once
    for gram in zip(*(labels[offset:] for offset in range(n))):
        counts["".join(gram)] += 1

    n_windows = max(len(labels) - n + 1, 0)
    frame = pd.DataFrame(
        {
            gram: [count / n_windows if n_windows else 0.0]
            for gram, count in sorted(counts.items())
        }
    )
    return frame.drop(columns=[symbol * n for symbol in symbols])


# number of complete segments that fit into the recording
n_segments = int(raw_eeg.n_times / raw_eeg.info["sfreq"] / segment_len)
sampling_rate = int(raw_eeg.info["sfreq"])

for segment_idx in range(n_segments):
    start_time = segment_idx * segment_len
    end_time = (segment_idx + 1) * segment_len

    # segmentation of one window into `number_of_microstates` microstates
    # NOTE(review): Raw.crop appears to include `tmax` by default, so adjacent
    # windows would share their boundary sample -- confirm whether
    # include_tmax=False is wanted here.
    microstates = nk.microstates_segment(
        raw_eeg.copy().crop(tmin=start_time, tmax=end_time),
        n_microstates=number_of_microstates,
        random_state=42,
    )

    # shannon entropy
    shannon_entropy = nk.microstates_complexity(microstates, show=False)

    # n-grams: relative frequency of every 2- and 3-state transition pattern.
    # Each is normalised by its own window count (trigrams were previously
    # divided by len(sequence) - 1 instead of len(sequence) - 2).
    sequence = microstates["Sequence"]
    n_gram_2 = _ngram_frequencies(sequence, 2, number_of_microstates)
    n_gram_3 = _ngram_frequencies(sequence, 3, number_of_microstates)

    # static microstate statistics, computed once and shared by the features below
    microstates_statistics = nk.microstates_static(
        microstates, sampling_rate=sampling_rate, show=False
    )
    states = range(number_of_microstates)

    # mean microstates duration (DurationMean scaled by 1000)
    mean_microstates_duration = {
        f"Mean_Microstate_{state}_Duration": microstates_statistics[f"Microstate_{state}_DurationMean"] * 1000
        for state in states
    }
    mean_microstates_durations = pd.DataFrame(dict(sorted(mean_microstates_duration.items())))

    # mean number of microstates
    # NOTE(review): the "Frequency" columns are filled with neurokit's
    # `Proportion` values -- confirm this is the intended quantity.
    mean_number_of_microstate = {
        f"Microstate_{state}_Frequency": microstates_statistics[f"Microstate_{state}_Proportion"]
        for state in states
    }
    mean_number_of_microstates = pd.DataFrame(dict(sorted(mean_number_of_microstate.items())))

    # percentage time: per-state weight = mean duration (x1000) times
    # ceil(proportion * number of samples), normalised by the sum over all states
    state_time = {
        state: (microstates_statistics[f"Microstate_{state}_DurationMean"] * 1000)
        * np.ceil(microstates_statistics[f"Microstate_{state}_Proportion"] * len(sequence))
        for state in states
    }
    total_state_time = sum(state_time.values())  # computed once instead of once per state
    percentage_time = {
        f"Microstate_{state}_Percentage_Time": state_time[state] / total_state_time
        for state in states
    }
    percentage_times = pd.DataFrame(dict(sorted(percentage_time.items())))

    shannon_entropies.append(shannon_entropy)
    ngrams_2.append(n_gram_2)
    ngrams_3.append(n_gram_3)
    mean_microstates_duration_lst.append(mean_microstates_durations)
    mean_number_of_microstates_lst.append(mean_number_of_microstates)
    percentage_times_lst.append(percentage_times)

# feature matrix construction
# stack the per-segment frames row-wise so that each row is one segment
shannon_entropy_df = pd.concat(shannon_entropies, ignore_index=True)
ngrams_2_df = pd.concat(ngrams_2, ignore_index=True)
ngrams_3_df = pd.concat(ngrams_3, ignore_index=True)
mean_microtates_durations_df = pd.concat(mean_microstates_duration_lst, ignore_index=True)
mean_number_of_microstates_df = pd.concat(mean_number_of_microstates_lst, ignore_index=True)
percentage_times_df = pd.concat(percentage_times_lst, ignore_index=True)

# bigram and trigram columns side by side
ngrams_df = pd.concat([ngrams_2_df, ngrams_3_df], axis=1)

# entropy + n-grams joined on the row index, then the remaining feature
# groups appended column-wise in the same order as before
subject1_features_df = shannon_entropy_df.merge(
    ngrams_df, left_index=True, right_index=True, how="inner"
)
subject1_features_df = pd.concat(
    [
        subject1_features_df,
        mean_microtates_durations_df,
        mean_number_of_microstates_df,
        percentage_times_df,
    ],
    axis=1,
)
subject1_features_df

Unnamed: 0,Microstates_Entropy_Shannon,01,02,03,10,12,13,20,21,23,...,Mean_Microstate_2_Duration,Mean_Microstate_3_Duration,Microstate_0_Frequency,Microstate_1_Frequency,Microstate_2_Frequency,Microstate_3_Frequency,Microstate_0_Percentage_Time,Microstate_1_Percentage_Time,Microstate_2_Percentage_Time,Microstate_3_Percentage_Time
0,1.967081,0.0064,0.0120,0.0040,0.0068,0.0108,0.0080,0.0104,0.0112,0.0140,...,14.561798,25.507692,0.185126,0.224310,0.259096,0.331467,0.159259,0.204585,0.196286,0.439870
1,1.732418,0.0204,0.0072,0.0088,0.0172,0.0064,0.0080,0.0076,0.0048,0.0028,...,7.897436,17.440000,0.445422,0.318673,0.061575,0.174330,0.522744,0.308212,0.023310,0.145735
2,1.876482,0.0180,0.0056,0.0124,0.0168,0.0080,0.0048,0.0048,0.0072,0.0048,...,12.619048,20.836364,0.284286,0.380648,0.105958,0.229108,0.218568,0.481194,0.065693,0.234544
3,1.728390,0.0164,0.0036,0.0032,0.0132,0.0192,0.0024,0.0072,0.0172,0.0032,...,27.130435,12.545455,0.173131,0.397441,0.374250,0.055178,0.115354,0.400661,0.453095,0.030890
4,1.941501,0.0064,0.0032,0.0140,0.0052,0.0088,0.0068,0.0024,0.0080,0.0104,...,19.886792,23.696203,0.231907,0.183127,0.210716,0.374250,0.218744,0.154760,0.201038,0.425458
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,1.917783,0.0048,0.0020,0.0036,0.0040,0.0152,0.0076,0.0032,0.0132,0.0096,...,26.769231,22.230769,0.128749,0.292283,0.347861,0.231108,0.133256,0.262947,0.389112,0.214685
115,1.935958,0.0012,0.0124,0.0164,0.0016,0.0032,0.0140,0.0104,0.0056,0.0016,...,16.000000,19.950617,0.309476,0.223511,0.143942,0.323071,0.312308,0.259949,0.112604,0.315138
116,1.892679,0.0180,0.0072,0.0020,0.0172,0.0012,0.0152,0.0080,0.0020,0.0056,...,16.100000,17.068966,0.380248,0.293083,0.128749,0.197921,0.501642,0.241252,0.097767,0.159340
117,1.890390,0.0052,0.0048,0.0140,0.0040,0.0044,0.0040,0.0064,0.0048,0.0088,...,22.320000,20.328358,0.385046,0.119552,0.223111,0.272291,0.486710,0.092327,0.199364,0.221599
