## Multiscale Entropy Analysis
***

### 1. Import libraries

In [None]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd
from scipy.stats import zscore
from tqdm.notebook import tqdm
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.colors import sample_colorscale
import matplotlib.pyplot as plt

try:
    import mne
except ImportError:
    print("mne library not found. Please install it for EEG data processing.")

try:
    import EntropyHub
    Mobj = EntropyHub.MSobject('SampEn')
except ImportError:
    print("EntropyHub library not found. Please install it for entropy analysis.")

try:
    import utilities
except ImportError:
    print("Utilities module not found. Check if the file is in the project directory.")

# Matplotlib settings for GUI compatibility: set the default font size to 12
# and switch on the 'interactive' rcParam.
plt.rcParams.update({'font.size': 12, 'interactive': True})


### 2. Set the folder path to the data; this produces a list of subjects based on the CSV files in that folder

In [None]:

def get_subject_list(folder_path):
    """
    List the subjects that have a CSV file directly inside ``folder_path``.

    A subject name is the CSV file name with its extension removed
    (e.g. ``S01_FilteredData.csv`` -> ``S01_FilteredData``). Files without a
    ``.csv`` extension are ignored.

    :param folder_path: Directory to scan (non-recursively) for ``.csv`` files.
    :return: 1-D NumPy array of subject names, sorted alphabetically.
    :raises FileNotFoundError: If ``folder_path`` does not exist (raised by ``os.listdir``).
    """
    # os.listdir returns entries in arbitrary, platform-dependent order. Sort so
    # that an index into the returned array always selects the same subject.
    csv_files = sorted(name for name in os.listdir(folder_path) if name.endswith('.csv'))
    return np.array([os.path.splitext(name)[0] for name in csv_files])

# Folder that holds the per-subject CSV files. The path is hard-coded here:
# replace it with your own folder path or use a GUI element to get the path.
folder_path = '/Users/tannercreel/Desktop/Dissertation/Cannabis-MSE/ProcessedData copy/Low_Frequency Users'  # This will be set through the GUI
# Subject names are derived from the CSV file names found in folder_path.
subject_list = get_subject_list(folder_path)
print(subject_list)


### 3. Load data for one subject of choice. Specify the index of the subject from `subject_list`

In [None]:

def load_subject_data(folder_path, subject_name, sfreq):
    """
    Load one subject's CSV file and build a matching time axis.

    The file is looked up as ``<subject_name>_FilteredData.csv`` unless
    ``subject_name`` already contains ``_FilteredData``, in which case
    ``<subject_name>.csv`` is used.

    :param folder_path: Directory containing the subject CSV files.
    :param subject_name: Subject identifier, with or without the ``_FilteredData`` suffix.
    :param sfreq: Sampling frequency, used to convert sample indices to time.
    :return: Tuple ``(data, ch_names, time)``: the DataFrame without its first
        column, the remaining column labels, and a time vector with one entry
        per row, starting at 0 and spaced ``1 / sfreq`` apart.
    :raises FileNotFoundError: If the expected CSV file does not exist.
    :raises ValueError: If the CSV file has one column or fewer.
    """
    if '_FilteredData' in subject_name:
        file_name = f"{subject_name}.csv"
    else:
        file_name = f"{subject_name}_FilteredData.csv"
    dpath = os.path.join(folder_path, file_name)

    if not os.path.exists(dpath):
        raise FileNotFoundError(f"No such file: {dpath}")

    data = pd.read_csv(dpath)
    if data.shape[1] <= 1:
        raise ValueError("Data format error: Expected more than one column.")

    data = data.iloc[:, 1:]  # Remove the first column (assumed timestamps)
    ch_names = data.columns

    # Sample k is taken at k / sfreq. np.linspace(0, N / sfreq, N) would include
    # the endpoint and stretch the spacing to N / ((N - 1) * sfreq).
    time = np.arange(len(data)) / sfreq
    return data, ch_names, time

# Usage (Ensure that 'subject_list' and 'folder_path' are already defined)
subject_idx = 0  # position in subject_list of the subject to load
sfreq = 1000  # sampling frequency passed to load_subject_data (presumably Hz — confirm against the recordings)
subject_name = subject_list[subject_idx]
# data: DataFrame without its first column; ch_names: its column labels; time: one time stamp per row
data, ch_names, time = load_subject_data(folder_path, subject_name, sfreq)



### 4. Trim the data to include only the first 30 sec

In [None]:
def trim_data(data, trim_length_sec, sfreq):
    """
    Trims the data to the specified length in seconds.

    If the recording is shorter than the requested length, all of it is
    returned instead of raising an ``IndexError``.

    :param data: DataFrame containing the EEG data, one row per sample.
    :param trim_length_sec: Length in seconds to which the data is to be trimmed.
        Fractional values are allowed; negative values yield an empty frame.
    :param sfreq: Sampling frequency of the data.
    :return: New DataFrame holding at most the first ``trim_length_sec * sfreq`` rows.
    """
    # Convert to a whole, non-negative sample count. A negative stop would make
    # the slice drop rows from the end rather than return nothing.
    n_samples = max(int(round(trim_length_sec * sfreq)), 0)
    # A slice clips at the end of the data, unlike an explicit index array.
    # Copy so the result is independent of the input frame.
    return data.iloc[:n_samples, :].copy()

# Example usage: keep only the first 30 seconds of the loaded subject's data.
# NOTE(review): the single-subject entropy cells below read `data`, not
# `trimmed_data` — confirm which one is intended.
trimmed_data = trim_data(data, 30, sfreq)  # Trim to first 30 seconds


### 5. Compute sample entropy on a given channel of the data at multiple scales
- #### Specify which channel of the data to use
- #### Specify the scales at which the entropy should be computed

In [None]:
ch = ch_names[0]  # channel whose entropy curve is left in Msx / CI for the plot cell below
scales = 20       # number of scales passed to EntropyHub.MSEn
scales_list = np.arange(1, scales+1)

# NOTE(review): entropy is computed on the untrimmed `data`; `trimmed_data`
# from section 4 is not used here — confirm which one is intended.

# Collect one record per channel and build the frame once at the end.
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration.
entropy_rows = []
for ch_name in ch_names:  # separate loop variable so the selected `ch` is not overwritten
    ch_msx, ch_ci = EntropyHub.MSEn(data[ch_name].values, Mobj, Scales=scales)
    entropy_rows.append({"channel": ch_name,
                         "entropy": ch_msx,
                         "complexity_index": ch_ci})
    if ch_name == ch:
        # Keep the selected channel's result for plotting.
        Msx, CI = ch_msx, ch_ci

# One row per channel: the per-scale entropy values and the complexity index.
entropy_df = pd.DataFrame(entropy_rows, columns=["channel", "entropy", "complexity_index"])


In [None]:
# Entropy-versus-scale curve for the values currently held in `Msx`.
entropy_trace = go.Scatter(
    x=scales_list,
    y=Msx,
    mode='lines+markers',
    line={'color': 'black'},
    marker={'color': 'black', 'size': 8},
)

fig = go.Figure()
fig.add_trace(entropy_trace)
fig.update_layout(
    template='simple_white',
    width=600,
    height=400,
    font={'size': 20},
    xaxis_title='Scales',
    yaxis_title='Sample Entropy',
)
fig.show()

### 6. Compute MSE for each channel and plot all the channels and the average MSE across channels
- #### Just like in section 5, specify the number of scales to include

In [None]:
scales = 20  # number of scales passed to EntropyHub.MSEn
scales_list = np.arange(1, scales+1)

# Accumulate per-channel results in plain containers and build the frames
# once after the loop (DataFrame.append was removed in pandas 2.0).
mse_by_channel = {}   # channel name -> entropy values, one per scale
ci_rows = []          # one {'Channel', 'CI'} record per channel

for ch in tqdm(ch_names):
    ch_data = data[ch].values

    Msx, CI = EntropyHub.MSEn(ch_data, Mobj, Scales=scales)

    mse_by_channel[ch] = Msx
    ci_rows.append({'Channel': ch, 'CI': CI})

# Rows are scales (in the order of scales_list), columns are channels.
mse_across_channels_df = pd.DataFrame(mse_by_channel, columns=ch_names)
# One row per channel with its complexity index.
ci_df = pd.DataFrame(ci_rows, columns=['Channel', 'CI'])

In [None]:
# Two panels side by side: entropy curves in column 1, complexity-index bars in column 2.
colorscale = 'cividis'
n_channels = len(ch_names)
colors = sample_colorscale(colorscale, np.linspace(0,1,n_channels), low=0.0, high=0.9, colortype='rgb')
fig = make_subplots(cols=2, column_widths=[0.2,0.4], horizontal_spacing=0.08)

# One curve per channel, each drawn in its own sampled color.
for ch, ch_color in zip(ch_names, colors):
    channel_trace = go.Scattergl(
        x=scales_list,
        y=mse_across_channels_df[ch].values,
        mode='lines+markers',
        line=dict(color=ch_color),
        marker=dict(color=ch_color, size=8),
        name=ch,
    )
    fig.add_trace(channel_trace, row=1, col=1)

# Mean across channels at every scale, with the standard error as error bars.
mean_mse = mse_across_channels_df.mean(axis=1).values
sem_mse = mse_across_channels_df.sem(axis=1).values
average_trace = go.Scattergl(
    x=scales_list,
    y=mean_mse,
    error_y=dict(type='data', array=sem_mse, visible=True),
    mode='lines+markers',
    line=dict(color='brown', width=6),
    marker=dict(color='brown', size=12),
    name='Average',
)
fig.add_trace(average_trace, row=1, col=1)

# Complexity index per channel, colored to match the curves.
ci_bars = go.Bar(
    x=ci_df['Channel'],
    y=ci_df['CI'],
    marker=dict(color=colors, line=dict(color='black', width=1)),
    showlegend=False,
)
fig.add_trace(ci_bars, row=1, col=2)

fig.update_layout(
    template='simple_white',
    width=1800,
    height=600,
    font=dict(size=20),
    xaxis_title='Scales',
    yaxis_title='Sample Entropy',
    title_text=subject_list[subject_idx],
)
fig.update_yaxes(title_text='Complexity Index', row=1, col=2)
fig.show()
# fig.write_html('./{}_EntropyAcrossChannels.html'.format(subject_list[subject_idx]))

***
### 7. Compute MSE across all subjects in a given group
- #### a. Load all data for all subjects in `folder_path` and put in a dictionary object
- #### b. Compute MSE for each channel, get the average MSE for each subject, and save the MSE per scale in a df
- #### c. Plot the MSE values across each subject and the average across subjects

#### a. Load all data for all subjects in `folder_path` and put in a dictionary object

In [None]:
all_data_dict = {} # initialize an empty dictionary to which each subject's data will be added
trim_length_sec = 30 # length of data (in seconds) to keep for every subject

for subject_name in tqdm(subject_list):
    # Load from `folder_path`, the folder `subject_list` was built from, using the
    # same loader as the single-subject cells. The previous hard-coded
    # './data/Non_Users/...' path pointed at a different group, and it appended
    # '_FilteredData' even to names that already carry that suffix.
    # load_subject_data also drops the first (timestamps) column.
    subject_df, _, _ = load_subject_data(folder_path, subject_name, sfreq)

    # trim the data to just the first `trim_length_sec` seconds and store it
    all_data_dict[subject_name] = trim_data(subject_df, trim_length_sec, sfreq)

#### b. Compute MSE for each channel, get the average MSE for each subject, and save the MSE per scale in a df

In [None]:
scales = 20  # number of scales passed to EntropyHub.MSEn
scales_list = np.arange(1, scales+1)
chs_to_exclude = ['VEOG', 'HEOG', 'LeftMast', 'RightMast'] # define any channels to exclude

# Per-subject results are collected in lists and combined once after the loop
# (DataFrame.append was removed in pandas 2.0).
subject_mse_frames = []  # one long-format frame (Subject, Scale, Entropy) per subject
subject_ci_rows = []     # one {'Subject', 'CI'} record per subject

for subject_name in tqdm(subject_list):
    data = all_data_dict[subject_name]
    # Drop the excluded channels. The mask must go through .loc on the column
    # axis: `data[[bool, ...]]` would be read as a row filter and fail on a
    # length mismatch.
    data = data.loc[:, ~data.columns.isin(chs_to_exclude)]

    mean_eeg = data.mean(axis=1).values # compute mean eeg trace across the remaining channels
    Msx, CI = EntropyHub.MSEn(mean_eeg, Mobj, Scales=scales) # compute entropy across scales and complexity index

    # stash the current subject's results
    subject_mse_frames.append(pd.DataFrame({'Subject':subject_name, 'Scale':scales_list, 'Entropy':Msx}))
    subject_ci_rows.append({'Subject':subject_name, 'CI':CI})

# Combine into the overall dataframes. pd.concat raises on an empty list, so
# fall back to an empty frame with the expected columns when there are no subjects.
if subject_mse_frames:
    mse_across_subjects_df = pd.concat(subject_mse_frames)
else:
    mse_across_subjects_df = pd.DataFrame(columns=['Subject','Scale','Entropy'])
ci_across_subjects_df = pd.DataFrame(subject_ci_rows, columns=['Subject','CI'])


#### c. Plot the MSE values across each subject and the average across subjects

In [None]:
fig = go.Figure()

# compute mean and standard error across subjects for MSE
# Aggregate only the 'Entropy' column. Reducing the whole frame would also try
# to average the string 'Subject' column, which raises TypeError on pandas >= 2.0.
# The cast guards against an object-dtype column.
entropy_by_scale = mse_across_subjects_df.astype({'Entropy': float}).groupby('Scale')['Entropy']
avg_mse_across_subjects = entropy_by_scale.mean().values
sem_mse_across_subjects = entropy_by_scale.sem().values

# plot each subject's data
for subject_name in mse_across_subjects_df['Subject'].unique():
    subject_data = mse_across_subjects_df[mse_across_subjects_df['Subject'] == subject_name]

    fig.add_trace(go.Scattergl(x=subject_data['Scale'].values, y=subject_data['Entropy'].values, mode='lines+markers', line=dict(color='slategrey'),
                               marker=dict(color='slategrey', size=8), name=subject_name))

# add line for average across subjects (groupby sorts by scale, matching scales_list)
fig.add_trace(go.Scattergl(x=scales_list, y=avg_mse_across_subjects, error_y=dict(type='data', array=sem_mse_across_subjects, visible=True),
                           mode='lines+markers', line=dict(color='black', width=6),
                           marker=dict(color='black', size=12), name='Average'))

fig.update_layout(template='simple_white', width=1000, height=600, font=dict(size=20),
                  xaxis_title='Scales', yaxis_title='Sample Entropy')
fig.show()
