## Multiscale Entropy Analysis
***

### 1. Import libraries

In [None]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd
from scipy.stats import zscore
from tqdm.notebook import tqdm
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.colors import sample_colorscale
import matplotlib.pyplot as plt
from scipy.signal import butter, filtfilt

# Optional dependencies: a failed import only prints a hint here, so any later
# cell that uses the missing name will fail with a NameError.
try:
    import mne
except ImportError:
    print("mne library not found. Please install it for EEG data processing.")

try:
    import EntropyHub
    # Multiscale entropy object shared by every EntropyHub.MSEn call in this notebook.
    # NOTE(review): the object is configured with 'FuzzEn', while the plot axes below
    # are labelled 'Sample Entropy' - confirm which estimator is intended.
    Mobj = EntropyHub.MSobject('FuzzEn')
except ImportError:
    print("EntropyHub library not found. Please install it for entropy analysis.")

try:
    import utilities
except ImportError:
    print("Utilities module not found. Check if the file is in the project directory.")

# Matplotlib settings for GUI compatibility: default font size and interactive mode
plt.rcParams.update({'font.size': 12, 'interactive': True})


### 2. Set the folder path to the data. This produces a list of subjects based on the CSV files in that folder

In [None]:

def get_subject_list(folder_path):
    """Return the subject names found in ``folder_path``.

    A subject name is the name of a ``.csv`` file with its extension removed.
    Names are sorted so that a positional index into the result (for example
    ``subject_idx``) selects the same subject on every run; ``os.listdir``
    itself returns entries in arbitrary order.

    Parameters
    ----------
    folder_path : str
        Directory to scan. Only its direct entries are considered.

    Returns
    -------
    numpy.ndarray
        Sorted array of subject names (empty if no ``.csv`` file is present).

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when ``folder_path`` cannot be listed.
    """
    subject_names = sorted(
        os.path.splitext(file_name)[0]
        for file_name in os.listdir(folder_path)
        if file_name.endswith('.csv')
    )
    return np.array(subject_names)

# Folder holding one CSV file per subject.
# Replace with your folder path or use a GUI element to get the path
folder_path = '/Users/tannercreel/Desktop/Test'  # This will be set through the GUI
# Subject names are derived from the CSV file names found in `folder_path`
subject_list = get_subject_list(folder_path)
print(subject_list)


### 3. Load data for one subject of choice. Specify the index of the subject from `subject_list`

In [None]:

subject_idx = 1  # position of the subject to load within `subject_list`
# Build the path from `folder_path` so the file is read from the same folder
# that `subject_list` was generated from.
dpath = os.path.join(folder_path, '{}.csv'.format(subject_list[subject_idx]))
data = pd.read_csv(dpath)
data = data.iloc[:, 1:]  # remove the first column because that column is the timestamps column
ch_names = data.columns
sfreq = 256  # sampling frequency (Hz)
# Time of each sample in seconds: sample k is at k / sfreq. (linspace with the
# endpoint included would stretch the spacing to N / (sfreq * (N - 1)).)
time = np.arange(data.shape[0]) / sfreq



### 4. Trim the data to include only the first 30 seconds

In [None]:
# Row positions covering the first 30 seconds (30 * sfreq samples)
trimmed_data_length = np.arange(30 * sfreq)
# Keep only those rows; all channel columns are retained
data = data.iloc[trimmed_data_length]


### 5. Compute Sample Entropy on a given channel of the data at multiple scales and Complexity Index (Single-subject single-channel analysis)

In [None]:
# Channel to analyse and the number of scales to evaluate
target_channel = 'Alpha_AF7'
scales = 20
scales_list = np.arange(1, scales + 1)

# Multiscale entropy values (plotted against `scales_list`) and complexity index
# for the chosen channel
Msx, CI = EntropyHub.MSEn(data[target_channel].values, Mobj, Scales=scales)

# Build the one-row results table directly. Concatenating onto an empty
# DataFrame is deprecated in recent pandas and forces object dtype on every column.
entropy_df = pd.DataFrame(
    {"channel": [target_channel], "entropy": [Msx], "complexity_index": [CI]},
    columns=["channel", "entropy", "complexity_index"],
)

# Two panels side by side: entropy-vs-scale curve (left), complexity index bar (right)
fig = make_subplots(rows=1, cols=2, subplot_titles=("MSE", "Complexity Index"))

# Left panel: entropy across scales
fig.add_trace(
    go.Scatter(x=scales_list, y=Msx, mode='lines+markers', line=dict(color='black'),
               marker=dict(color='black', size=8), name='MSE'),
    row=1, col=1,
)

# Right panel: a single bar holding the complexity index of the channel
fig.add_trace(
    go.Bar(x=[target_channel], y=[CI], marker=dict(color='blue'), name='Complexity Index'),
    row=1, col=2,
)

# Overall layout; `xaxis_title` applies to the first (left) x-axis
fig.update_layout(template='simple_white', width=1200, height=600, font=dict(size=20),
                  xaxis_title='Scales', title_text=f"Analysis for {target_channel}")

# Label the bar with the channel name
fig.update_xaxes(tickvals=[target_channel], ticktext=[target_channel], row=1, col=2)

fig.show()


### 6. Compute Multiscale Entropy (MSE) for each channel, plot all the channels, and the average MSE across channels (Single-subject multichannel analysis)

In [None]:
scales = 20
scales_list = np.arange(1, scales + 1)

# Collect per-channel results first and build each DataFrame once at the end.
# This avoids calling pd.concat inside the loop (which re-copies the frame on
# every iteration) and avoids concatenating onto an empty DataFrame.
mse_by_channel = {}
ci_values = []

for ch in tqdm(ch_names):
    ch_data = data[ch].values

    # Entropy values across scales and the complexity index for this channel
    Msx, CI = EntropyHub.MSEn(ch_data, Mobj, Scales=scales)

    mse_by_channel[ch] = Msx
    ci_values.append(CI)

# One column per channel, one row per scale
mse_across_channels_df = pd.DataFrame(mse_by_channel, columns=ch_names)
# One row per channel with its complexity index
ci_df = pd.DataFrame({'Channel': list(ch_names), 'CI': ci_values}, columns=['Channel', 'CI'])


In [None]:
# Left panel: per-channel MSE curves plus their mean; right panel: CI per channel.
colorscale = 'cividis'
colors = sample_colorscale(
    colorscale,
    np.linspace(0, 1, len(ch_names)),
    low=0.0,
    high=0.9,
    colortype='rgb',
)
fig = make_subplots(cols=2, column_widths=[0.2, 0.4], horizontal_spacing=0.08)

# One curve per channel, each with its own colour sampled from the colorscale
for i, ch in enumerate(ch_names):
    mse_vals = mse_across_channels_df[ch].values
    channel_trace = go.Scattergl(
        x=scales_list,
        y=mse_vals,
        mode='lines+markers',
        line={'color': colors[i]},
        marker={'color': colors[i], 'size': 8},
        name=ch,
    )
    fig.add_trace(channel_trace, row=1, col=1)

# Mean across channels at every scale, with the standard error as error bars
mean_mse = mse_across_channels_df.mean(axis=1).values
sem_mse = mse_across_channels_df.sem(axis=1).values
average_trace = go.Scattergl(
    x=scales_list,
    y=mean_mse,
    error_y={'type': 'data', 'array': sem_mse, 'visible': True},
    mode='lines+markers',
    line={'color': 'brown', 'width': 6},
    marker={'color': 'brown', 'size': 12},
    name='Average',
)
fig.add_trace(average_trace, row=1, col=1)

# Complexity index bars, coloured to match the channel curves
ci_bars = go.Bar(
    x=ci_df['Channel'],
    y=ci_df['CI'],
    marker={'color': colors, 'line': {'color': 'black', 'width': 1}},
    showlegend=False,
)
fig.add_trace(ci_bars, row=1, col=2)

fig.update_layout(
    template='simple_white',
    width=1800,
    height=600,
    font={'size': 20},
    xaxis_title='Scales',
    yaxis_title='Sample Entropy',
    title_text=subject_list[subject_idx],
)
fig.update_yaxes(title_text='Complexity Index', row=1, col=2)
fig.show()
# Save an HTML copy in the current working directory, named after the subject
fig.write_html(f'./{subject_list[subject_idx]}_EntropyAcrossChannels.html')

***
### 7. Compute MSE across all subjects in a given group (Group-level multichannel average)
- #### a. Load all data for all subjects in `folder_path` and put in a dictionary object
- #### b. Compute MSE for each channel, get the average MSE for each subject, and save the MSE per scale in a df
- #### c. Plot the MSE values across each subject and the average across subjects
- #### z. MSE across all channels and across all subjects in folder_path in wide format CSV

#### a. Load all data for all subjects in `folder_path` and put in a dictionary object

In [None]:
all_data_dict = {}  # maps each subject name to that subject's trimmed DataFrame
trimmed_data_length = np.arange(0, 30*sfreq)  # row positions of the first 30 * sfreq samples

for subject_name in tqdm(subject_list):
    # Read from `folder_path`, the folder `subject_list` was generated from, so
    # every listed subject is guaranteed to have a file there.
    dpath = os.path.join(folder_path, '{}.csv'.format(subject_name))
    data = pd.read_csv(dpath)
    data = data.iloc[:, 1:]  # remove the first column because that column is the timestamps column

    data = data.iloc[trimmed_data_length, :]  # trim the data to just the first 30sec

    all_data_dict[subject_name] = data  # add the current subject's data to the dict

#### b. Compute MSE for each channel, get the average MSE for each subject, and save the MSE per scale in a df

In [None]:
scales = 20
scales_list = np.arange(1, scales+1)
chs_to_exclude = ['VEOG', 'HEOG', 'LeftMast', 'RightMast']  # define any channels to exclude

# Per-subject results are collected in lists and combined once after the loop.
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration.
subject_mse_frames = []
ci_records = []

for subject_name in tqdm(subject_list):
    data = all_data_dict[subject_name]
    # Drop the excluded channels; names not present in this subject's data are ignored
    data = data.drop(columns=chs_to_exclude, errors='ignore')

    # NOTE(review): entropy is computed on the channel-averaged trace, whereas the
    # section heading describes averaging per-channel MSE - confirm which is intended.
    mean_eeg = data.mean(axis=1).values  # compute mean eeg trace
    Msx, CI = EntropyHub.MSEn(mean_eeg, Mobj, Scales=scales)  # entropy across scales and complexity index

    # Long format: one row per (subject, scale)
    subject_mse_frames.append(
        pd.DataFrame({'Subject': subject_name, 'Scale': scales_list, 'Entropy': Msx})
    )
    ci_records.append({'Subject': subject_name, 'CI': CI})

# pd.concat raises on an empty list, so fall back to an empty frame with the same columns
if subject_mse_frames:
    mse_across_subjects_df = pd.concat(subject_mse_frames)
else:
    mse_across_subjects_df = pd.DataFrame(columns=['Subject', 'Scale', 'Entropy'])
ci_across_subjects_df = pd.DataFrame(ci_records, columns=['Subject', 'CI'])


#### c. Plot the average MSE values across each subject and the average MSE values across all subjects

In [None]:
fig = go.Figure()

# Mean and standard error of entropy across subjects at every scale.
# 'Entropy' is selected before aggregating so the string 'Subject' column is never
# averaged (that raises a TypeError on pandas >= 2.0). The cast to float guards
# against the column having object dtype.
entropy_by_scale = mse_across_subjects_df.astype({'Entropy': float}).groupby('Scale')['Entropy']
avg_mse_across_subjects = entropy_by_scale.mean().values
sem_mse_across_subjects = entropy_by_scale.sem().values

# plot each subject's data
for subject_name in mse_across_subjects_df['Subject'].unique():
    subject_data = mse_across_subjects_df[mse_across_subjects_df['Subject'] == subject_name]

    fig.add_trace(go.Scattergl(x=subject_data['Scale'].values, y=subject_data['Entropy'].values,
                               mode='lines+markers', line=dict(color='slategrey'),
                               marker=dict(color='slategrey', size=8), name=subject_name))

# add line for average across subjects, with the standard error as error bars
fig.add_trace(go.Scattergl(x=scales_list, y=avg_mse_across_subjects,
                           error_y=dict(type='data', array=sem_mse_across_subjects, visible=True),
                           mode='lines+markers', line=dict(color='black', width=6),
                           marker=dict(color='black', size=12), name='Average'))

fig.update_layout(template='simple_white', width=1000, height=600, font=dict(size=20),
                  xaxis_title='Scales', yaxis_title='Sample Entropy')
fig.show()


#### z. Compute MSE across all channels and all subjects in folder_path in wide format CSV

In [None]:
# MSE Wide_Format

# Initialize key parameters
trim_length = 30  # Length of data segments to trim (in seconds)
group_name = 'Frequent_Users'  # Group name for the analysis
scales = 20  # Number of scales for Multiscale Entropy (MSE) calculation
scales_list = np.arange(1, scales + 1)  # List of scales from 1 to the specified number of scales
# NOTE(review): earlier cells use sfreq = 256 when trimming files from the same
# 'Frequent_Users' folder - confirm which sampling frequency is correct.
sfreq = 1000  # Sampling frequency of the data (in Hz)

# Column names for a per-channel layout; not referenced again in this cell
col_names = ["subject", "channel"]
col_names.extend(["scale" + str(scale) for scale in scales_list])

# Rows are collected in plain lists and turned into DataFrames once after the
# loop; DataFrame.append was removed in pandas 2.0.
entropy_rows = []  # one dict per subject: {"subject": ..., "<channel>_scale<k>": entropy}
ci_rows = []       # one dict per (subject, channel) with the complexity index

# Loop through each subject (example uses the first subject for demonstration)
for subject in subject_list[:1]:
    # Define the path to the subject's data file
    dpath = '/Users/tannercreel/Desktop/Dissertation/Python_Projects/Cannabis Complexity/{}/{}.csv'.format(group_name, subject)

    # Read the data from the CSV file
    data = pd.read_csv(dpath)
    # Drop the first column (timestamps), as every other loader in this notebook
    # does, so it is not analysed as if it were a channel
    data = data.iloc[:, 1:]

    # Trim the data to the specified length
    trimmed_data_length = np.arange(0, trim_length * sfreq)
    data = data.iloc[trimmed_data_length, :]

    # Get the channel names from the data
    ch_names = data.columns

    # Wide-format row for this subject
    channel_entropy_dict = {"subject": subject}

    # Calculate MSE and CI for each channel
    for ch in ch_names:
        Msx, CI = EntropyHub.MSEn(data[ch].values, Mobj, Scales=scales)

        # One "<channel>_scale<k>" entry per scale
        channel_entropy_dict.update(
            {f"{ch}_scale{scale}": entropy for scale, entropy in zip(scales_list, Msx)}
        )

        ci_rows.append({"subject": subject, "channel": ch, "complexity_index": CI})

    entropy_rows.append(channel_entropy_dict)

    # Print a completion message for the subject
    print("{} has been completed".format(subject))

# Build the result tables
entropy_allsubjects_df = pd.DataFrame(entropy_rows)
ci_allsubjects_df = pd.DataFrame(ci_rows, columns=["subject", "channel", "complexity_index"])

# Add metadata columns to the DataFrames
entropy_allsubjects_df["group"] = group_name
ci_allsubjects_df["group"] = group_name
entropy_allsubjects_df["trim_length_seconds"] = trim_length
ci_allsubjects_df["trim_length_seconds"] = trim_length

# Optional: Save the DataFrames to CSV files 
# entropy_allsubjects_df.to_csv("/Users/tannercreel/Desktop/Dissertation/Cannabis-MSE/ProcessedData/MSE_All_Groups/MSE_{}.csv".format(group_name))
# ci_allsubjects_df.to_csv("/Users/tannercreel/Desktop/Dissertation/Cannabis-MSE/ProcessedData/MSE_All_Groups/CI_{}.csv".format(group_name))


### 8. Compute MSE and Complexity Index for a specific channel across all subjects in a given group (Group-level single-channel analysis)


In [None]:
# Directory where the EEG data files are stored
data_directory = '/Users/tannercreel/Desktop/Dissertation/Python_Projects/Cannabis Complexity/Non_Users'

# Reuse the shared helper so the subject list is built the same way as in section 2
subject_list = get_subject_list(data_directory)

# Initialize variables
all_data_dict = {}  # maps each subject name to that subject's trimmed DataFrame
# NOTE(review): `sfreq` is whatever an earlier cell left behind (256 in section 3,
# 1000 in the wide-format cell) - confirm the value that applies to these files.
trimmed_data_length = np.arange(0, 30*sfreq)
scales = 20
scales_list = np.arange(1, scales+1)
target_channel = 'FP2'  # Specify the target channel

# Loop through each subject and store their data
for subject_name in tqdm(subject_list):
    dpath = os.path.join(data_directory, f'{subject_name}.csv')
    data = pd.read_csv(dpath)
    data = data.iloc[:, 1:]  # Remove the first column (timestamps)
    data = data.iloc[trimmed_data_length, :]  # Trim to the first 30sec
    all_data_dict[subject_name] = data

# Rows are collected in plain lists and converted to DataFrames once after the loop.
# DataFrame.append was removed in pandas 2.0, and calling it once per row copies
# the whole frame each time.
mse_records = []  # one dict per (subject, scale)
ci_records = []   # one dict per subject

# Loop through each subject and calculate MSE and CI for the target channel
for subject_name in tqdm(subject_list):
    data = all_data_dict[subject_name]
    ch_data = data[target_channel].values  # Extract data for the target channel
    Msx, CI = EntropyHub.MSEn(ch_data, Mobj, Scales=scales)  # Compute MSE and CI

    # Long format: one row per scale for this subject
    mse_records.extend(
        {'Subject': subject_name, 'Scale': scale, 'Entropy': entropy}
        for scale, entropy in zip(scales_list, Msx)
    )
    ci_records.append({'Subject': subject_name, 'CI': CI})

# Build the result tables; explicit columns keep the layout even when there are no rows
mse_single_channel_df = pd.DataFrame(mse_records, columns=['Subject', 'Scale', 'Entropy'])
ci_single_channel_df = pd.DataFrame(ci_records, columns=['Subject', 'CI'])

# Figure showing every subject's entropy curve plus the group average
fig_mse = go.Figure()

# Mean and standard error of entropy across subjects at every scale
entropy_by_scale = mse_single_channel_df.groupby(['Scale'])['Entropy']
avg_mse = entropy_by_scale.mean().values
sem_mse = entropy_by_scale.sem().values

# One trace per subject, in order of first appearance in the table
for subject_name, subject_data in mse_single_channel_df.groupby('Subject', sort=False):
    subject_trace = go.Scattergl(
        x=subject_data['Scale'],
        y=subject_data['Entropy'],
        mode='lines+markers',
        name=subject_name,
    )
    fig_mse.add_trace(subject_trace)

# Group average drawn on top, with the standard error as error bars
average_trace = go.Scattergl(
    x=scales_list,
    y=avg_mse,
    error_y={'type': 'data', 'array': sem_mse, 'visible': True},
    mode='lines+markers',
    line={'color': 'black', 'width': 6},
    marker={'color': 'black', 'size': 12},
    name='Average',
)
fig_mse.add_trace(average_trace)

fig_mse.update_layout(
    template='simple_white',
    width=1000,
    height=600,
    font={'size': 20},
    xaxis_title='Scales',
    yaxis_title='Sample Entropy',
    title=f'Group-Level Single-Channel MSE for {target_channel}',
)
fig_mse.show()

# Bar chart of the complexity index, one bar (and one trace) per subject
fig_ci = go.Figure()

# Keep the first CI row of each subject, in order of first appearance
first_ci_per_subject = ci_single_channel_df.drop_duplicates(subset='Subject')
for subject_name, subject_ci in zip(first_ci_per_subject['Subject'].values,
                                    first_ci_per_subject['CI'].values):
    fig_ci.add_trace(go.Bar(x=[subject_name], y=[subject_ci]))

fig_ci.update_layout(
    template='simple_white',
    width=1000,
    height=600,
    font={'size': 20},
    xaxis_title='Subject',
    yaxis_title='Complexity Index',
    title=f'Group-Level Single-Channel CI for {target_channel}',
)
fig_ci.show()
