# Setup: dependencies, global variables, functions, etc.

In [7]:
import drexel
from importlib import reload
reload(drexel)
from drexel import * # functions and global variables, like data_dir
import mne
from antio import read_cnt
from antio.parser import read_triggers
import pandas as pd
import numpy as np
import seaborn as sn
from matplotlib import pyplot as plt
from matplotlib.ticker import ScalarFormatter
import os
import datetime

# NOTE(review): presumably meant to quieten MNE's console logging — confirm what level the integer 0 maps to
mne.set_log_level(verbose=0)

# If data has already been processed and saved in a CSV, do we (True) redo the analysis and overwrite the CSV or (False) simply import the CSV?
# In the visible cells this flag is only read by the cell that tries to load PSDs.csv.
overwrite = False

# IPython magic: selects the Qt matplotlib backend for this kernel
%matplotlib qt

# Impedance analysis
Analysis of the magnitude and stability of electrode impedances.

## Data crunching

In [23]:
# Export every usable .cnt recording found in data_dir as an EDF file in output_dir.
for filename in os.listdir(data_dir):
    if filename.endswith('.cnt'):
        raw = load_CNT(os.path.join(data_dir, filename))
        # Recordings shorter than 10 s are skipped
        if not raw.duration < 10:
            # NOTE(review): subject_info is blanked before export — presumably required by
            # (or to anonymise) the EDF writer; confirm
            raw.info['subject_info'] = {}
            edf_name = os.path.splitext(filename)[0] + '.edf'
            raw.export(os.path.join(output_dir, edf_name), fmt='edf')

Re-referenced to pseudo-mastoids
Channel positions loaded for WG_Net_NA-261
Annotations (events) renamed
Eyes open/closed recoded as single annotations with durations of ~20s


  raw.export(os.path.join(output_dir,os.path.splitext(filename)[0]+'.edf'), fmt='edf')


Re-referenced to pseudo-mastoids
Channel positions loaded for WG_Net_NA-261
Annotations (events) renamed
Eyes open/closed recoded as single annotations with durations of ~20s


  raw.export(os.path.join(output_dir,os.path.splitext(filename)[0]+'.edf'), fmt='edf')


Re-referenced to pseudo-mastoids
Channel positions loaded for WG_Net_NA-261
Annotations (events) renamed
Eyes open/closed recoded as single annotations with durations of ~20s


  raw.export(os.path.join(output_dir,os.path.splitext(filename)[0]+'.edf'), fmt='edf')


Re-referenced to pseudo-mastoids
Channel positions loaded for WG_Net_NA-261
Annotations (events) renamed
Eyes open/closed recoded as single annotations with durations of ~20s
Re-referenced to pseudo-mastoids
Channel positions loaded for WG_Net_NA-261
Annotations (events) renamed
Eyes open/closed recoded as single annotations with durations of ~20s


  raw.export(os.path.join(output_dir,os.path.splitext(filename)[0]+'.edf'), fmt='edf')


In [None]:
# Collect one impedance table per .CNT recording, then combine them into a
# single long-form DataFrame (one row per channel per measurement).
imp_list = []
for filename in os.listdir(data_dir):
    # Only look at the .CNT files
    if not filename.endswith('.cnt'): continue
    # if not filename.endswith('21-09.cnt'): continue
    # NOTE(review): no statement in this loop appends to imp_list — the call that
    # extracts impedances from each file appears to be missing. TODO restore it.

# Fail with an explicit message instead of pandas' generic "No objects to concatenate"
if not imp_list:
    raise ValueError('imp_list is empty: no impedance data was collected from the .cnt files in data_dir')

# Concatenate impedances from all recordings, convert to long form
df_imp = pd.concat(imp_list, axis=1).melt(value_name='impedance', ignore_index=False).reset_index()

# Add sponge type info (MXene, spare MXene, white) and some other parameters
df_imp = pd.merge(df_imp, df_electrodes) # this comes from drexel.py which is imported at the start

# Elapsed time in minutes is more useful than absolute time.
# total_seconds() is used because .dt.seconds only returns the seconds component
# of each timedelta (it discards whole days and fractions of a second).
elapsed = df_imp['time'] - df_imp['time'].min()
df_imp['time (minutes)'] = np.round(elapsed.dt.total_seconds()/60, 2)

# Save
df_imp.to_csv(os.path.join(output_dir, 'impedances.csv'), index=False)

print('Done!')

In [None]:
# Display the long-form impedance table (whatever df_imp currently holds in the kernel)
df_imp

## Graphs for impedances (saved to "outputs" folder)

In [None]:
# Every impedance figure plots 'impedance' against 'time (minutes)' from df_imp;
# only the plotting function, the extra keyword arguments and the output file differ.
# Each entry: (seaborn figure-level function, extra kwargs, output file name)
impedance_plots = [
    (sn.relplot,
     dict(kind='line', hue='channel', col='type', marker='o'),
     'impedance_line_type-columns.jpg'),
    (sn.relplot,
     dict(kind='line', hue='type', style='channel', marker='o'),
     'impedance_line_type-color.jpg'),
    (sn.catplot,
     dict(kind='box', hue='type'),
     'impedance_boxplots.jpg'),
    (sn.catplot,
     dict(kind='box', hue='type', row='Hemisphere', row_order=['L','Z','R']),
     'impedance_boxplots_hemisphere-row.jpg'),
    (sn.relplot,
     dict(kind='line', hue='type', marker='o'),
     'impedance_line_type-color-collapsed.jpg'),
    (sn.catplot,
     dict(kind='bar', hue='type', palette=['black','grey','lightgrey'],
          hue_order=['MX','spare','white'], row='Hemisphere', row_order=['L','Z','R']),
     'impedance_bar_clustered_hemisphere-rows.jpg'),
    (sn.catplot,
     dict(kind='bar', hue='type', palette=['black','grey','lightgrey']),
     'impedance_bar_clustered.jpg'),
]

for plot_func, plot_kwargs, out_name in impedance_plots:
    g = plot_func(df_imp, x='time (minutes)', y='impedance', **plot_kwargs)
    plot_touchups_and_save(g, name=out_name)

In [None]:
# Per-channel impedance over time, with the natural log of impedance on the y axis
log_impedance = np.log(df_imp['impedance'])
sn.relplot(
    df_imp,
    kind='line',
    x='time (minutes)',
    y=log_impedance,
    hue='channel',
    col='type',
)

In [None]:
# Wide view of the impedances: one row per electrode, one column per measurement time
df_imp.pivot(
    index=['channel','type','AP','Hemisphere'],
    columns='time',
    values='impedance',
)

# 60Hz noise analysis
More 60 Hz line noise indicates a poorer electrical connection between the electrodes and the scalp. This is a better indicator of signal quality than impedance.

## Data crunching

In [None]:
# Run the 60 Hz analysis on every .CNT recording and stack the per-file results.
sixty_list = []
for filename in os.listdir(data_dir):
    # Only look at the .CNT files
    if not filename.endswith('.cnt'): continue
    # if not filename.endswith('21-09.cnt'): continue

    # Run the analysis on this file, and add it to the list
    sixty_list.append(sixty_hz_analysis(filename))

# Concatenate the 60 Hz results from all recordings (row-wise)
df_sixty = pd.concat(sixty_list, axis=0)

# Elapsed time in minutes is more useful than absolute time.
# total_seconds() is used because .dt.seconds only returns the seconds component
# of each timedelta (it discards whole days and fractions of a second).
elapsed = df_sixty['time'] - df_sixty['time'].min()
df_sixty['time (minutes)'] = np.round(elapsed.dt.total_seconds()/60, 2)

# The index is written too; the loading cell selects the columns it needs by name
df_sixty.to_csv(os.path.join(output_dir,'60Hz.csv'))

print('Done!')

## Graphs for 60Hz noise (saved to "outputs" folder)
This section is not yet complete.

In [None]:
# Load the saved 60 Hz results and reshape them for the heatmap.
# The previous `del df_sixty` raised NameError on a fresh kernel and made the
# "already loaded?" check always true, so the file is now simply (re)read every time.
# Only the four columns needed here are kept, renamed to the short names used below.
df_sixty = (
    pd.read_csv(os.path.join(output_dir,'60Hz.csv'))[['channel', '60Hz_uV', 'type_x','time (minutes)']]
    .rename(columns={'type_x':'type','time (minutes)':'time'})
)
print('Loading 60Hz data from file')

# One row per (channel, type), one column per elapsed time
df = df_sixty.pivot(index=['channel','type'], columns='time', values='60Hz_uV')
# df = df.reindex(df.mean(axis=1).sort_values().index)
# Natural log of the 60 Hz amplitude
df = np.log(df)
# Order channels by type, then by their mean log amplitude; the temporary 'mean'
# index level exists only for sorting. Transpose so rows are times and columns are channels.
df = df.set_index(pd.Series(df.mean(axis=1), name='mean'), append=True).sort_values(['type','mean']).droplevel('mean').T

In [None]:
from scipy.ndimage import gaussian_filter

# Heatmap of log 60 Hz amplitude (rows: time, columns: channel/type).
# sigma=[1.5, 0] smooths along the row axis only and leaves the column axis untouched.
smoothed = gaussian_filter(df, sigma=[1.5,0])
heat_df = pd.DataFrame(smoothed, index=df.index.astype(int), columns=df.columns)
sn.heatmap(heat_df, xticklabels=True, cmap='coolwarm')

In [None]:
# Per-channel 60 Hz amplitude over time (natural log on the y axis), one column per sponge type.
# NOTE(review): needs the 'time (minutes)' and 'type_x' columns, i.e. df_sixty as produced by
# the crunching cell, not the renamed four-column version read back from 60Hz.csv.
log_sixty = np.log(df_sixty['60Hz_uV'])
sn.relplot(
    df_sixty,
    kind='line',
    x='time (minutes)',
    y=log_sixty,
    hue='channel',
    col='type_x',
)

In [None]:
# 60 Hz amplitude over time for the electrodes whose Hemisphere_x value is 'Z'.
# NOTE(review): needs the 'Hemisphere_x', 'type_x' and 'time (minutes)' columns of df_sixty.
midline = df_sixty.query("Hemisphere_x=='Z'")
sn.relplot(
    midline,
    kind='line',
    x='time (minutes)',
    y='60Hz_uV',
    style='channel',
    hue='type_x',
)

In [None]:
# Count of electrodes per hemisphere, split by sponge type, saved as a 16x8 inch JPEG at 300 dpi
g = sn.countplot(
    df_electrodes[['type','Hemisphere']],
    x='Hemisphere',
    hue='type',
    order=['L','Z','R'],
    palette=['black','grey','lightgrey'],
    legend='full',
)
fig = g.figure
fig.set_size_inches(16,8)
fig.tight_layout()
fig.savefig(os.path.join(output_dir,'channel_types_table.jpg'), dpi=300)

# Alpha power analysis
Eyes open / eyes closed paradigm

## Data crunching

In [None]:
# If `overwrite` is False and PSDs.csv exists, read it in so the time-consuming analysis can be skipped.
# Loading stays best-effort, but only file/parsing problems are tolerated (a bare `except`
# would also hide KeyboardInterrupt and genuine bugs), and the reason is reported.
if not overwrite:
    psd_cache = os.path.join(output_dir,'PSDs.csv')
    try:
        df_alpha = pd.read_csv(psd_cache)
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as err:
        print(f'Could not load cached PSDs from {psd_cache} ({err}); df_alpha has to be recomputed')

In [None]:
# Compute per-epoch Welch PSDs for the eyes-open and eyes-closed conditions of every
# .CNT recording, keyed by the recording's measurement date.
power_dict = {}
for filename in os.listdir(data_dir):
    # Only look at the .CNT files
    if not filename.endswith('.cnt'):
        continue
    # Open the data as an mne.Raw object; recordings shorter than 10 s are skipped
    raw = load_CNT(os.path.join(data_dir, filename))
    if raw.duration < 10:
        continue

    psds = {}
    for cond in ['eyes_open','eyes_closed']:
        # Annotations whose description matches this condition
        cond_annotations = [a for a in raw.annotations if a['description']==cond]
        segments = raw.crop_by_annotations(annotations=cond_annotations)
        # Cut every segment into 2 s epochs without overlap and pool them
        epochs = mne.concatenate_epochs(
            [mne.make_fixed_length_epochs(seg, duration=2, overlap=0) for seg in segments]
        )
        psds[cond] = epochs.compute_psd(method='welch', fmin=1, fmax=70)

    # Long-format table per condition, indexed by (epoch, channel, freq)
    frames = {}
    for cond, spectrum in psds.items():
        long_df = spectrum.to_data_frame(long_format=True)
        long_df = long_df.drop(columns=['condition','ch_type'])
        frames[cond] = long_df.set_index(['epoch','channel','freq'])

    power_dict[raw.info['meas_date']] = pd.concat(
        frames, axis=0, names=['cond','epoch','channel','freq']
    )

# Stack all recordings; the outermost index level is the measurement datetime
df_power = pd.concat(power_dict, axis=0, names=['datetime','cond','epoch','channel','freq'])
# Replace the absolute datetime with minutes elapsed since the earliest recording
meas_times = df_power.index.get_level_values('datetime')
elapsed_minutes = (meas_times - meas_times.min()).to_series().dt.total_seconds()/60
df_power['time'] = np.round(elapsed_minutes, 3).values
df_power.index = df_power.index.droplevel('datetime')
# Attach the electrode metadata from df_electrodes (left join on channel)
df_power = df_power.reset_index().merge(df_electrodes, left_on='channel', right_on='channel', how='left')

In [None]:
# Rebuild df_power from power_dict: stack all recordings, convert the measurement
# datetime into elapsed minutes, then attach the electrode metadata.
df_power = pd.concat(power_dict, axis=0, names=['datetime','cond','epoch','channel','freq'])
meas_times = df_power.index.get_level_values('datetime')
elapsed_minutes = (meas_times - meas_times.min()).to_series().dt.total_seconds()/60
df_power['time'] = np.round(elapsed_minutes, 3).values
df_power.index = df_power.index.droplevel('datetime')
# df_power = df_power.set_index('time', append=True)
df_power = df_power.reset_index().merge(df_electrodes, left_on='channel', right_on='channel', how='left')

In [None]:
# Mean and standard deviation across epochs of the 10 Hz power,
# grouped by every column other than 'epoch' and 'value'.
group_cols = [col for col in df_power.columns if col not in ['epoch','value']]
grouped = df_power.query("freq==10").groupby(group_cols)
value_mean = grouped['value'].mean()
value_sd = grouped['value'].std()
data = pd.concat([value_mean, value_sd], axis=1, keys=['mean','SD']).reset_index()
data

In [None]:
# Mean 10 Hz power over time, one panel per channel (8 panels per row, independent y axes)
sn.relplot(
    data,
    kind='line',
    x='time',
    y='mean',
    col='channel',
    col_wrap=8,
    hue='cond',
    style='cond',
    markers=True,
    facet_kws={'sharey':False},
)

In [None]:
# Mean 10 Hz power over time, one panel per sponge type (independent y axes)
sn.relplot(
    data,
    kind='line',
    x='time',
    y='mean',
    hue='cond',
    style='cond',
    col='type',
    markers=True,
    facet_kws={'sharey':False},
)

In [None]:
# Across-epoch SD of 10 Hz power over time, one row per condition, coloured by sponge type
sn.relplot(
    data,
    kind='line',
    x='time',
    y='SD',
    row='cond',
    style='cond',
    hue='type',
    markers=True,
    facet_kws={'sharey':False},
)

In [None]:
# Distribution of mean 10 Hz power at each time point: rows are conditions, columns are sponge types
sn.catplot(
    data,
    kind='violin',
    x='time',
    y='mean',
    row='cond',
    hue='cond',
    col='type',
    sharey=False,
)

In [None]:
    # NOTE(review): this cell starts at an indented `for`, so it looks like the body of an
    # enclosing loop (presumably one pass per recording) whose header is not in this cell.
    # `raw`, `df_annotations`, `sfreq` and `alpha_list` are not assigned in this cell —
    # TODO confirm where they are meant to come from.
    # For each annotation description: compute per-epoch Welch PSDs and append a
    # long-form power table to alpha_list.
    for name, group in df_annotations.groupby('description'):
        # Cut `raw` into one segment per annotation of this description
        segments = raw.crop_by_annotations(annotations=mne.Annotations(
            onset=group['onset'].values,
            duration=group['duration'].values,
            description=group['description'].values,
        ))
        # Split every segment into 2 s epochs with no overlap and pool them
        epochs = mne.concatenate_epochs(
            [mne.make_fixed_length_epochs(seg, duration=2, overlap=0) for seg in segments]
        )
    
        # Welch PSD restricted to 1-70 Hz
        psd = epochs.compute_psd(method='welch', fmin=1, fmax=70)

        # Replace epoch numbers with actual time of onset
        # Long form: 'epoch' and 'freq' stay as id columns; the remaining columns
        # (presumably one per channel) are melted into 'channel' / 'power'
        df = psd.to_data_frame().drop(columns=['condition']).melt(id_vars=['epoch','freq'], var_name='channel', value_name='power', ignore_index=False)
        # presumably psd.events[:,0] holds sample indices, so dividing by sfreq gives
        # seconds after the measurement date — TODO confirm
        times = [raw.info['meas_date'] + datetime.timedelta(seconds=x) for x in psd.events[:,0]/sfreq]
        # assumes the unique epoch numbers and the events are in the same order — TODO confirm
        mapper = dict(zip(df['epoch'].unique(),times))
        df['time'] = df['epoch'].apply(lambda x: mapper[x])
        df = df.drop(columns=['epoch'])
        df['condition'] = name
        df.set_index(['condition','time','channel','freq'], inplace=True)
        # Debug output: prints the index entries that occur more than once
        print(df.index[df.index.duplicated()])
        
        alpha_list.append(df.copy())
    
# Combine the per-condition PSD tables collected in alpha_list, convert to log power
# and elapsed minutes, attach electrode metadata, and save to CSV and JSON.
df_alpha = pd.concat(alpha_list)
# Log power is easier to read than power
df_alpha['power'] = np.log(df_alpha['power'])
# Elapsed time in minutes is more useful than absolute time.
# total_seconds() is used because .dt.seconds only returns the seconds component
# of each timedelta (it discards whole days and fractions of a second).
df_alpha = df_alpha.reset_index()
elapsed = df_alpha['time'] - df_alpha['time'].min()
df_alpha['time'] = np.round(elapsed.dt.total_seconds()/60, 3)

# Add sponge type info (MXene, spare MXene, white) and some other parameters
df_alpha = pd.merge(df_alpha, df_electrodes) # this comes from drexel.py which is imported at the start

# The saved copy drops the electrode position columns and is indexed by the remaining identifiers
df_alpha_save = df_alpha.drop(columns=['AP','Hemisphere']).set_index(['condition','type','channel','freq','time'])
df_alpha_save.to_csv(os.path.join(output_dir,'PSDs.csv'))
df_alpha_save.to_json(os.path.join(output_dir,'PSDs.json'), orient='index')
print('Done!')

In [None]:
# Display the annotations of `raw` as a table.
# NOTE(review): `raw` is whichever recording the most recently executed cell left in the kernel.
raw.annotations.to_data_frame()

In [None]:
# Store the condition labels as a pandas categorical column.
# The dtype must be given as the string 'category'; the bare name `categorical`
# is not defined in any visible cell and would raise NameError.
df_alpha['condition'] = df_alpha['condition'].astype('category')

In [None]:
# Wide view (one column per time point) of the saved PSDs for every sponge type except 'MX'
non_mx = df_alpha_save.query("not type=='MX'")
non_mx.unstack('time')

In [None]:
# Power spectrum (log power vs frequency) per condition, one panel per sponge type
sn.relplot(
    df_alpha,
    kind='line',
    x='freq',
    y='power',
    hue='condition',
    col='type',
)
# Alternative restricted to two channels:
# sn.relplot(df_alpha.query("channel in ['11R','11L']"),kind='line',x='freq',y='power',hue='condition', col='type')

In [None]:
# Linear trend of 10 Hz log power over time for rows with AP > 9, one panel per sponge type
alpha_rows = df_alpha.query("freq==10 and AP>9")
sn.lmplot(
    alpha_rows,
    x='time',
    y='power',
    hue='condition',
    col='type',
)
# Alternative restricted to two channels:
# sn.lmplot(df_alpha.query("(freq==10) and (channel in ['11R','11L'])"),x='time',y='power',hue='condition', col='type')

In [None]:
# Linear trend of 10 Hz log power over time per channel: rows are conditions, columns are sponge types
alpha_rows = df_alpha.query("(freq==10)")
sn.lmplot(
    alpha_rows,
    x='time',
    y='power',
    row='condition',
    hue='channel',
    col='type',
)
# Alternative restricted to two channels:
# sn.lmplot(df_alpha.query("(freq==10) and (channel in ['11R','11L'])"),x='time',y='power',row='condition', hue='type')

In [None]:
# Rebuild df_alpha from alpha_list (without the electrode metadata merge)
df_alpha = pd.concat(alpha_list).reset_index()
# Elapsed time in minutes is more useful than absolute time.
# total_seconds() is used because .dt.seconds only returns the seconds component
# of each timedelta (it discards whole days and fractions of a second).
elapsed = df_alpha['time'] - df_alpha['time'].min()
df_alpha['time'] = np.round(elapsed.dt.total_seconds()/60, 2)
# Log power is easier to read than power
df_alpha['power'] = np.log(df_alpha['power'])

In [None]:
# Scatter of 10 Hz log power against elapsed time, coloured by condition
alpha_rows = df_alpha.reset_index().query("freq==10")
sn.relplot(
    alpha_rows,
    kind='scatter',
    x='time',
    y='power',
    hue='condition',
)

In [None]:
# Linear trend of 10 Hz log power over elapsed time, one regression line per condition
alpha_rows = df_alpha.reset_index().query("freq==10")
sn.lmplot(
    alpha_rows,
    x='time',
    y='power',
    hue='condition',
)