In [13]:
import pandas as pd
import eeg
from eeg import unix_to_localdate, unix_to_period
import datetime
import scipy.fftpack
import numpy as np
import matplotlib.pyplot as plt


# Build the collection of recordings from the bundled data directory.
# NOTE(review): the exact type/shape of `files` is defined by the project-local
# `eeg` module and is not visible here — confirm there.
files = eeg.extractBundledEEG("../../scripts/data/")
# Attach metadata from events.csv under the "timeofday" label.
# NOTE(review): presumably this is what provides the "morning"/"evening" tags
# queried with `extractByTags` further down — confirm in `eeg`.
files.addMeta("../../scripts/data/eeg-restingstate/events.csv","timeofday")
# NOTE(review): presumably drops entries that have metadata but no recording
# (or the reverse) — confirm in `eeg`.
files.prune()
# files.mergeTagsWithRegex("[cC]lose","Eyes Closed")
# A bare expression that is not the last line of a cell is not displayed, so
# this line shows nothing unless `categories` has side effects.
files.categories
# Aggregated results: filled below as dataAgg[tag][session id] with each
# session's "avg_power_per_channel_by_band" mapping.
dataAgg = dict()

# Band order used by `load_session` when reporting average power by band.
bands_ordered = ["delta", "theta", "alpha", "beta", "gamma"]

def load_session(files: dict, qualityCutoffFilter: float = 0, filterSampleWindow: int = -1) -> dict:
    """
    Takes a session of EEG data, computes some metrics, and returns them.

    files: mapping with the keys "powerByBand", "signalQuality", "calm" and
        "focus"; each value is handed to `pd.read_json`.
    qualityCutoffFilter: minimum fraction (0-1) of signal-quality samples
        marked "good" or "great" that a channel needs in order to be included
        in the output and analytics. 0 keeps every channel.
    filterSampleWindow: window size to sample. Currently unused; kept so
        existing callers keep working.

    Returns a dict with:
     - timestamp / local_date / local_timeofday: derived from the first
       power-by-band sample
     - duration: last minus first power-by-band timestamp
     - avg_power_per_channel_by_band: {channel: {band: mean power}} for the
       channels that passed the quality filter
     - avg_power_by_band: mean power per band over the kept channels
     - avg_power_by_channel: mean power per kept channel over all its bands
     - avg_calm_score / avg_focus_score: mean of the "probability" column
     - time_spent_calm / time_spent_focused: fraction of samples whose
       "probability" is above 0.3
    """
    # Best channels are usually: CP3, CP4, PO3, PO4

    # NOTE: unixTimestamps are int/seconds but samples more often than 1Hz,
    #       so several rows per timestamp and missing sub-second resolution.
    df_pbb = pd.read_json(files["powerByBand"]).set_index("unixTimestamp")
    df_pbb.index = pd.to_datetime(df_pbb.index, unit="s")
    df_sigQ = pd.read_json(files["signalQuality"])

    # Columns are named "<channel>_<band>" (CP3_alpha, CP3_beta, ...).
    # Split each name once and match on the exact parts afterwards, so one
    # channel/band can never pick up another that shares a prefix or suffix.
    split_columns = {col: col.split("_") for col in df_pbb.columns}
    # dict.fromkeys de-duplicates while keeping column order; a set would make
    # the output ordering change from one kernel to the next.
    channels = list(dict.fromkeys(parts[0] for parts in split_columns.values()))
    bands = list(dict.fromkeys(parts[1] for parts in split_columns.values()))

    # Cull channels whose signal was "good"/"great" too rarely.
    removedChannels = []
    for channel in channels:
        status = df_sigQ[channel + "_status"]
        # An empty quality table counts as "never good" instead of dividing by zero.
        okay_fraction = status.isin(["good", "great"]).mean() if len(status) else 0.0
        if okay_fraction < qualityCutoffFilter:
            removedChannels.append(channel)

    removed = set(removedChannels)
    channels = [channel for channel in channels if channel not in removed]
    if removedChannels: print("Channels",*removedChannels,"culled")

    # Average power per band, across the channels that survived the cull.
    band_power = pd.DataFrame(index=df_pbb.index)
    for band in bands:
        band_columns = [
            col for col, parts in split_columns.items()
            if parts[1] == band and parts[0] not in removed
        ]
        band_power[band] = df_pbb[band_columns].mean(axis=1)
    average_band_power = band_power.mean()[bands_ordered]

    # Average power per surviving channel, across all of its bands.
    channel_power = pd.DataFrame(index=df_pbb.index)
    for channel in channels:
        channel_columns = [col for col, parts in split_columns.items() if parts[0] == channel]
        channel_power[channel] = df_pbb[channel_columns].mean(axis=1)
    average_channel_power = channel_power.mean()[channels]

    # TODO: split into low(0.3-0.6), medium(0.6-0.7), high(0.7-1.0)
    active_threshold = 0.3
    df_calm = pd.read_json(files["calm"])
    avg_calm_score = df_calm["probability"].mean()
    # Mean of a boolean mask == share of samples above the threshold.
    time_spent_calm = (df_calm["probability"] > active_threshold).mean()

    df_focus = pd.read_json(files["focus"])
    avg_focus_score = df_focus["probability"].mean()
    time_spent_focused = (df_focus["probability"] > active_threshold).mean()

    session_start = df_pbb.index[0]
    unix_timestamp = int(session_start.timestamp())
    if len(channels)==0: print(session_start.date(),session_start.time(),"WARNING all channels culled")

    return {
        "timestamp": unix_timestamp,
        "local_date": unix_to_localdate(unix_timestamp),
        "local_timeofday": unix_to_period(unix_timestamp),
        "duration": df_pbb.index[-1] - session_start,
        "avg_power_per_channel_by_band": {
            channel: {band: df_pbb[channel + "_" + band].mean() for band in bands}
            for channel in channels
        },
        "avg_power_by_band": dict(average_band_power),
        "avg_power_by_channel": dict(average_channel_power),
        "avg_calm_score": avg_calm_score,
        "avg_focus_score": avg_focus_score,
        "time_spent_calm": time_spent_calm,
        "time_spent_focused": time_spent_focused,
        # `relative_power` keys are 2-tuples (band1, band2), values are ratios
        # maybe doesn't need to be computed here,
        # can be computed later from `avg_power_by_band`
        # "relative_power": {},
        #"signal_quality" based on the signal quality data
    }


# Collect the per-channel band power of every morning/evening session, keeping
# only channels rated "good"/"great" in at least 80% of their quality samples.
for period in ["morning","evening"]:
    # Create the bucket up front so the tag exists in dataAgg even when no
    # session carries it; later cells index dataAgg["morning"]/["evening"].
    dataAgg.setdefault(period, {})
    for session_id in files.extractByTags(period):
        dataStage = load_session(files.extractById(session_id), qualityCutoffFilter=0.8)["avg_power_per_channel_by_band"]
        # An empty mapping means every channel was culled: skip that session.
        if dataStage: dataAgg[period][session_id] = dataStage

1674926196 Meta available but no associated recordings
Channels F5 F6 culled
Channels F5 F6 CP4 PO4 C3 C4 CP3 PO3 culled
Channels F5 culled
Channels CP3 culled
Channels C4 culled
Channels C3 PO3 culled
Channels F5 F6 C3 C4 culled
Channels F6 culled
Channels F5 F6 CP4 C3 C4 culled
Channels C3 C4 culled
Channels C3 C4 culled
Channels F5 F6 culled
Channels F5 F6 CP4 PO4 C3 C4 CP3 PO3 culled
Channels C4 culled
Channels F6 CP4 C3 C4 culled
Channels C3 culled


In [14]:
def _flatten_session(session: dict) -> dict:
    """Flatten {channel: {band: power}} into {"<channel>_<band>": power}.

    Entries whose value is not a dict are copied unchanged, so a session that
    has already been flattened passes through as-is.
    """
    flat = {}
    for channel, band_powers in session.items():
        if isinstance(band_powers, dict):
            for band, power in band_powers.items():
                flat[channel + "_" + band] = power
        else:
            flat[channel] = band_powers
    return flat


# Re-key every session from nested channel -> band dicts to flat
# "<channel>_<band>" keys. Because already-flat sessions are left untouched,
# running this cell more than once no longer fails.
dataAgg = {
    period: {
        session_id: _flatten_session(session)
        for session_id, session in sessions.items()
    }
    for period, sessions in dataAgg.items()
}

Simple Aggregated Trials

In [15]:
# One column per morning session, one row per key of the session mappings.
morning = pd.DataFrame.from_dict(dataAgg["morning"])
# Row-wise spread, then row-wise average, across the morning sessions.
for label, statistic in (("std", morning.std), ("mean", morning.mean)):
    print(label, statistic(axis=1, numeric_only=True))

std CP4_gamma     1.283401
CP4_delta    13.241922
CP4_beta      5.287791
CP4_alpha     6.638449
CP4_theta    10.485589
PO4_gamma     1.030901
PO4_delta     7.543883
PO4_beta      4.005294
PO4_alpha     4.441743
PO4_theta     6.230794
C3_gamma      0.443865
C3_delta      6.288217
C3_beta       1.777248
C3_alpha      2.780732
C3_theta      5.044589
C4_gamma      0.429914
C4_delta      3.784041
C4_beta       1.913075
C4_alpha      2.338994
C4_theta      3.391546
CP3_gamma     0.163022
CP3_delta     6.825569
CP3_beta      0.901033
CP3_alpha     2.192686
CP3_theta     5.052099
PO3_gamma     0.627288
PO3_delta     6.348347
PO3_beta      2.724573
PO3_alpha     3.343310
PO3_theta     5.151493
F6_gamma      0.182061
F6_delta      3.672927
F6_beta       0.787099
F6_alpha      1.494697
F6_theta      2.870052
F5_gamma      0.422453
F5_delta     16.226270
F5_beta       2.069621
F5_alpha      5.371069
F5_theta     13.963967
dtype: float64
mean CP4_gamma     0.712729
CP4_delta    10.361068
CP4_beta  

In [16]:
# One column per evening session, one row per key of the session mappings.
evening = pd.DataFrame.from_dict(dataAgg["evening"])
# Row-wise spread, then row-wise average, across the evening sessions.
for label, statistic in (("std", evening.std), ("mean", evening.mean)):
    print(label, statistic(axis=1, numeric_only=True))

std F5_gamma     0.074663
F5_delta     2.671588
F5_beta      0.374376
F5_alpha     0.841074
F5_theta     1.976707
F6_gamma     0.074279
F6_delta     3.083102
F6_beta      0.464484
F6_alpha     0.937232
F6_theta     2.217538
CP4_gamma    0.088774
CP4_delta    1.751423
CP4_beta     0.398126
CP4_alpha    0.567327
CP4_theta    1.372702
PO4_gamma    0.200177
PO4_delta    2.777362
PO4_beta     1.000813
PO4_alpha    1.328056
PO4_theta    2.138421
C3_gamma     0.153004
C3_delta     1.628914
C3_beta      0.650102
C3_alpha     0.897528
C3_theta     1.250981
C4_gamma     0.360439
C4_delta     3.967537
C4_beta      1.698760
C4_alpha     2.052035
C4_theta     2.978777
CP3_gamma    0.143306
CP3_delta    3.780843
CP3_beta     0.621007
CP3_alpha    1.319824
CP3_theta    3.141521
PO3_gamma    0.059875
PO3_delta    2.444514
PO3_beta     0.339979
PO3_alpha    0.738748
PO3_theta    1.806590
dtype: float64
mean F5_gamma     0.246900
F5_delta     5.655602
F5_beta      1.280692
F5_alpha     2.398272
F5_theta