# Muse EEG data quality
This notebook aggregates quality indicators of the EEG signal (as reported by the Muse headband) for each participant.
These values give the fraction of time (a value between 0 and 1) that the EEG headband was on the head, and, separately for each electrode, the fractions of time the signal quality reported by the device was good, medium, or bad.
The indicators are computed for each participant, each stimulus, and each study phase.

The quality of EEG signal is reported using [Horse Shoe Indicator (HSI)](https://mind-monitor.com/Technical_Manual.php#help_horseshoe) values provided by the device, which represent how well electrodes fit the participant's head.

Data on signal quality is stored in `headband_dict`, which can be exported to a .json file.

## Setup

In [1]:
import json
import pandas as pd
import os
import configparser
from copy import deepcopy
from tqdm.notebook import tqdm
from pprint import pprint

In [2]:
# Load the notebook configuration. ``ConfigParser.read`` silently skips files
# it cannot open, so a missing config.ini simply yields an empty configuration.
config = configparser.ConfigParser()
config.read("config.ini")

# Look the option up with a fallback so that a missing file, section or option
# produces the same actionable message as a non-existent path (instead of a
# bare KeyError).
_dataset_dir = config.get('DataDirectories', 'unzipped_dataset', fallback=None)
if not _dataset_dir or not os.path.exists(_dataset_dir):
    raise Exception("Please set path for unzipped dataset in config.ini")

# Root directory of the unzipped dataset; only bound once it has been validated.
DATA_DIR = _dataset_dir

In [3]:
# generate paths: one (participant, emotion, phase, file path) tuple per MUSE file
# NOTE(review): assumes file names look like <x>_<EMOTION>_<PHASE>_MUSE..., so
# that fields 1 and 2 of the '_'-split name are the emotion and the phase --
# confirm against the dataset layout.
paths = []
# os.listdir returns entries in arbitrary order; sorting keeps the traversal
# (and therefore the exported JSON) reproducible between runs.
for part in sorted(os.listdir(DATA_DIR)):
    part_dir = os.path.join(DATA_DIR, part)
    # Skip stray files lying next to the participant folders (e.g. .DS_Store);
    # calling os.listdir on them would raise NotADirectoryError.
    if not os.path.isdir(part_dir):
        continue
    for f_name in sorted(os.listdir(part_dir)):
        if '_MUSE' not in f_name:
            continue
        # split once and reuse the fields
        name_fields = f_name.split('_')
        paths.append((part, name_fields[1], name_fields[2], os.path.join(part_dir, f_name)))

In [4]:
def update_emotion_stats(emotion_dict, new_ok_len, new_len):
    """
    Update Avg_HeadBandOn and Num_samples in provided emotion_dict.

    Folds a new batch of samples into the running statistics:
    'Avg_HeadBandOn' becomes the sample-weighted average of the stored value
    and the new batch, and 'Num_samples' grows by new_len. The dict is
    modified in place. When there are no samples at all (stored count and
    new_len are both 0) the dict is left untouched, because the average is
    undefined.

    :param emotion_dict: dict holding 'Avg_HeadBandOn' and 'Num_samples'
    :param new_ok_len: number of new samples counted as "headband on"
    :param new_len: total number of new samples
    :return: None
    """
    old_ok = emotion_dict.get('Avg_HeadBandOn')
    old_len = emotion_dict.get('Num_samples')
    total_len = old_len + new_len
    if total_len == 0:
        # Nothing recorded so far and nothing to add: avoid dividing by zero.
        return
    # old_ok*old_len recovers the stored "on" sample count from the average.
    emotion_dict['Avg_HeadBandOn'] = (old_ok*old_len + new_ok_len)/total_len
    emotion_dict['Num_samples'] = total_len
    
def get_hsi_scores(df, HSI_COLS=None):
    """
    Get HSI scores for each channel.
    1 - Good, 2 - Medium, 4 - Bad

    :param df: DataFrame containing the HSI columns
    :param HSI_COLS: iterable of HSI column names to evaluate; when omitted,
        the four columns 'HSI_TP9', 'HSI_AF7', 'HSI_AF8', 'HSI_TP10' are used
    :return: dict mapping each column name to {1: x, 2: y, 4: z}, where each
        value is the number of rows holding that HSI value divided by len(df)
    """
    if HSI_COLS is None:
        # Default built per call rather than shared as a mutable default argument.
        HSI_COLS = ['HSI_TP9', 'HSI_AF7', 'HSI_AF8', 'HSI_TP10']
    # pandas interprets a tuple as a single key, so normalise any iterable
    # of names to a list before selecting columns.
    cols = list(HSI_COLS)
    n_rows = len(df)
    hsi = df[cols]
    ones = (hsi == 1).sum()
    twos = (hsi == 2).sum()
    fours = (hsi == 4).sum()
    return {col: {1: ones[col]/n_rows, 2: twos[col]/n_rows, 4: fours[col]/n_rows} for col in cols}

def fix_headband_dict(dict_to_fix):
    """
    Delete washout from baseline fields.

    For every participant entry that has a 'BASELINE' emotion dict, the
    'WASHOUT' key is removed from it. The input is modified in place. Calling
    the function again, or on data whose baseline has no 'WASHOUT' key, is a
    no-op rather than an error.

    :param dict_to_fix: dict mapping participant -> emotion -> emotion dict
    :return: None
    """
    for part_dict in dict_to_fix.values():
        # Direct lookup instead of scanning every emotion for one key.
        baseline = part_dict.get('BASELINE')
        if baseline is not None:
            # Default argument keeps the call idempotent.
            baseline.pop('WASHOUT', None)

## Get the EEG data

In [5]:
# Per-emotion record: running headband-on average, number of samples seen,
# and one (initially empty) dict per study phase.
EMOTION_DICT_TEMPLATE = {
    'Avg_HeadBandOn': 0,
    'Num_samples': 0, 
    'STIMULUS': dict(), 
    'QUESTIONNAIRES': dict(), 
    'WASHOUT': dict()
}
HSI_COLS = ['HSI_TP9', 'HSI_AF7', 'HSI_AF8', 'HSI_TP10']
HeadBandOn_COLS = ['HeadBandOn']

# participant -> emotion -> copy of EMOTION_DICT_TEMPLATE filled with results
headband_dict = dict()


for part, emotion, phase, path in tqdm(paths):
    # load data from one phase, keeping only rows where every needed column is set
    df = pd.read_json(path)[HeadBandOn_COLS+HSI_COLS].dropna()
    phase_len_ok = len(df[df['HeadBandOn'] == 1])
    phase_len = len(df)
    # make sure the participant/emotion entry exists before touching it
    emotion_stats = headband_dict.setdefault(part, dict()).setdefault(
        emotion,
        deepcopy(EMOTION_DICT_TEMPLATE)
    )
    if phase_len == 0:
        # No usable samples in this recording: every ratio below would divide
        # by zero, so the phase entry is left empty instead of aborting the run.
        continue
    # add loaded data to headband_dict
    update_emotion_stats(emotion_stats, phase_len_ok, phase_len)
    phase_stats = emotion_stats[phase]
    phase_stats.setdefault('HeadBandOn', phase_len_ok/phase_len)
    # pass the module-level column list explicitly so it stays the single source of truth
    phase_stats.update(get_hsi_scores(df, HSI_COLS))

# delete unnecessary fields
fix_headband_dict(headband_dict)

  0%|          | 0/1312 [00:00<?, ?it/s]

In [6]:
def get_headband_off_instances(headband_dict):
    """
    Collect the entries whose 'Avg_HeadBandOn' value is below 1.

    Returns a nested dict
        Participant: Emotion: 'Avg_HeadBandOn' value
    containing only the emotions with a value < 1; participants without
    any such emotion are left out entirely.
    """
    flagged = {}
    for participant, emotions in headband_dict.items():
        # keep only this participant's emotions with an average below 1
        below_full = {
            emotion: stats.get('Avg_HeadBandOn')
            for emotion, stats in emotions.items()
            if stats.get('Avg_HeadBandOn') < 1
        }
        if below_full:
            flagged[participant] = below_full
    return flagged

## Print and save quality indicators

In [7]:
# Show every participant/emotion pair whose Avg_HeadBandOn value is below 1
headband_off_instances = get_headband_off_instances(headband_dict)
pprint(headband_off_instances)

{'23': {'ANGER': 0.9930110493329372},
 '26': {'DISGUST': 0.9908630192727407, 'LIKING': 0.9925431711145997},
 '30': {'ANGER': 0.9925554199255542, 'LIKING': 0.9927327291985406},
 '33': {'NEUTRAL': 0.9932857628270473},
 '34': {'SURPRISE': 0.9905743178782676},
 '38': {'FEAR': 0.9928443131119995},
 '39': {'AMUSEMENT': 0.0,
        'ANGER': 0.0,
        'AWE': 0.0,
        'BASELINE': 0.2779818266740266,
        'DISGUST': 0.0,
        'ENTHUSIASM': 0.0,
        'LIKING': 0.738687376548752,
        'NEUTRAL': 0.0,
        'SADNESS': 0.0,
        'SURPRISE': 0.0},
 '40': {'LIKING': 0.9927198058614897},
 '41': {'AMUSEMENT': 0.9929977745963798},
 '44': {'BASELINE': 0.9935078589076382},
 '47': {'ENTHUSIASM': 0.9929960868825498},
 '49': {'AWE': 0.9930354851898324, 'SURPRISE': 0.9909755206430398},
 '50': {'AMUSEMENT': 0.9933898894746702,
        'ANGER': 0.0,
        'AWE': 0.002656717284631689,
        'DISGUST': 0.0,
        'ENTHUSIASM': 0.0,
        'FEAR': 0.0,
        'LIKING': 0.0,
        

In [8]:
# Serialise the complete headband_dict and store it as muse_quality.json
with open('muse_quality.json', 'w') as out_file:
    out_file.write(json.dumps(headband_dict))