# HydroHomies Plots
In this notebook, the plots, figures and also some explanations or details about each of them are being presented.  

To clarify plots, please follow this order:
- Title for each plot is mandatory
- Analysis must be written 
- legends are manedatory

### Importing the needed modules

In [42]:
import yaml
import pandas as pd
import numpy as np

from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.transform import dodge, factor_cmap
from bokeh.models import ColumnDataSource, FactorRange, Whisker
import panel as pn

output_notebook()
pn.extension()

import hvplot.pandas

### Loading all data


In [43]:
with open('config.yaml') as stream:
    config = yaml.safe_load(stream)

### Cleaning (Digit Span Raw Data)

In [44]:
def clean_digit_span(raw_df):
    # Select the sequence length data from the raw data and create a dataframe
    seq_length_df = raw_df[raw_df[1].astype(str).str.match(r'\d+')]

    # Get the value of the longest sequence remembered
    longest = seq_length_df[2]
    longest = longest.tolist()

    # Get the number of errors made
    error_number = seq_length_df[3]
    error_number = error_number.tolist()

    # Select the rows with the click stimulus data
    click_stim_df = raw_df[raw_df[1]=='clickedStim']
    click_stim_df.size

    # Calculate the number of clicks made by the participant
    clicks_observed = click_stim_df.count(axis=1) - 2 
    clicks_observed = clicks_observed.tolist()

    # Calculate the number of clicks that the participant should have made
    clicks_expected =  pd.to_numeric(longest) + 1
    clicks_expected = clicks_expected.tolist()

    # Create a new dataframe with all the values calculated above
    clean_data = pd.DataFrame(data ={'seq length':longest,
                        'errors': error_number,
                        'clicks expected': clicks_expected,
                        'clicks observed':clicks_observed})

    # Return the new dataframe
    return clean_data


### Data Integration For Each Test

In [45]:
def create_merged_df(config_dict):
    data_dict = {}

    # read the files 
    for test, file in config_dict.items():
        df_dict = pd.read_excel(file, sheet_name=None, header=None)

        for session, df in df_dict.items():

            # extracting the participant name and type name
            participant = test.split('_')[-1]
            test_name = test.split('_')[0]

            #extracting repeat number and making its column except for personal
            try:
                type, repeat = session.split('_')
                df.insert(0, 'repeat', repeat)

            except ValueError:
                type = session


            # Running function to clean digit span data
            if test_name == 'digit':
                df = clean_digit_span(df.iloc[3:])
                df.insert(0, 'repeat', repeat)
            
            # verbal fluency test contains header
            elif test_name =='verbal':
                df = df.iloc[1:]

            # inserting the type and participant columns
            df.insert(0, 'type', type)
            df.insert(0, 'participant', participant)
 
            # concatenating data frames of each test
            if test_name not in data_dict:
                data_dict[test_name] = df
            else:
                data_dict[test_name] = pd.concat([data_dict[test_name], df])
    
    return data_dict

data_dict = create_merged_df(config)

In [46]:
# Run the function
df_dict = create_merged_df(config)

---

### Flanker Test Analysis

In [47]:
# creating Flanker dataframe
def create_flanker_dataframe():
    flanker_df = data_dict["flanker"]
    flanker_df.rename(columns={0: "pattern", 1: "expression", 2: "correctness", 3: "response-time"}, inplace=True)
    flanker_df["correctness"] = flanker_df["correctness"].replace(1, "correct")
    flanker_df["correctness"] = flanker_df["correctness"].replace(2, "incorrect")
    flanker_df["correctness"] = flanker_df["correctness"].replace(3, "not-answer")
    return flanker_df

flanker_df = create_flanker_dataframe()

In [48]:
def show_plot(data, title, x_label="", y_label="", palette=["salmon", "skyblue"], factors=["dehydration", "control"]):
    index_cmap = factor_cmap('x', palette=palette, factors=factors, start=1, end=2)
    x = list(data.index.values)
    data_map = {
        'x': x,
        'counts': data.tolist()
        }

    source = ColumnDataSource(data=data_map)
    p = figure(x_range=FactorRange(*x), y_range=(0, 100), height=400, title=title,
               toolbar_location=None, tools="", x_axis_label=x_label, y_axis_label=y_label)

    p.vbar(x='x', top='counts', width=0.9, source=source, fill_color=index_cmap)

    p.y_range.start = 0
    p.x_range.range_padding = 0.1
    p.xaxis.major_label_orientation = 1
    p.xgrid.grid_line_color = None
    return p

def flanker_plot_count(answer_type="correct"):  # Roya
    flanker_df = create_flanker_dataframe()
    flanker_df = flanker_df[flanker_df["correctness"] == answer_type]

    flanker_df = flanker_df.groupby(["participant", "type", "repeat"])[
        "correctness"].count().reset_index()
    data = flanker_df.groupby(by=["participant", "type"])[
        "correctness"].mean()
    return show_plot(data, f"Average of {answer_type} answers", "participant/session", "count" )

answer_types =['correct','incorrect']
inter_plot = pn.interact(flanker_plot_count, answer_type = answer_types)
inter_plot

BokehModel(combine_events=True, render_bundle={'docs_json': {'3dda7525-5c0a-4103-b04a-3dc525cde90f': {'defs': …

### Stroop Test  Analysis

In [49]:
def stroop_test(): # Mahdiye
    total_dict = create_merged_df(config)
    stroop_df = total_dict['stroop']
    stroop_df.drop(stroop_df.columns[[3,7]], axis=1, inplace=True)
    stroop_df = stroop_df.set_axis(['participant', 'type','repeat','word name','word color',
                                    'name_color match','pressed _key','status','reaction_time'], axis=1)
    stroop_df['type&repeat'] = stroop_df['type']+stroop_df['repeat']
    return stroop_df

stroop_df = stroop_test()

In [50]:
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, Whisker
import panel as pn
from bokeh.io import output_notebook

output_notebook()
pn.extension()

In [51]:
def individual_stroop_bar_plot(participant='blue'):
    df = stroop_df[stroop_df['participant']==participant]
    
    dff= df.groupby('type&repeat').min().reset_index()
    p = figure(x_range=dff['type&repeat'], height=350, toolbar_location=None, 
               title=f'Stroop Test {participant}', y_axis_label="Reaction time(milliseconds)")
    p.vbar(x=dff['type&repeat'], bottom=0,top=dff['reaction_time'], width=0.5, line_color='white', color=participant)
    return p

#interactive plots
participants_color =['blue','red','orange','green','pink']
inter_plot = pn.interact(individual_stroop_bar_plot, participant = participants_color)
inter_plot

BokehModel(combine_events=True, render_bundle={'docs_json': {'af0ae7ec-0a13-43bc-a0bb-373f4f0a0880': {'defs': …

In [52]:
def individual_stroop_box_plot(participant):
    
    df = stroop_df[stroop_df['participant']==participant]
    kinds = df['type&repeat'].unique()
    
    # compute quantiles
    qs = df.groupby('type&repeat').reaction_time.quantile([0.25, 0.5, 0.75])
    qs = qs.unstack().reset_index()
    qs.columns = ['type&repeat', "q1", "q2", "q3"]
    df = pd.merge(df, qs, on='type&repeat', how="left")

    # compute IQR outlier bounds
    iqr = df.q3 - df.q1
    df["upper"] = df.q3 + 1.5*iqr
    df["lower"] = df.q1 - 1.5*iqr

    source = ColumnDataSource(df)

    p = figure(x_range=kinds,y_range=[0,stroop_df['reaction_time'].max() * 1.3],tools="", toolbar_location=None,
                title="box plot of stroop test "+participant,
               background_fill_color="#eaefef", y_axis_label="Reaction time(milliseconds)")


    # outlier range
    whisker = Whisker(base='type&repeat', upper="upper", lower="lower", source=source)
    whisker.upper_head.size = whisker.lower_head.size = 20
    p.add_layout(whisker)

    # quantile boxes
    p.vbar('type&repeat', 0.5, "q2", "q3", color = participant,bottom=0, source=source, line_color="black")
    p.vbar('type&repeat', 0.5, "q1", "q2", color=participant, bottom=0, source=source, line_color="black")
    
    # outliers
    outliers = df[~df.reaction_time.between(df.lower, df.upper)]
    p.scatter('type&repeat', 'reaction_time', source=outliers, size=6, color="black", alpha=0.5)

    p.xgrid.grid_line_color = None
    p.axis.major_label_text_font_size="14px"
    p.axis.axis_label_text_font_size="12px"

    return p
    
#interactive plots
participants_color =['blue','red','orange','green','pink']
inter_plot = pn.interact(individual_stroop_box_plot, participant = participants_color)
inter_plot

BokehModel(combine_events=True, render_bundle={'docs_json': {'4e9985e2-7373-4bda-9504-30b93eb5d870': {'defs': …

### Stop Signal Analysis

In [53]:

column_meanings = {'Column':[0,1,2,3,4,5,6,7],
                   'Meaning':['trial type (go or nogo)', 
                              'required response (left or right)', 
                              'when the stop signal is shown (or 0 if not)', 
                              'response time 1', 
                              'status 1 (1=correct, 2=wrong, 3=timeout)',
                              'response time 2 (only in no go trials)',
                              'status 2 (only in no go trials; 1=correct, 2=wrong, 3=timeout)',
                              '1=trial is correct ; 0=trial is not correct']} 

column_meanings = pd.DataFrame(column_meanings)
column_meanings.set_index('Column', inplace=True)
column_meanings


Unnamed: 0_level_0,Meaning
Column,Unnamed: 1_level_1
0,trial type (go or nogo)
1,required response (left or right)
2,when the stop signal is shown (or 0 if not)
3,response time 1
4,"status 1 (1=correct, 2=wrong, 3=timeout)"
5,response time 2 (only in no go trials)
6,"status 2 (only in no go trials; 1=correct, 2=w..."
7,1=trial is correct ; 0=trial is not correct


In [54]:
def stop_test(stop_df): # Jacob
    
    # renaming and reordering columns
    stop_df.rename(columns = {0:'trial_type', 1:'correct_resp.', 
                            2:'stop_signal_delay', 3:'response_time',
                            4:'status', 5:'resonse_time_nogo',
                            6:'status_nogo', 7:'correct'}, inplace = True)

    stop_df = stop_df[['participant', 'type', 'repeat', 'trial_type',
                    'correct_resp.', 'correct', 'response_time',
                    'status', 'stop_signal_delay', 'resonse_time_nogo',
                    'status_nogo']]

    # The average resonse time for go trials per trial type
    avg_go_resp_time = stop_df[stop_df['trial_type'] == 'go'].groupby([
        'participant', 'type','status']).mean()['response_time']


    # The average resonse time for no-go trials per correct/incorrect trial
    avg_nogo_resp_time = stop_df[stop_df['trial_type'] == 'nogo'].groupby([
        'participant', 'type','status_nogo']).mean()['response_time']

    # Good to keep in mind that here, status three corresponds with a correct trail
    # Since there was no press in a no-go trial.

    # Number of errors and time-outs in go trials
    errors_timeout_go = stop_df[(stop_df['trial_type'] == 'go') & 
                                (stop_df['status'] != 1.0)].groupby([
                                    'participant', 'type', 'repeat','status']).count()['trial_type']

    # Number of errors and time-outs in no-go trials
    errors_timeout_nogo = stop_df[stop_df['trial_type'] == 'nogo'].groupby([
        'participant', 'type', 'repeat','status_nogo']).count()['trial_type']
    
    return avg_go_resp_time, avg_nogo_resp_time, errors_timeout_go, errors_timeout_nogo

# callig the function
avg_go_resp_time, avg_nogo_resp_time, errors_timeout_go, errors_timeout_nogo = stop_test(data_dict['stop'])


  'participant', 'type','status']).mean()['response_time']
  'participant', 'type','status_nogo']).mean()['response_time']


In [55]:
react_go_boxplot = data_dict['stop'][(data_dict['stop']['trial_type'] == 'go') & 
                                     (data_dict['stop']['correct'] == 1)][['response_time', 'participant', 'type']
                                                      ].hvplot.box(by='type', 
                                                                   groupby='participant',
                                                                   title='Reaction time for correct responses',
                                                                   xlabel='Session Type', 
                                                                   ylabel='Resopnse Time (ms)')

react_go_boxplot

BokehModel(combine_events=True, render_bundle={'docs_json': {'29996cdf-b519-49ca-b253-089f9dc7bcb9': {'defs': …

In [56]:
# TODO: calculate percentage of errors/correct

participants = ['blue', 'green', 'red', 'pink', 'orange']
session_type = ['control', 'dehydratation']

perc_correct = pd.DataFrame(index=[participants])


In [57]:
df = data_dict['stop']
correct = len(df[(df['participant'] == 'blue') &
    (df['type'] == 'dehydration') &
    (df['correct'] == 1)])
total = len(df[(df['participant'] == 'blue') &
    (df['type'] == 'dehydration')])

perc_correct = (correct/total) * 100
perc_correct

86.42857142857143

### Verbal Fluency Analysis

In [58]:
verbal_df = data_dict['verbal'].copy()
verbal_df = verbal_df[verbal_df[1] != 'word count'] # to remove silly headers
verbal_df.rename(columns={0:'word_type', 1:'n'}, inplace=True)
verbal_df['n'] = verbal_df['n'].astype(int)

verbal_avg = verbal_df.groupby(['participant', 'type']).mean().round(2)
error_data = verbal_df.describe().transpose()

verbal_avg_bar = verbal_avg.hvplot.bar(title='Average number of words produced per session type',
                                        xlabel='Participant, Session Type', 
                                        ylabel ='Number of words').opts(xrotation=25)# * error_data.hvplot.errorbars(y='max', yerr1='std')

  verbal_avg = verbal_df.groupby(['participant', 'type']).mean().round(2)


In [59]:
def verbal_test(verbal_df): # Jacob
    verbal_df = data_dict['verbal'].copy()
    verbal_df = verbal_df[verbal_df[1] != 'word count'] # to remove silly headers
    verbal_df.rename(columns={0:'word_type', 1:'n'}, inplace=True)
    verbal_df['n'] = verbal_df['n'].astype(int)

    verbal_avg = verbal_df.groupby(['participant', 'type']).mean().round(2)
    
    verbal_avg_bar = verbal_avg.hvplot.bar(title='Average number of words produced per session type',
                                           xlabel='Participant, Session Type', 
                                           ylabel ='Number of words').opts(xrotation=25)# * verbal_avg.hvplot.errorbars(x=)

    return verbal_df, verbal_avg, verbal_avg_bar

verbal_df, verbal_avg, verbal_avg_bar = verbal_test(data_dict['verbal'])
verbal_avg_bar

  verbal_avg = verbal_df.groupby(['participant', 'type']).mean().round(2)


### Digit Span Analysis

In [None]:
from scipy.stats import sem

def digit_test(digit_df): # Karina
    data_types = {'participant': 'string',
                'type': 'string',
                'repeat': 'int',
                'seq length':'float',
                'errors': 'float',
                'clicks expected': 'float',
                'clicks observed': 'float'
    }
    digit_df = digit_df.astype(data_types)
    digit_span_grouped = digit_df.groupby(['participant','type', 'repeat'])
    digit_span_grouped = digit_span_grouped.agg({'seq length': 'max','errors': 'sum', 'clicks expected':'max','clicks observed':'max'} )

    digit_span_grouped['clicks ratio'] = digit_span_grouped['clicks observed'] - digit_span_grouped['clicks expected']
    digit_span_grouped['seq length'] = digit_span_grouped['seq length'] - 1
    digit_span_grouped

    digit_span_mean_sem = digit_span_grouped.groupby(['participant', 'type']).agg(['mean','sem'])

    seq_length = digit_span_mean_sem['seq length']['mean'].tolist()
    errors = digit_span_mean_sem['errors']['mean'].tolist() 
    participants = digit_span_mean_sem.reset_index().participant.unique().tolist()
    sessions_type = digit_span_mean_sem.reset_index().type.unique().tolist()

    return seq_length, errors, participants, sessions_type

seq_length, errors, participants, sessions_type = digit_test(digit_df = df_dict["digit"])


def digit_barplots(participants,sessions_type, values, palette, y_label):

    x = [ (participant, session) for participant in participants for session in sessions_type]
    #[('blue', 'Control'), ('blue', 'Dehydration'), ('green', 'Control'), ('green', 'Dehydration'), ('orange', 'Control'), ('orange', 'Dehydration'), ('pink', 'Control'), ('pink', 'Dehydration'), ('red', 'Control'), ('red', 'Dehydration')]

    source = ColumnDataSource(data=dict(x=x, counts=values))

    p = figure(x_range=FactorRange(*x), height=350, title="Digit Span",
            toolbar_location=None, tools="")

    p.vbar(x='x', top='counts', width=0.9, source=source, line_color="white",
        fill_color=factor_cmap('x', palette=palette, factors=sessions_type, start=1, end=2))

    p.y_range.start = 0
    p.x_range.range_padding = 0.1
    p.xaxis.major_label_orientation = 1
    p.xgrid.grid_line_color = None

    #  y-axis
    p.yaxis.axis_label = y_label
    p.yaxis.major_label_orientation = "vertical"

    # x-axis
    p.xaxis.axis_label = "Participant"

    return(p)

p_seq_lenght = digit_barplots(participants, sessions_type, values=seq_length, palette=['blue', 'grey'], y_label="Longest sequence remembered")
p_errors = digit_barplots(participants, sessions_type, values=errors, palette=['orange', 'grey'], y_label='Number of errors made')


show(p_seq_lenght)
show(p_errors)

---