# HydroHomies Plots
In this notebook, the plots, figures and also some explanations or details about each of them are being presented.  

To clarify plots, please follow this order:
- Title for each plot is mandatory
- Analysis must be written 
- legends are manedatory

### Importing the needed modules

In [75]:
import yaml
import pandas as pd
import numpy as np

from bokeh.plotting import figure, show
from bokeh.io import show, output_notebook
from bokeh.transform import dodge, factor_cmap
from bokeh.models import ColumnDataSource, FactorRange, Whisker
import panel as pn
from scipy.stats import sem
from bokeh.transform import factor_cmap
from bokeh.layouts import gridplot


output_notebook()
pn.extension()

import hvplot.pandas

### Loading all data


In [76]:
with open('config.yaml') as stream:
    config = yaml.safe_load(stream)

### Cleaning (Digit Span Raw Data)

In [77]:
def clean_digit_span(raw_df):
    # Select the sequence length data from the raw data and create a dataframe
    seq_length_df = raw_df[raw_df[1].astype(str).str.match(r'\d+')]

    # Get the value of the longest sequence remembered
    longest = seq_length_df[2]
    longest = longest.tolist()

    # Get the number of errors made
    error_number = seq_length_df[3]
    error_number = error_number.tolist()

    # Select the rows with the click stimulus data
    click_stim_df = raw_df[raw_df[1]=='clickedStim']
    click_stim_df.size

    # Calculate the number of clicks made by the participant
    clicks_observed = click_stim_df.count(axis=1) - 3 
    clicks_observed = clicks_observed.tolist()

    # Calculate the number of clicks that the participant should have made
    clicks_expected =  pd.to_numeric(longest) + 1
    clicks_expected = clicks_expected.tolist()

    # Create a new dataframe with all the values calculated above
    clean_data = pd.DataFrame(data ={'seq length':longest,
                        'errors': error_number,
                        'clicks expected': clicks_expected,
                        'clicks observed':clicks_observed})

    # Return the new dataframe
    return clean_data


### Data Integration For Each Test

In [78]:
def create_merged_df(config_dict):
    data_dict = {}

    # read the files 
    for test, file in config_dict.items():
        df_dict = pd.read_excel(file, sheet_name=None, header=None)

        for session, df in df_dict.items():

            # extracting the participant name and type name
            participant = test.split('_')[-1]
            test_name = test.split('_')[0]

            #extracting repeat number and making its column except for personal
            try:
                type, repeat = session.split('_')
                df.insert(0, 'repeat', repeat)

            except ValueError:
                type = session


            # Running function to clean digit span data
            if test_name == 'digit':
                df = clean_digit_span(df.iloc[3:])
                df.insert(0, 'repeat', repeat)
            
            # verbal fluency test contains header
            elif test_name =='verbal':
                df = df.iloc[1:]

            # inserting the type and participant columns
            df.insert(0, 'type', type)
            df.insert(0, 'participant', participant)
 
            # concatenating data frames of each test
            if test_name not in data_dict:
                data_dict[test_name] = df
            else:
                data_dict[test_name] = pd.concat([data_dict[test_name], df])
    
    return data_dict

data_dict = create_merged_df(config)

In [79]:
# Run the function
df_dict = create_merged_df(config)

---

### Personal health data plots

In [80]:
# creating personal dataframe
def create_personal_dataframe():
    p_df = data_dict["personal"].copy()
    p_df.drop(0, inplace=True)
    p_df.rename(columns={
        0: "session",
        1: "time",
        2: "heartrate",
        3: "calories",
        4:"temperature",
        5:"body weight",
        6: "muscle%",
        7: "fat%",
    }, inplace=True)
    p_df = p_df[[
        "participant",
        "type",
        "session",
        "time",
        "heartrate",
        "calories",
        "temperature",
        "body weight",
        "muscle%",
        "fat%"
    ]]
    
    # fill missing and not correct values with the correct one.
    p_df["heartrate"] = pd.to_numeric(p_df["heartrate"],errors='coerce')
    p_df["heartrate"] = p_df["heartrate"].fillna(85)
    p_df['session'] = p_df['session'].fillna(2)
    p_df["calories"] = p_df["calories"].fillna(1118)
    p_df["temperature"] = p_df["temperature"].fillna(36.4)
    
    p_df = p_df.astype({'heartrate': 'float', 'calories': 'float', 'temperature': 'float',
                       'body weight': 'float', 'fat%': 'float', 'muscle%': 'float'})

    return p_df

personal_df = create_personal_dataframe()

In [81]:
def show_personal_plot(participant='pink', target = 'calories'):

    personal_df = create_personal_dataframe()
    personal_df = personal_df[personal_df["participant"] == participant]

    p = figure(x_range = [personal_df['time'].min(),personal_df['time'].max()*1.02], 
               y_range = [personal_df[target].min()*.5,personal_df[target].max()*1.3],
               title=target, x_axis_label="time(minutes)", y_axis_label=target)

    x = personal_df["time"].unique().tolist()
    y1 = personal_df[(personal_df["type"] == "dehydration") & (personal_df["session"] == 1)][target].tolist()
    y2 = personal_df[(personal_df["type"] == "dehydration") & (personal_df["session"] == 2)][target].tolist()
    y3 = personal_df[(personal_df["type"] == "control") & (personal_df["session"] == 1)][target].tolist()
    y4 = personal_df[(personal_df["type"] == "control") & (personal_df["session"] == 2)][target].tolist()
    
    # add multiple renderers
    p.line(x, y1, legend_label="dehydration1", color="blue", line_width=2)
    p.line(x, y2, legend_label="dehydration2", color="red", line_width=2)
    p.line(x, y3, legend_label="control1", color="green", line_width=2)
    p.line(x, y4, legend_label="control2", color="orange", line_width=2)
    return p

participants =['green','pink', 'orange','blue','red']
targets = ["heartrate", "calories", "temperature"]
personal_plot = pn.interact(show_personal_plot, participant=participants, target = targets)
personal_plot

In [82]:
df = personal_df
def error_bar(target):
    
    #create new column containing participant and the type of test
    df['participant_type'] = df['participant']+' '+df['type']

    # calculate SE
    dff = df.groupby('participant_type').mean().reset_index()
    std = df.groupby('participant_type').std().reset_index()
    n = df.groupby('participant_type').count().reset_index()
    
    name_list = list(dff['participant_type'])
    mean_list = list(dff[target])
    std_list = list(std[target])
    n_list = list(n[target])
    se_list = std_list/np.sqrt(n_list)

    #calculate upper and lower
    upper = mean_list + se_list*1.96
    lower = mean_list - se_list*1.96
    
    # create new dataframe
    data = {'participants_type' : name_list,
            'required_mean' : mean_list,
            'SE' : se_list,
            'upper' : upper,
            'lower' : lower,
            }
    data = pd.DataFrame(data)

    source = ColumnDataSource(data)

    # plot

    p = figure(x_range= name_list, y_range = [data['lower'].min()*.99,data['upper'].max()*1.03],  width=900, 
               height=500, title= target +" error bar", toolbar_location=None, tools="")

    p.circle(x='participants_type', y= 'required_mean', width=0.9, color='blue', source=source)

    whisker = Whisker(base='participants_type', upper="upper", lower="lower", source=source)
    whisker.upper_head.size = whisker.lower_head.size = 20
    p.add_layout(whisker)
    return p

targets = ["heartrate", "calories", "temperature", "body weight", "fat%", "muscle%"]
personal_error_plot = pn.interact(error_bar, target = targets)
personal_error_plot

### Flanker Test Analysis

In [83]:
# creating Flanker dataframe
def create_flanker_dataframe():
    flanker_df = data_dict["flanker"]
    flanker_df.rename(columns={0: "pattern", 1: "expression", 2: "correctness", 3: "response-time"}, inplace=True)
    flanker_df["correctness"] = flanker_df["correctness"].replace(1, "correct")
    flanker_df["correctness"] = flanker_df["correctness"].replace(2, "incorrect")
    flanker_df["correctness"] = flanker_df["correctness"].replace(3, "not-answer")
    return flanker_df

flanker_df = create_flanker_dataframe()

In [84]:
def show_plot(data, title, x_label="", y_label="", palette=["salmon", "skyblue"], factors=["dehydration", "control"]):
    index_cmap = factor_cmap('x', palette=palette, factors=factors, start=1, end=2)
    x = list(data.index.values)
    data_map = {
        'x': x,
        'counts': data.tolist()
        }

    source = ColumnDataSource(data=data_map)
    p = figure(x_range=FactorRange(*x), y_range=(0, 100), height=400, title=title,
               toolbar_location=None, tools="", x_axis_label=x_label, y_axis_label=y_label)

    p.vbar(x='x', top='counts', width=0.9, source=source, fill_color=index_cmap)

    p.y_range.start = 0
    p.x_range.range_padding = 0.1
    p.xaxis.major_label_orientation = 1
    p.xgrid.grid_line_color = None
    return p

def flanker_calculate_counts(flanker_df, answer_type="correct"):
    flanker_df = flanker_df[flanker_df["correctness"] == answer_type]
    flanker_df = flanker_df.groupby(["participant", "type", "repeat"])["correctness"].count().reset_index()
    data = flanker_df.groupby(by=["participant", "type"])["correctness"].mean()
    return data

def flanker_plot_count(answer_type="correct"):  # Roya
    flanker_df = create_flanker_dataframe()
    data = flanker_calculate_counts(flanker_df, answer_type)
    return show_plot(data, f"Average of {answer_type} answers", "participant/session", "count" )

answer_types =['correct','incorrect']
flanker_counts = pn.interact(flanker_plot_count, answer_type = answer_types)
flanker_counts

In [85]:
def calculate_percentage(flanker_df, answer_type="correct"):
    df_all = flanker_df.groupby(["participant", "type", "repeat"]).agg(count=("correctness", "count"))
    flanker_df = flanker_df[flanker_df["correctness"] == answer_type]
    df_correct = flanker_df.groupby(["participant", "type", "repeat"]).agg(count=("correctness", "count"))

    flanker_df = round(df_correct["count"] * 100 / df_all["count"], 2).rename("correctness").reset_index().fillna(0)
    data = flanker_df.groupby(by=["participant", "type"])[
        "correctness"].mean()
    return data
    
def flanker_plot_percentage(answer_type="correct"): 
    flanker_df = create_flanker_dataframe()
    data = calculate_percentage(flanker_df, answer_type)
    return show_plot(data, f"Percentage of {answer_type} answers", "participant/session", "Percentage" )



answer_types =['correct','incorrect']
flanker_percentage = pn.interact(flanker_plot_percentage, answer_type = answer_types)
flanker_percentage

In [86]:
(flanker_df[(flanker_df['correctness'] == 'correct') & (flanker_df['participant'] == 'red')])

Unnamed: 0,participant,type,repeat,pattern,expression,correctness,response-time,4,5,6,7,8
0,red,control,1,BBXBB,0.0,correct,1344.0,,,,,
1,red,control,1,XXXXX,1.0,correct,818.0,,,,,
2,red,control,1,VVXVV,0.0,correct,617.0,,,,,
3,red,control,1,VVXVV,0.0,correct,602.0,,,,,
4,red,control,1,CCCCC,1.0,correct,1084.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
45,red,dehydration,2,VVCVV,0.0,correct,1159.0,,,,,
46,red,dehydration,2,XXXXX,1.0,correct,595.0,,,,,
47,red,dehydration,2,BBVBB,1.0,correct,838.0,,,,,
48,red,dehydration,2,BBVBB,1.0,correct,717.0,,,,,


In [87]:
flanker_df = create_flanker_dataframe()
#dfi = flanker_df.interactive(loc='top').to_dataframe()
Flanker_test_boxplot = flanker_df[flanker_df['correctness'] == 'correct'][
    [
        'response-time', 
        'participant', 
        'type'
    ]].hvplot.box(
        by='type', 
        groupby='participant',
        title='Reaction time for correct responses',
        xlabel='Session Type', 
        ylabel='Resopnse Time (ms)',height=400, width=400)
    
Flanker_test_boxplot



## Error Plot

In [88]:
def calculate_standard_error(flanker_df, answer_type="correct"):
    flanker_df = flanker_df[flanker_df["correctness"] == answer_type]
    flanker_df = flanker_df.groupby(["participant", "type", "repeat"])["correctness"].count().reset_index()
    df_mean = flanker_df.groupby(by=["participant", "type"]).agg(mean=("correctness", "mean"))
    df_se = flanker_df.groupby(by=["participant", "type"]).agg(se=("correctness", "sem"))
    upper = df_mean["mean"] + 1.96 * df_se["se"]
    lower = df_mean["mean"] - 1.96 * df_se["se"]
    data = pd.concat([upper.rename("upper"), lower.rename("lower")], axis=1)
    return data

def plot_standard_error(plot, data):
    x = list(data.index.values)
    data_map = {
        'x': x,
        'upper': data["upper"].tolist(),
        'lower': data["lower"].tolist()

        }
    source = ColumnDataSource(data=data_map)

    w = Whisker(source=source, base="x", upper="upper", lower="lower", 
            line_color='purple', level="overlay")
    w.upper_head.line_color = 'purple'
    w.lower_head.line_color = 'purple'
    w.upper_head.size = w.lower_head.size = 20
    plot.add_layout(w)
    return plot

def flanker_plot_error(answer_type="correct"):  # Roya
    flanker_df = create_flanker_dataframe()
    data = flanker_calculate_counts(flanker_df, answer_type)
    data_se = calculate_standard_error(flanker_df, answer_type)
    p = show_plot(data, f"Standard Error of {answer_type} answers", "participant/session", "SE" )
    p = plot_standard_error(p, data_se)
    return p


answer_types =['correct','incorrect']
inter_plot = pn.interact(flanker_plot_error, answer_type = answer_types)
inter_plot

### Stroop Test  Analysis

In [89]:
def stroop_test(): # Mahdiye
    total_dict = create_merged_df(config)
    stroop_df = total_dict['stroop']
    stroop_df.drop(stroop_df.columns[[3,7]], axis=1, inplace=True)
    stroop_df = stroop_df.set_axis(['participant', 'type','repeat','word name','word color',
                                    'name_color match','pressed _key','status','reaction_time'], axis=1)
    return stroop_df

stroop_df = stroop_test()

In [90]:
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, Whisker
import panel as pn
from bokeh.io import output_notebook

output_notebook()
pn.extension()

In [91]:
def individual_stroop_bar_plot(participant='blue'):
    df = stroop_df[stroop_df['participant']==participant]
    
    dff= df.groupby('type').min().reset_index()
    p = figure(x_range=dff['type'], height=350, toolbar_location=None, 
               title=f'Stroop Test {participant}', y_axis_label="Reaction time(milliseconds)")
    p.vbar(x=dff['type'], bottom=0,top=dff['reaction_time'], width=0.5, line_color='white', color=participant)
    return p

#interactive plots
participants_color =['blue','red','orange','green','pink']
inter_plot = pn.interact(individual_stroop_bar_plot, participant = participants_color)
inter_plot

In [92]:
def individual_stroop_box_plot(participant):
    
    df = stroop_df[stroop_df['participant']==participant]
    kinds = df['type'].unique()
    
    # compute quantiles
    qs = df.groupby('type').reaction_time.quantile([0.25, 0.5, 0.75])
    qs = qs.unstack().reset_index()
    qs.columns = ['type', "q1", "q2", "q3"]
    df = pd.merge(df, qs, on='type', how="left")

    # compute IQR outlier bounds
    iqr = df.q3 - df.q1
    df["upper"] = df.q3 + 1.5*iqr
    df["lower"] = df.q1 - 1.5*iqr

    source = ColumnDataSource(df)

    p = figure(x_range=kinds,y_range=[-100,stroop_df['reaction_time'].max() * 1.3],tools="", toolbar_location=None,
                title="box plot of stroop test "+participant,
               background_fill_color="#eaefef", y_axis_label="Reaction time(milliseconds)")


    # outlier range
    whisker = Whisker(base='type', upper="upper", lower="lower", source=source)
    whisker.upper_head.size = whisker.lower_head.size = 20
    p.add_layout(whisker)

    # quantile boxes
    p.vbar('type', 0.5, "q2", "q3", color = participant,bottom=0, source=source, line_color="black")
    p.vbar('type', 0.5, "q1", "q2", color=participant, bottom=0, source=source, line_color="black")
    
    # outliers
    outliers = df[~df.reaction_time.between(df.lower, df.upper)]
    p.scatter('type', 'reaction_time', source=outliers, size=6, color="black", alpha=0.5)

    p.xgrid.grid_line_color = None
    p.axis.major_label_text_font_size="14px"
    p.axis.axis_label_text_font_size="12px"

    return p
    
#interactive plots
participants_color =['blue','red','orange','green','pink']
stroop_boxplot = pn.interact(individual_stroop_box_plot, participant = participants_color)
stroop_boxplot

In [93]:
def status_bar_stroop(ans = 'correct'):
    
    types = list(stroop_df['type'].unique())
    participants = list(stroop_df['participant'].unique())
    de_list = []
    co_list = []
    for participant in participants:
        df = stroop_df[stroop_df['participant'] ==participant]
        dff = df[df['type'] =='dehydration']
        if ans =='correct':
            de_list.append(len(dff[dff['status']==1]))
            
        if ans == 'not correct':
            de_list.append(len(dff[dff['status']==2]))
            
        if ans == 'not answer':
            de_list.append(len(dff[dff['status']==3]))
            
        dff2 = df[df['type'] =='control']
        if ans =='correct':
            co_list.append(len(dff2[dff2['status']==1]))
            
        if ans == 'not correct':
            co_list.append(len(dff2[dff2['status']==2]))
            
        if ans == 'not answer':
            co_list.append(len(dff2[dff2['status']==3]))
    
    data = {'participants' : participants,
            'control'   : co_list,
            'dehydration'   : de_list,
            }
    data = pd.DataFrame(data)

    palette =  ["skyblue", "salmon"] #colors
    x = [ (participant, test) for participant in participants for test in types ]
    counts = sum(zip(data['control'], data['dehydration']), ()) # like an hstack

    source = ColumnDataSource(data=dict(x=x, counts=counts))
    # plot
    p = figure(x_range=FactorRange(*x), width=900, height=500, title='The number of' +ans+ 'of stroop test',
               toolbar_location=None,y_axis_label= ans+ ' count', x_axis_label="participant, session type")

    p.vbar(x='x', top='counts', width=1, source=source, line_color="white",
           fill_color=factor_cmap('x', palette=palette, factors=types, start=1, end=2))

    p.y_range.start = 0
    p.x_range.range_padding = 0.1
    p.xaxis.major_label_orientation = 1
    p.xgrid.grid_line_color = None
    return p

answers =['correct','not correct','not answer']
stroop_bar_status = pn.interact(status_bar_stroop, ans = answers)
stroop_bar_status

In [94]:
def total_bar_stroop():

    df = stroop_df

    #create a list of different session types
    types = list(df['type'].unique())
    
    dff = df.groupby(['participant','type']).mean().reset_index()
    
    # create a list of participants
    participants = list(dff['participant'].unique())

    #create two list of reaction time regarding session types
    control_mean = list(dff[dff['type'] =='control'].reaction_time)
    dehydration_mean = list(dff[dff['type'] =='dehydration'].reaction_time)

    #create a dictionary of 3 keys and values and then convert into a dataframe
    data = {'participants' : participants,
            'control'   : control_mean,
            'dehydration'   : dehydration_mean,
            }
    data = pd.DataFrame(data)

    palette =  ["skyblue", "salmon"] #colors

    # create a list like:
    # [ ("blue", "control"), ("Ablue", "dehydration"), ("red", "control"), ("red", "dehydration"), ... ]
    x = [ (participant, test) for participant in participants for test in types ]
    counts = sum(zip(data['control'], data['dehydration']), ()) # like an hstack

    source = ColumnDataSource(data=dict(x=x, counts=counts))
    # plot
    p = figure(x_range=FactorRange(*x), y_range=[0, data['dehydration'].max()+200], width=900, height=500, 
                title='Average of reaction time stroop test',
                toolbar_location=None,y_axis_label="Reaction time(milliseconds)",
                x_axis_label="participant, session type")

    p.vbar(x='x', top='counts', width=1, source=source, line_color="white",
           fill_color=factor_cmap('x', palette=palette, factors=types, start=1, end=2))

    p.y_range.start = 0
    p.x_range.range_padding = 0.1
    p.xaxis.major_label_orientation = 1
    p.xgrid.grid_line_color = None

    return p

#### Stroop error bar

In [95]:
def calculate_standard_error(stroop_df):
    
    df_mean = stroop_df.groupby(by=["participant", "type"]).agg(mean=("reaction_time", "mean"))
    df_se = stroop_df.groupby(by=["participant", "type"]).agg(se=("reaction_time", "sem"))
    upper = df_mean["mean"] + 1.96 * df_se["se"]
    lower = df_mean["mean"] - 1.96 * df_se["se"]
    data = pd.concat([upper.rename("upper"), lower.rename("lower")], axis=1)
    return data



def plot_standard_error(plot, data):
    x = list(data.index.values)
    data_map = {
        'x': x,
        'upper': data["upper"].tolist(),
        'lower': data["lower"].tolist()

        }
    source = ColumnDataSource(data=data_map)

    w = Whisker(source=source, base="x", upper="upper", lower="lower", 
            line_color='purple', level="overlay")
    w.upper_head.line_color = 'purple'
    w.lower_head.line_color = 'purple'
    w.upper_head.size = w.lower_head.size = 20
    plot.add_layout(w)
    return plot

def stroop_plot_error_bar():
    data_se = calculate_standard_error(stroop_df)
    p = total_bar_stroop()
    p = plot_standard_error(p, data_se)
    return p

stroop_barplot = stroop_plot_error_bar()
show(stroop_barplot)

In [96]:
def error_bar(df = stroop_df, required = 'reaction_time',test_name = 'stroop_test'):
    
    #create new column containing participant and the type of test
    df['participant_type'] = df['participant']+' '+df['type']

    # calculate SE
    dff = df.groupby('participant_type').mean().reset_index()
    std = df.groupby('participant_type').std().reset_index()
    n = df.groupby('participant_type').count().reset_index()

    name_list = list(dff['participant_type'])
    mean_list = list(dff[required])
    std_list = list(std[required])
    n_list = list(n[required])
    se_list = std_list/np.sqrt(n_list)

    #calculate upper and lower
    upper = mean_list + se_list*1.96
    lower = mean_list - se_list*1.96
    
    # create new dataframe
    data = {'participants_type' : name_list,
            'required_mean' : mean_list,
            'SE' : se_list,
            'upper' : upper,
            'lower' : lower,
            }
    data = pd.DataFrame(data)

    source = ColumnDataSource(data)

    # plot

    p = figure(x_range= name_list, y_range = [data['lower'].min()-100,data['upper'].max()+100],  width=900, 
               height=500, title="Error bar "+ test_name, toolbar_location=None, tools="")

    p.circle(x='participants_type', y= 'required_mean', width=0.9, color='blue', source=source)

    whisker = Whisker(base='participants_type', upper="upper", lower="lower", source=source)
    whisker.upper_head.size = whisker.lower_head.size = 20
    p.add_layout(whisker)
    return p

show(error_bar(df = stroop_df, required = 'reaction_time',test_name = 'stroop_test'))


### Stop Signal Analysis

In [97]:

column_meanings = {'Column':[0,1,2,3,4,5,6,7],
                   'Meaning':['trial type (go or nogo)', 
                              'required response (left or right)', 
                              'when the stop signal is shown (or 0 if not)', 
                              'response time 1', 
                              'status 1 (1=correct, 2=wrong, 3=timeout)',
                              'response time 2 (only in no go trials)',
                              'status 2 (only in no go trials; 1=correct, 2=wrong, 3=timeout)',
                              '1=trial is correct ; 0=trial is not correct']} 

column_meanings = pd.DataFrame(column_meanings)
column_meanings.set_index('Column', inplace=True)
column_meanings


Unnamed: 0_level_0,Meaning
Column,Unnamed: 1_level_1
0,trial type (go or nogo)
1,required response (left or right)
2,when the stop signal is shown (or 0 if not)
3,response time 1
4,"status 1 (1=correct, 2=wrong, 3=timeout)"
5,response time 2 (only in no go trials)
6,"status 2 (only in no go trials; 1=correct, 2=w..."
7,1=trial is correct ; 0=trial is not correct


In [98]:
def stop_test(stop_df): # Jacob
    
    # renaming and reordering columns
    stop_df.rename(columns = {0:'trial_type', 1:'correct_resp.', 
                            2:'stop_signal_delay', 3:'response_time',
                            4:'status', 5:'resonse_time_nogo',
                            6:'status_nogo', 7:'correct'}, inplace = True)

    stop_df = stop_df[['participant', 'type', 'repeat', 'trial_type',
                    'correct_resp.', 'correct', 'response_time',
                    'status', 'stop_signal_delay', 'resonse_time_nogo',
                    'status_nogo']]

    # The average resonse time for go trials per trial type
    avg_go_resp_time = stop_df[stop_df['trial_type'] == 'go'].groupby([
        'participant', 'type','status']).mean()['response_time']


    # The average resonse time for no-go trials per correct/incorrect trial
    avg_nogo_resp_time = stop_df[stop_df['trial_type'] == 'nogo'].groupby([
        'participant', 'type','status_nogo']).mean()['response_time']

    # Good to keep in mind that here, status three corresponds with a correct trail
    # Since there was no press in a no-go trial.

    # Number of errors and time-outs in go trials
    errors_timeout_go = stop_df[(stop_df['trial_type'] == 'go') & 
                                (stop_df['status'] != 1.0)].groupby([
                                    'participant', 'type', 'repeat','status']).count()['trial_type']

    # Number of errors and time-outs in no-go trials
    errors_timeout_nogo = stop_df[stop_df['trial_type'] == 'nogo'].groupby([
        'participant', 'type', 'repeat','status_nogo']).count()['trial_type']
    
    return avg_go_resp_time, avg_nogo_resp_time, errors_timeout_go, errors_timeout_nogo

# callig the function
avg_go_resp_time, avg_nogo_resp_time, errors_timeout_go, errors_timeout_nogo = stop_test(data_dict['stop'])


In [99]:
stop_signal_boxplot = data_dict['stop'][(data_dict['stop']['trial_type'] == 'go') & 
                                     (data_dict['stop']['correct'] == 1)][['response_time', 'participant', 'type']
                                                      ].hvplot.box(by=['participant', 'type'], 
                                                                   #groupby='participant',
                                                                   title='Reaction time for correct responses',
                                                                   xlabel='Session Type', 
                                                                   ylabel='Resopnse Time (ms)',
                                                                   rot=40)

stop_signal_boxplot

### Verbal Fluency Analysis

In [111]:
def barplot(df):
    """
    This function plots a barplot based on a dataframe from within the data_dict

    Args:
        df (DataFrame): a dataframe

    Returns:
        p: a barplot
        
    By:
        Mahdiye
        Jacob Menzinga
    """
  
    # create a list of different session types
    types = list(df['type'].unique())
    
    dff = df.groupby(['participant','type']).mean().reset_index()
    
    # create a list of participants
    participants = list(dff['participant'].unique())

    #create two list of reaction time regarding session types
    control_mean = list(dff[dff['type'] =='control'].n)
    dehydration_mean = list(dff[dff['type'] =='dehydration'].n)

    #create a dictionary of 3 keys and values and then convert into a dataframe
    data = {'participants' : participants,
            'control'   : control_mean,
            'dehydration'   : dehydration_mean,
            }
    data = pd.DataFrame(data)

    palette =  ["skyblue", "salmon"] #colors

    # create a list like:
    # [ ("blue", "control"), ("Ablue", "dehydration"), ("red", "control"), ("red", "dehydration"), ... ]
    x = [ (participant, test) for participant in participants for test in types ]
    counts = sum(zip(data['control'], data['dehydration']), ()) # like an hstack

    source = ColumnDataSource(data=dict(x=x, counts=counts))
    # plot
    p = figure(x_range=FactorRange(*x), y_range=[0, 23], width=900, height=500, 
                title='Title', y_axis_label="Y", x_axis_label="X")

    p.vbar(x='x', top='counts', width=1, source=source, line_color="white",
           fill_color=factor_cmap('x', palette=palette, factors=types, start=1, end=2))

    p.y_range.start = 0
    p.x_range.range_padding = 0.1
    p.xaxis.major_label_orientation = 1
    p.xgrid.grid_line_color = None

    return p

def calculate_standard_error(df, datacol):
    """
    A function to calculate the standard error of one column within a dataframe
    
    Args:
        df: one of the dataframes returned in the data_dict
        datacol: the column of wich you want the SE
    
    Returns:
        data: returns the standard error
        
    By:
        Mahdiye
        Jacob Menzinga
    """
    
    df_mean = df.groupby(by=["participant", "type"]).agg(mean=(datacol, "mean"))
    df_se = df.groupby(by=["participant", "type"]).agg(se=(datacol, "sem"))
    upper = df_mean["mean"] + 1.96 * df_se["se"]
    lower = df_mean["mean"] - 1.96 * df_se["se"]
    data = pd.concat([upper.rename("upper"), lower.rename("lower")], axis=1)
    return data


def plot_standard_error(plot, data):
    """
    A function to add the SE as wiskers to an already existing plot
    The data is derived form the 'calculate_standard_error' function.
    
    Args:
        plot: an alredy existing bokeh plot object
        data: the standard error data
        
    Returns:
        p: the plot given as argument with added whiskers
    """
    
    x = list(data.index.values)
    data_map = {
        'x': x,
        'upper': data["upper"].tolist(),
        'lower': data["lower"].tolist()

        }
    source = ColumnDataSource(data=data_map)

    w = Whisker(source=source, base="x", upper="upper", lower="lower", 
            line_color='purple', level="overlay")
    w.upper_head.line_color = 'purple'
    w.lower_head.line_color = 'purple'
    w.upper_head.size = w.lower_head.size = 20
    plot.add_layout(w)
    return plot

def plot_error_bar(df, datacol):
    """
    Plots a barplot with error whiskers 

    Args:
        df (DataFrame): a dataframe from the data_dict
        datacol (string): the column name of the y-variable you want to plot

    Returns:
        _type_: _description_
        
    By:
        Mahdiye
        Jacob Menzinga
    """
    data_se = calculate_standard_error(df, datacol)
    p = barplot(df)
    p = plot_standard_error(p, data_se)
    return p

In [None]:
plot_error_bar()

In [110]:
def verbal_test(verbal_df): # Jacob
    """
    This function takes the verbal dataframe from the data_dict, turns it into
    its own df, calculates average words produced per participant and plots this
    
    Parameters
        verbal_df: The data_dict with verbal as key
        
    Returns
        verbal_df: The verbal task DataFrame
        verbal_avg: A dataframe with the average words produced per participant
                    and session type
        verbal_barplot: A barplot displaying the verbal_avg data.
    """
    
    verbal_df = data_dict['verbal'].copy()
    verbal_df = verbal_df[verbal_df[1] != 'word count'] # to remove silly headers
    verbal_df.rename(columns={0:'word_type', 1:'n'}, inplace=True)
    verbal_df['n'] = verbal_df['n'].astype(int)

    verbal_avg = verbal_df.groupby(['participant', 'type']).mean().round(2)
    
    verbal_barplot = plot_error_bar(verbal_df, 'n')
    
    return verbal_df, verbal_avg, verbal_barplot

verbal_df, verbal_avg, verbal_barplot = verbal_test(data_dict['verbal'])


# Customizing barplot a little
verbal_barplot.title.text = 'Average word count per participant, per session type'
verbal_barplot.xaxis.axis_label = 'Participant, Session type'
verbal_barplot.yaxis.axis_label = 'Average words repoduced'

show(verbal_barplot)

### Digit Span Analysis

In [None]:
def digit_test(digit_df): # Karina
    '''Function to analyse the Digit Span data and to create a dataframe with the information needed to plot'''
    # Change data types
    digit_df = digit_df.astype({'participant': 'string',
                                'type': 'string',
                                'repeat': 'int',
                                'seq length':'float',
                                'errors': 'float',
                                'clicks expected': 'float',
                                'clicks observed': 'float'})

    digit_df['clicks difference'] = digit_df['clicks observed'] - digit_df['clicks expected']
    # Make calculations by column taking the groups into account               
    digit_grouped = digit_df.groupby(['participant','type', 'repeat']).agg({'seq length': 'max','errors': 'mean', 'clicks difference':'mean'} )

    # Calculte values needed for the analysis
    digit_grouped['seq length'] = digit_grouped['seq length'] - 1

    # Obtain mean and estandar error
    digit_mean_sem = digit_grouped.groupby(['participant', 'type']).agg(['mean','sem'])

    return digit_mean_sem

#Running function
digit_mean_sem = digit_test(digit_df = df_dict["digit"])


def digit_barplots(analysis):
    '''Function to plot the digit span data with error bars'''

    df = digit_mean_sem
    # Data for the barplots
    participants = df.reset_index().participant.unique().tolist()
    sessions_type = df.reset_index().type.unique().tolist()
    values = df[analysis]['mean'].tolist()

    # Data for the error bars
    upper = df[analysis]['mean'] + 1.96 * df[analysis]['sem']
    lower = df[analysis]['mean'] - 1.96 * df[analysis]['sem']
    data = pd.concat([upper.rename("upper"), lower.rename("lower")], axis=1)

    palette=['skyblue', 'salmon']
    # Dictionary to change the y labels
    y_label = {'seq length': 'Number of digits (mean)',
              'errors':'Number of errors made (mean)',
              'clicks difference': 'Diference in errors made (mean)'}
    # Dictionary to change the plot titles
    title = {'seq length': 'Digit Span - Longest sequence remembered',
              'errors':'Digit Span - Erorrs',
              'clicks difference': 'Digit Span - Diference in errors made (mean)'}

    x = [(participant, session) for participant in participants for session in sessions_type]
    source = ColumnDataSource(data=dict(x=x, counts=values))
    
    # Create the barplots
    p = figure(x_range=FactorRange(*x), height=450, title=title[analysis],
            toolbar_location=None, tools="")

    p.vbar(x='x', top='counts', width=0.9, source=source, line_color="white",
        fill_color=factor_cmap('x', palette=palette, factors=sessions_type, start=1, end=2))

    # customise x-axis
    p.xaxis.axis_label = "Participant"
    p.x_range.range_padding = 0.1
    p.xaxis.major_label_orientation = 1
    p.xgrid.grid_line_color = None

    #  customise y-axis
    p.yaxis.axis_label = y_label[analysis]
    p.y_range.start = 0
    p.yaxis.major_label_orientation = "vertical"
    p.y_range.range_padding = 1
    
    # Run the function to add the error bars
    p = plot_standard_error(plot=p, data=data)
    return (p)

p_seq_len = digit_barplots('seq length')
show(p_seq_len )

p_error = digit_barplots('errors')
show(p_error)

# # Making interactive the plots 
#analyses = ['seq length', 'errors']
#inter_plot = pn.interact(digit_barplots, analysis=analyses)
#inter_plot

# # Creating a dashboard
# dashboard = pn.template.BootstrapTemplate(title='Title', sidebar_with = 400)
# dashboard.sidebar.append(inter_plot[0])
# dashboard.main.append(inter_plot[1])
# dashboard.show()


### Dehydration percentage

In [None]:
# Obtain personal data
df_health = df_dict['personal']
# Theres one missing value in the green session. Fill it with the correct value
df_health[0] = df_health[0].fillna(2)

def dehydration_percentage():
    '''Function to obtain the % of dehydration per participant per session'''
    # Keep only the body weight data and drop NaN
    body_weight = df_health[['participant','type', 0, 5]].dropna()
    # drop the rows with strings on them (the ones with index ==0 )
    body_weight = body_weight[body_weight.index!=0]
    # Rename the columns
    body_weight.rename(columns={0:'session', 5:'body weight'}, inplace=True)

    body_weight = body_weight.astype({'body weight':'float'})
    # calculate percentage of dehydration
    body_weight['dehydration %'] = (body_weight.groupby(['participant','type','session'])['body weight'].pct_change()) * 100
    return body_weight



In [None]:
dehydration_percentage = round(dehydration_percentage.dropna(),2)
dehydration_percentage.drop(dehydration_percentage.columns[3], axis=1, inplace=True)
dehydration_percentage = dehydration_percentage[dehydration_percentage['type']=='dehydration']
dehydration_percentage = dehydration_percentage.T
dehydration_percentage.columns = dehydration_percentage.iloc[0] # consider the first row as header
dehydration_percentage = dehydration_percentage[1:]
dehydration_percentage

AttributeError: 'function' object has no attribute 'dropna'

## Panel

In [None]:
sample_text = '''
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque augue eros, tristique ut eros et, bibendum mattis tellus. Integer dui sapien, pulvinar nec ante nec, rutrum feugiat massa. Fusce tristique viverra nunc, sed commodo orci rhoncus sed. Aliquam pellentesque dui lectus, vel gravida eros volutpat vitae. Aliquam faucibus nulla id dolor suscipit elementum. Donec sed ante hendrerit, porta ligula faucibus, venenatis mi. Donec id imperdiet neque. Ut vel blandit urna. Fusce convallis, eros at suscipit aliquam, quam tellus pharetra est, ultrices ultrices dolor mi eu enim. Integer sed rutrum tellus.

Etiam non commodo sem. Fusce faucibus tristique mauris, et fermentum quam euismod et. Vestibulum tempor mi neque, et consectetur odio tincidunt ut. Nunc scelerisque sed neque vitae efficitur. Nulla rutrum purus hendrerit, posuere massa ut, pharetra mi. Pellentesque nisi ipsum, pretium ut interdum eget, tempor at dui. Vestibulum a lectus est. Curabitur faucibus id neque ut pharetra. Proin rutrum aliquet scelerisque. Vestibulum id felis at eros accumsan commodo. Vestibulum nec sem felis. Aenean in ullamcorper diam.

In commodo nisl turpis, id laoreet elit suscipit eu. Mauris ut interdum odio. Vivamus ultricies lorem ligula, ut consequat sapien tempor non. Aenean pellentesque nulla sit amet sem fermentum auctor. Nulla facilisi. Sed iaculis vehicula neque, sit amet tempor libero fringilla quis. Phasellus malesuada placerat elit nec vestibulum. Etiam eu odio imperdiet, ornare leo sed, suscipit magna. Proin diam ante, imperdiet eu odio ac, consectetur euismod ipsum. Vivamus non odio aliquet, dapibus elit sit amet, viverra diam. Proin posuere orci eget orci tempus, ut eleifend ipsum mattis. Fusce ultrices est vitae nibh aliquet sollicitudin. Duis vehicula erat turpis, ac efficitur turpis sagittis eget.

Proin eros sapien, vestibulum at congue a, hendrerit sed lacus. Mauris aliquet egestas mauris, sit amet mattis velit faucibus convallis. Phasellus aliquam sapien eros, quis volutpat velit faucibus ut. Vestibulum pulvinar mollis orci vel fringilla. In dapibus, mi iaculis ornare tincidunt, lacus risus sollicitudin tortor, blandit eleifend tellus arcu id tortor. Cras nec fringilla nunc, a fermentum urna. Vivamus urna ligula, tempus nec dolor sed, fermentum faucibus velit. Nulla convallis vitae turpis in tempor.

Integer non faucibus mi, vel gravida felis. Suspendisse vel mi felis. Curabitur dapibus enim ullamcorper consequat vulputate. Suspendisse scelerisque nibh ut luctus iaculis. Sed nunc urna, hendrerit vel sapien nec, imperdiet posuere felis. Cras varius nibh sed tortor congue, et egestas velit lacinia. Mauris purus magna, posuere vel metus non, tempus mattis lacus. Vestibulum turpis justo, posuere nec ante at, facilisis tristique dui. Aenean gravida, eros in luctus lobortis, ipsum lorem ornare felis, vel volutpat ipsum metus vitae erat. Aliquam condimentum aliquam ipsum, at aliquet quam congue quis. Phasellus eu metus velit.
'''

In [None]:
# CSS styling
css = '''
.sidebar_button .bk-btn-group button {
  font-size: 10pt;
}
.bk-root .bk-btn-default{
  border-radius: 25px;
}

.red_button .bk-btn-default.bk-active {
  background-color: #ff5447;
  color: white;
  border-radius: 25px;
}
.orange_button .bk-btn-default.bk-active {
  background-color: #ff8d54;
  color: white;
  border-radius: 25px;
}
.green_button .bk-btn-default.bk-active {
  background-color: #67ba65;
  color: white;
  border-radius: 25px;
}
.blue_button .bk-btn-default.bk-active {
  background-color: #5fc6fa;
  color: white;
  border-radius: 25px;
}
.pink_button .bk-btn-default.bk-active {
  background-color: #f06ed6;
  color: white;
  border-radius: 25px;
}
'''

pn.extension(raw_css=[css])

dashboard = pn.template.BootstrapTemplate(header_background='#00C9FF',
                                        #  theme=DarkTheme,
                                          sidebar_width=200)


def show_homepage(event):
    title.object = f'##{event.obj.name}'
    
def update_page(event):
    print(event)
    name = event.name
    figure1.clear()
    figure1.append(flanker_counts)
    title.object = f'##{event.obj.name}'
    figures = pn.Row(pn.Column('test2'), pn.Column('test3'))
    
    

# Create sidebar buttons:
homepage_btn       = pn.widgets.Button(name='Homepage', width=150, css_classes=['sidebar_button'])
stroop_btn         = pn.widgets.Button(name='Stroop test', width=150, css_classes=['sidebar_button'])
stop_signal_btn    = pn.widgets.Button(name='Stop Signal test', width=150, css_classes=['sidebar_button'])
flanker_btn        = pn.widgets.Button(name='Flanker test', width=150, css_classes=['sidebar_button'])
digit_span_btn     = pn.widgets.Button(name='Digit span test', width=150, css_classes=['sidebar_button'])
verbal_fluency_btn = pn.widgets.Button(name='Verbal fluency test', width=150, css_classes=['sidebar_button'])

# append buttons to sidebar
dashboard.sidebar.append(homepage_btn)
dashboard.sidebar.append(stroop_btn)
dashboard.sidebar.append(stop_signal_btn)
dashboard.sidebar.append(flanker_btn)
dashboard.sidebar.append(digit_span_btn)
dashboard.sidebar.append(verbal_fluency_btn)

# create participant options buttons
red_btn = pn.widgets.Toggle(name='Red', value=True, width=100, css_classes=['red_button'])
orange_btn = pn.widgets.Toggle(name='Orange', value=True, width=100, css_classes=['orange_button'])
green_btn = pn.widgets.Toggle(name='Green', value=True, width=100, css_classes=['green_button'])
blue_btn = pn.widgets.Toggle(name='Blue', value=True, width=100, css_classes=['blue_button'])
pink_btn = pn.widgets.Toggle(name='Pink', value=True, width=100, css_classes=['pink_button'])

# make buttons interactive
homepage_btn.on_click(show_homepage)
stroop_btn.on_click(update_page)
# stop_signal_btn.on_click(update_page)
# flanker_btn.on_click(update_page)
# digit_span_btn.on_click(update_page)
# verbal_fluency_btn.on_click(update_page)

# fig = figure(height=400, width=400)
# fig.circle(x=range(10), y=range(10))

# create dashboard objects
title = pn.pane.Markdown('##Homepage', width=500)
options = pn.Row(red_btn, orange_btn, green_btn, blue_btn, pink_btn)
figure1 = pn.Column(stroop_barplot)
figure1_text = pn.Row(pn.pane.Markdown('\n'.join(sample_text.split('\n')[:4]), width=800))
figure2 = pn.Column(stroop_boxplot)
figure2_text = pn.Row(pn.pane.Markdown('\n'.join(sample_text.split('\n')[:4]), width=800))

# append dashboard objects to main dashboard
dashboard.main.append(title)
dashboard.main.append(options)
dashboard.main.append(figure1)
dashboard.main.append(figure1_text)
dashboard.main.append(figure2)
dashboard.main.append(figure2_text)

# show dashboard 
dashboard.show()

---