# HydroHomies Plots
This notebook presents the plots and figures, together with a short explanation of each.  

To keep the plots clear, please follow these rules:
- A title is mandatory for each plot
- An analysis must be written for each plot
- Legends are mandatory

### Loading all data


In [None]:
import yaml
import pandas as pd
# Read the notebook configuration. `config` is later handed to
# create_merged_df, which iterates it as {test key: Excel workbook path}.
with open('config.yaml') as stream:
    config = yaml.safe_load(stream)

### Cleaning (Digit Span Raw Data)

In [None]:
def clean_digit_span(raw_df):
    """Condense one raw digit-span sheet into a per-trial summary table.

    Rows are classified by the value found in column ``1``:

    * rows whose value starts with a digit are sequence-length rows; their
      column ``2`` is read as the longest sequence remembered and their
      column ``3`` as the number of errors made;
    * rows equal to ``'clickedStim'`` are click rows; the number of
      non-empty cells in such a row, minus 2, is taken as the number of
      clicks made by the participant.

    Both kinds of rows must occur equally often, otherwise building the
    result frame fails.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        Raw sheet data containing at least the columns labelled 1, 2 and 3.

    Returns
    -------
    pandas.DataFrame
        One row per trial with the columns ``'seq length'``, ``'errors'``,
        ``'clicks expected'`` (sequence length + 1) and ``'clicks observed'``.
    """
    row_label = raw_df[1]

    # Sequence-length rows: longest remembered sequence and error count.
    summary_rows = raw_df[row_label.astype(str).str.match(r'\d+')]
    longest = summary_rows[2].tolist()
    error_number = summary_rows[3].tolist()

    # Click rows: count the filled cells of each row, minus 2.
    click_rows = raw_df[row_label == 'clickedStim']
    clicks_observed = (click_rows.count(axis=1) - 2).tolist()

    # The participant is expected to click one more time than the
    # sequence length.
    clicks_expected = (pd.to_numeric(longest) + 1).tolist()

    return pd.DataFrame(data={
        'seq length': longest,
        'errors': error_number,
        'clicks expected': clicks_expected,
        'clicks observed': clicks_observed,
    })


### Data Integration For Each Test

In [None]:
def create_merged_df(config_dict):
    data_dict = {}

    # read the files 
    for test, file in config_dict.items():
        df_dict = pd.read_excel(file, sheet_name=None, header=None)

        for session, df in df_dict.items():

            # extracting the participant name and type name
            participant = test.split('_')[-1]
            test_name = test.split('_')[0]

            #extracting repeat number and making its column except for personal
            try:
                type, repeat = session.split('_')
                df.insert(0, 'repeat', repeat)

            except ValueError:
                type = session


            #cleaning digit_span row data
            if test_name == 'digit':
                df = clean_digit_span(df.iloc[3:])
            
            # verbal fluency test contains header
            elif test_name =='verbal':
                df = df.iloc[1:]

            # inserting the type and participant columns
            df.insert(0, 'type', type)
            df.insert(0, 'participant', participant)
 
            # concatenating data frames of each test
            if test_name not in data_dict:
                data_dict[test_name] = df
            else:
                data_dict[test_name] = pd.concat([data_dict[test_name], df])
    
    return data_dict

In [None]:
# Load and merge all workbooks once; df_dict maps each test name to its
# merged frame (df_dict["digit"] is displayed in the Digit Span section).
df_dict = create_merged_df(config)

---

### Flanker Test Analysis

In [None]:
def create_flanker_dataframe():
    """Return the merged flanker data with named columns and answer labels.

    The workbooks are loaded through ``create_merged_df(config)``; the raw
    columns 0-3 of the ``"flanker"`` frame are renamed and the numeric
    codes in ``correctness`` are replaced by text labels
    (1 -> "correct", 2 -> "incorrect", 3 -> "not-answer").

    Returns
    -------
    pandas.DataFrame
        The flanker frame with the columns ``pattern``, ``expression``,
        ``correctness`` and ``response-time`` in place of 0-3.
    """
    column_names = {0: "pattern", 1: "expression", 2: "correctness", 3: "response-time"}
    answer_labels = {1: "correct", 2: "incorrect", 3: "not-answer"}

    flanker_df = create_merged_df(config)["flanker"].rename(columns=column_names)
    flanker_df["correctness"] = flanker_df["correctness"].replace(answer_labels)
    return flanker_df

# Notebook-level copy of the labelled flanker frame (flanker_plot below
# builds its own frame on every call rather than reading this one).
flanker_df = create_flanker_dataframe()

In [None]:
from bokeh.transform import dodge, factor_cmap
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, FactorRange
import panel as pn
from bokeh.io import output_notebook

# Route Bokeh output to the notebook and load the Panel extension so the
# figures and widgets below render inline.
output_notebook()
pn.extension()

def flanker_plot(answer_type="correct"):  # Roya
    """Bar chart of the average number of ``answer_type`` flanker answers.

    For every participant and session type, the answers labelled
    ``answer_type`` are counted per repeat and those counts are averaged
    over the repeats. One bar is drawn per (participant, type) pair, grouped
    by participant and coloured by session type.

    Parameters
    ----------
    answer_type : str, default "correct"
        Value of the ``correctness`` column to count.

    Returns
    -------
    bokeh figure
        The figure containing the bar chart.
    """
    # NOTE: create_flanker_dataframe() reloads the workbooks on every call,
    # i.e. on every widget change when used with pn.interact.
    flanker_df = create_flanker_dataframe()
    flanker_df = flanker_df[flanker_df["correctness"] == answer_type]

    # Number of matching answers within each single repeat ...
    per_repeat = flanker_df.groupby(["participant", "type", "repeat"])[
        "correctness"].count().reset_index()
    # ... averaged over the repeats of each (participant, type) pair.
    mean_counts = per_repeat.groupby(by=["participant", "type"])[
        "correctness"].mean()

    # Nested categorical factors: one (participant, type) tuple per bar.
    factors = list(mean_counts.index.values)

    # start=1/end=2 makes the colour mapper look only at the second level of
    # the nested factor, i.e. the session type.
    index_cmap = factor_cmap('x', palette=["salmon", "skyblue"], factors=["dehydration", "control"], start=1, end=2)
    data = {
        'x': factors,
        # Heights are taken from the same series as the factors, so each bar
        # is the actual average and always lines up with its x position,
        # whatever the number of repeats or session types per participant.
        'counts': mean_counts.tolist(),
        }

    source = ColumnDataSource(data=data)
    p = figure(x_range=FactorRange(*factors), y_range=(0, 50), height=400, title=f"Average of {answer_type} answers",
               toolbar_location=None, tools="", y_axis_label="counts")

    p.vbar(x='x', top='counts', width=0.9, source=source, fill_color=index_cmap)

    p.y_range.start = 0
    p.x_range.range_padding = 0.1
    p.xaxis.major_label_orientation = 1
    p.xgrid.grid_line_color = None

    return p


# Let the reader pick the answer type; Panel turns the list into a
# selection widget and redraws flanker_plot for the chosen value.
answer_types =['correct','incorrect']
inter_plot = pn.interact(flanker_plot, answer_type = answer_types)
inter_plot

### Stroop Test  Analysis

In [None]:
def stroop_test(): # Mahdiye
    """Return the merged Stroop data with named columns and a session label.

    The workbooks are loaded through ``create_merged_df(config)``. From the
    ``'stroop'`` frame the columns at positions 3 and 7 are dropped, the
    remaining nine columns are renamed, and a ``'type&repeat'`` column is
    added that concatenates the ``type`` and ``repeat`` values of each row.

    Returns
    -------
    pandas.DataFrame
        The relabelled Stroop frame including the ``'type&repeat'`` column.
    """
    column_names = ['participant', 'type','repeat','word name','word color',
                    'name_color match','pressed _key','status','reaction_time']

    merged = create_merged_df(config)['stroop']
    unused_columns = merged.columns[[3, 7]]

    stroop_df = merged.drop(columns=unused_columns).set_axis(column_names, axis=1)
    # Single label per session, used as the category axis of the plots.
    stroop_df['type&repeat'] = stroop_df['type'] + stroop_df['repeat']
    return stroop_df

# Notebook-level Stroop frame; the Stroop plotting functions below read
# this name directly instead of taking the data as an argument.
stroop_df = stroop_test()

In [None]:
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, Whisker
import panel as pn
from bokeh.io import output_notebook

# Repeats the Bokeh/Panel notebook initialisation already performed in the
# flanker plotting cell.
output_notebook()
pn.extension()

In [None]:
def individual_stroop_bar_plot(participant='blue'):
    """Bar chart of one participant's minimum Stroop reaction time per session.

    Reads the notebook-level ``stroop_df``, keeps the rows of ``participant``
    and draws one bar per ``'type&repeat'`` value whose height is the
    smallest ``reaction_time`` of that session.

    Parameters
    ----------
    participant : str, default 'blue'
        Participant to plot. The same value is used as the bar colour, so it
        has to be a colour name Bokeh understands.

    Returns
    -------
    bokeh figure
        The figure containing the bar chart.
    """
    rows = stroop_df[stroop_df['participant'] == participant]

    # Aggregate only the plotted column: min() over the whole frame also
    # reduces every unrelated column and fails as soon as one of them holds
    # values that cannot be compared with each other.
    fastest = rows.groupby('type&repeat')['reaction_time'].min().reset_index()

    p = figure(x_range=fastest['type&repeat'], height=350, toolbar_location=None,
               title=f'Stroop Test {participant}', y_axis_label="Reaction time(milliseconds)")
    p.vbar(x=fastest['type&repeat'], bottom=0, top=fastest['reaction_time'], width=0.5,
           line_color='white', color=participant)
    return p

# Interactive bar plot: the participant identifiers are colour names, which
# individual_stroop_bar_plot also uses as the bar colour.
participants_color =['blue','red','orange','green','pink']
inter_plot = pn.interact(individual_stroop_bar_plot, participant = participants_color)
inter_plot

In [None]:
def individual_stroop_box_plot(participant):
    """Box plot of one participant's Stroop reaction times per session.

    Reads the notebook-level ``stroop_df``, keeps the rows of ``participant``
    and, for every ``'type&repeat'`` value, draws the quartile box, whiskers
    at 1.5 times the inter-quartile range beyond the box, and the reaction
    times outside the whiskers as individual points.

    Parameters
    ----------
    participant : str
        Participant to plot. The same value is used as the box colour, so it
        has to be a colour name Bokeh understands.

    Returns
    -------
    bokeh figure
        The figure containing the box plot.
    """
    df = stroop_df[stroop_df['participant']==participant]

    # Sheets without a repeat number have no 'type&repeat' label (NaN); such
    # rows cannot be placed on the categorical axis and would otherwise all
    # be reported as outliers, so they are left out here.
    df = df.dropna(subset=['type&repeat'])
    kinds = df['type&repeat'].unique()

    # compute quantiles
    qs = df.groupby('type&repeat').reaction_time.quantile([0.25, 0.5, 0.75])
    qs = qs.unstack().reset_index()
    qs.columns = ['type&repeat', "q1", "q2", "q3"]
    df = pd.merge(df, qs, on='type&repeat', how="left")

    # compute IQR outlier bounds
    iqr = df.q3 - df.q1
    df["upper"] = df.q3 + 1.5*iqr
    df["lower"] = df.q1 - 1.5*iqr

    source = ColumnDataSource(df)

    # The y range is derived from all participants so every participant's
    # plot shares the same scale.
    p = figure(x_range=kinds,y_range=[0,stroop_df['reaction_time'].max() * 1.3],tools="", toolbar_location=None,
                title="box plot of stroop test "+participant,
               background_fill_color="#eaefef", y_axis_label="Reaction time(milliseconds)")

    # outlier range
    whisker = Whisker(base='type&repeat', upper="upper", lower="lower", source=source)
    whisker.upper_head.size = whisker.lower_head.size = 20
    p.add_layout(whisker)

    # quantile boxes: the positional arguments are x, width, top, bottom, so
    # no additional bottom keyword must be passed.
    p.vbar('type&repeat', 0.5, "q2", "q3", color=participant, source=source, line_color="black")
    p.vbar('type&repeat', 0.5, "q1", "q2", color=participant, source=source, line_color="black")

    # outliers
    outliers = df[~df.reaction_time.between(df.lower, df.upper)]
    p.scatter('type&repeat', 'reaction_time', source=outliers, size=6, color="black", alpha=0.5)

    p.xgrid.grid_line_color = None
    p.axis.major_label_text_font_size="14px"
    p.axis.axis_label_text_font_size="12px"

    return p
    
# Interactive box plot: one selectable entry per participant; the chosen
# value is passed to individual_stroop_box_plot as `participant`.
participants_color =['blue','red','orange','green','pink']
inter_plot = pn.interact(individual_stroop_box_plot, participant = participants_color)
inter_plot

### Stop Signal Analysis

In [None]:
def stop_test(stop_df): # Jacob
    """Placeholder for the stop-signal analysis; not implemented yet.

    Accepts ``stop_df`` without using it and returns ``None``.
    """
    return None

### Verbal Fluency Analysis

In [None]:
def verbal_test(verbal_df): # Jacob
    """Placeholder for the verbal-fluency analysis; not implemented yet.

    Accepts ``verbal_df`` without using it and returns ``None``.
    """
    return None

### Digit Span Analysis

In [None]:
# TODO (Karina): implement digit_test(digit_df). Until then this cell only
# displays the merged, cleaned digit-span frame.
df_dict["digit"]

---