# HydroHomies Plots
This notebook presents the plots and figures of the project, each with a short explanation of what it shows.

To keep the plots clear, please follow these rules:
- Every plot must have a title
- Every plot must be accompanied by a written analysis
- Legends are mandatory

### Importing the needed modules

In [None]:
import warnings

import yaml
import pandas as pd
import numpy as np

from bokeh.plotting import figure, show
from bokeh.io import output_notebook
output_notebook()

import hvplot.pandas

### Loading all data


In [None]:
# Parse `config.yaml` (looked up relative to the working directory) into `config`.
# `config` is later handed to `create_merged_df`, which treats every entry as
# "<name> -> path of an Excel workbook".
with open('config.yaml') as stream:
    config = yaml.safe_load(stream)
# Echo the parsed configuration so the reader can see which files will be loaded.
print(config)

### Cleaning (Digit Span Raw Data)

In [None]:
def clean_digit_span(raw_df):
    """Condense the raw rows of one digit span session into a summary table.

    Two kinds of rows are picked out of ``raw_df`` by looking at column ``1``:

    * rows whose value (as text) starts with a digit: column ``2`` is taken as
      the longest sequence remembered and column ``3`` as the number of errors;
    * rows whose value equals ``'clickedStim'``: the number of non-missing
      cells in the row, minus 2, is taken as the number of clicks observed.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        Raw session data with integer column labels (at least ``1``, ``2``
        and ``3``).

    Returns
    -------
    pandas.DataFrame
        One row per summary row, with the columns ``'seq length'``,
        ``'errors'``, ``'clicks expected'`` (sequence length + 1) and
        ``'clicks observed'``.

    Raises
    ------
    ValueError
        Raised by the ``DataFrame`` constructor when the number of summary
        rows differs from the number of ``'clickedStim'`` rows.
    """
    marker = raw_df[1]

    # Summary rows: the marker column holds a number instead of an event name.
    summary_rows = raw_df[marker.astype(str).str.match(r'\d+')]
    seq_lengths = summary_rows[2].tolist()
    error_counts = summary_rows[3].tolist()

    # Click rows: count the filled cells and drop the 2 that are not clicks.
    click_rows = raw_df[marker == 'clickedStim']
    observed = (click_rows.count(axis=1) - 2).tolist()

    expected = (pd.to_numeric(seq_lengths) + 1).tolist()

    return pd.DataFrame({
        'seq length': seq_lengths,
        'errors': error_counts,
        'clicks expected': expected,
        'clicks observed': observed,
    })

### Data Integration For Each Test

In [None]:
def create_merged_df(config_dict):
    """Read every workbook listed in the configuration and merge it per test.

    Parameters
    ----------
    config_dict : dict
        Maps a key to the path of an Excel workbook. The key is split on
        ``'_'``: its first token names the test and its last token names the
        participant.

    Returns
    -------
    dict
        Maps each test name (first token of the key) to a single DataFrame
        holding all sheets of all workbooks of that test. Every frame starts
        with the columns ``'participant'`` and ``'type'``, followed by
        ``'repeat'`` for sheets named ``<type>_<repeat>``. Sheets whose name
        does not split into exactly two parts get no ``'repeat'`` value and
        the whole sheet name is used as their ``'type'``.

    Notes
    -----
    Workbooks whose key starts with ``'digit_span'`` are passed through
    ``clean_digit_span``. The test name is only the first token of the key and
    therefore can never equal ``'digit_span'``, so the check is made on the
    full key. Cleaning runs on the raw sheet, *before* the metadata columns
    are added, because ``clean_digit_span`` counts the filled cells of a row
    and returns a brand-new frame; extra columns would be counted and then
    lost. A sheet that cannot be cleaned is skipped with a warning.
    """
    frames_by_test = {}

    for test, file in config_dict.items():
        key_parts = test.split('_')
        test_name, participant = key_parts[0], key_parts[-1]
        is_digit_span = test.startswith('digit_span')

        # sheet_name=None loads every sheet as {sheet name: DataFrame}
        sheets = pd.read_excel(file, sheet_name=None, header=None)

        for session, df in sheets.items():
            # "<type>_<repeat>" sheets carry a repeat number, others do not
            try:
                session_type, repeat = session.split('_')
            except ValueError:
                session_type, repeat = session, None

            if is_digit_span:
                try:
                    # the first three rows are not passed to the cleaner
                    # NOTE(review): presumably header rows - confirm
                    df = clean_digit_span(df.iloc[3:])
                except Exception as err:
                    warnings.warn(
                        'Skipping sheet {!r} of {!r}: digit span cleaning failed ({!r})'
                        .format(session, file, err),
                        stacklevel=2,
                    )
                    continue

            # prepend the metadata columns: participant, type, repeat
            if repeat is not None:
                df.insert(0, 'repeat', repeat)
            df.insert(0, 'type', session_type)
            df.insert(0, 'participant', participant)

            frames_by_test.setdefault(test_name, []).append(df)

    # one concat per test instead of growing a frame inside the loop
    return {name: pd.concat(frames) for name, frames in frames_by_test.items()}

# Build the per-test data frames once; the analysis cells below read from `data_dict`.
data_dict = create_merged_df(config)

---

### Flanker Test Analysis

In [None]:
import matplotlib
# Switch matplotlib to its built-in 'ggplot' style sheet before plotting.
matplotlib.style.use('ggplot')

def flanker_test(flanker_df): # Roya
    """Draw a bar chart of the number of correct flanker answers per participant.

    The integer columns ``0``-``3`` are named ``pattern``, ``expression``,
    ``correctness`` and ``response-time``; the correctness codes ``1``, ``2``
    and ``3`` are labelled ``"correct"``, ``"incorrect"`` and ``"not-answer"``.
    Only the correct trials are counted, separately for the session types
    ``"dehydration"`` and ``"control"``.

    Parameters
    ----------
    flanker_df : pandas.DataFrame
        Flanker data with the columns ``'participant'``, ``'type'`` and the
        integer-labelled columns ``0``-``3``. The frame is not modified.

    Returns
    -------
    None
        The chart is drawn through ``DataFrame.plot``.
    """
    # rename() returns a new frame, so the caller's data keeps its raw labels
    labelled = flanker_df.rename(
        columns={0: "pattern", 1: "expression", 2: "correctness", 3: "response-time"}
    )
    labelled["correctness"] = labelled["correctness"].replace(
        {1: "correct", 2: "incorrect", 3: "not-answer"}
    )
    correct = labelled[labelled["correctness"] == "correct"]

    def count_by_participant(condition):
        """Number of correct trials per participant for one session type."""
        in_condition = correct[correct["type"] == condition]
        return (in_condition.groupby(["participant"])["correctness"]
                .count().rename(condition).to_frame())

    # Outer join: a participant present in only one of the two conditions must
    # still appear in the chart (a left join would silently drop them).
    summary = count_by_participant("dehydration").join(
        count_by_participant("control"), how="outer"
    )
    summary.plot(kind="bar", width=0.3, title=" correct answers")

# `data_dict` was built right after `create_merged_df` was defined, so it is reused
# here rather than reading every workbook from disk a second time.
flanker_test(data_dict["flanker"])

### Stroop Test Analysis

In [None]:
def stroop_test(stroop_df): # Mahdiye
    """Placeholder for the Stroop test analysis; not implemented yet.

    ``stroop_df`` is accepted but not used, and nothing is returned.
    """
    return None

### Stop Signal Analysis

In [None]:

# Lookup table describing the raw columns of the stop signal data:
# one row per column number, indexed by that number.
column_meanings = pd.DataFrame({
    'Column': list(range(8)),
    'Meaning': [
        'trial type (go or nogo)',
        'required response (left or right)',
        'when the stop signal is shown (or 0 if not)',
        'response time 1',
        'status 1 (1=correct, 2=wrong, 3=timeout)',
        'response time 2 (only in no go trials)',
        'status 2 (only in no go trials; 1=correct, 2=wrong, 3=timeout)',
        '1=trial is correct ; 0=trial is not correct',
    ],
}).set_index('Column')

# Display the table as the cell output.
column_meanings


In [None]:
# Rename the raw integer columns on a copy, so that `data_dict['stop']` keeps its
# original labels and this cell gives the same result every time it is run.
stop_df = data_dict['stop'].rename(columns={0: 'trial_type', 1: 'correct_resp.',
                                            2: 'stop_signal_delay', 3: 'response_time',
                                            4: 'status', 5: 'resonse_time_nogo',
                                            6: 'status_nogo', 7: 'correct'})

# Reorder the columns: session metadata first, then the trial data.
stop_df = stop_df[['participant', 'type', 'repeat', 'trial_type',
                   'correct_resp.', 'correct', 'stop_signal_delay', 'response_time',
                   'status', 'resonse_time_nogo', 'status_nogo']]

go_trials = stop_df[stop_df['trial_type'] == 'go']
nogo_trials = stop_df[stop_df['trial_type'] == 'nogo']

# The column is selected *before* aggregating: averaging the whole frame would
# also try to average the text columns, which raises a TypeError on pandas >= 2.0.

# The average response time for go trials per status
avg_go_resp_time = go_trials.groupby(
    ['participant', 'type', 'status'])['response_time'].mean()

# The average response time for no-go trials per correct/incorrect trial
avg_nogo_resp_time = nogo_trials.groupby(
    ['participant', 'type', 'status_nogo'])['response_time'].mean()

# Good to keep in mind that here, status three corresponds with a correct trial,
# since there was no press in a no-go trial.

# Number of errors and time-outs in go trials (every status other than 1)
errors_timeout_go = go_trials[go_trials['status'] != 1.0].groupby(
    ['participant', 'type', 'repeat', 'status'])['trial_type'].count()

# Number of no-go trials per no-go status
errors_timeout_nogo = nogo_trials.groupby(
    ['participant', 'type', 'repeat', 'status_nogo'])['trial_type'].count()

# print(errors_timeout_nogo)



In [None]:
# Box plot of the response times per session type, with a selector per participant.
# The notebook rules require a title on every plot, so one is set explicitly.
stop_df[['response_time', 'participant', 'type'
         ]].hvplot.box(by='type', groupby='participant',
                       xlabel='Session Type', ylabel='Response Time (ms)',
                       title='Stop signal: response time per session type')

In [None]:
def stop_test(stop_df): # Jacob
    """Placeholder for the stop signal analysis; not implemented yet.

    ``stop_df`` is accepted but not used, and nothing is returned.
    """
    return None

### Verbal Fluency Analysis

In [None]:
# Pull the merged verbal fluency data out of `data_dict` and display it for inspection.
verbal_df = data_dict['verbal']
verbal_df

In [None]:
def verbal_test(verbal_df): # Jacob
    """Placeholder for the verbal fluency analysis; not implemented yet.

    ``verbal_df`` is accepted but not used, and nothing is returned.
    """
    return None

### Digit Span Analysis

In [None]:
def digit_test(digit_df): # Karina
    """Placeholder for the digit span analysis; not implemented yet.

    ``digit_df`` is accepted but not used, and nothing is returned.
    """
    return None

---