# HydroHomies Plots
This notebook presents the plots and figures, together with a short explanation of each one.

To keep the plots clear, please follow these rules:
- A title for each plot is mandatory
- An analysis must be written for each plot
- Legends are mandatory

### Loading all data


In [12]:
import yaml
import pandas as pd
# Load the project configuration from config.yaml in the working directory.
# The printed output shows a flat mapping of '<test>_<participant>' keys to
# the paths of the corresponding .xlsx workbooks.
with open('config.yaml') as stream:
    config = yaml.safe_load(stream)
# Echo the parsed mapping so the resolved workbook paths are visible in the notebook
print(config)

{'personal_data_blue': '/commons/dsls/hydrohomies/Data/blue/personal_data_blue.xlsx', 'verbal_fluency_test_blue': '/commons/dsls/hydrohomies/Data/blue/verbal_fluency_test_blue.xlsx', 'stroop_test_blue': '/commons/dsls/hydrohomies/Data/blue/stroop_test_blue.xlsx', 'stop_signal_test_blue': '/commons/dsls/hydrohomies/Data/blue/stop_signal_test_blue.xlsx', 'flanker_test_blue': '/commons/dsls/hydrohomies/Data/blue/flanker_test_blue.xlsx', 'digit_span_test_blue': '/commons/dsls/hydrohomies/Data/blue/digit_span_test_blue.xlsx', 'personal_data_green': '/commons/dsls/hydrohomies/Data/green/personal_data_green.xlsx', 'verbal_fluency_test_green': '/commons/dsls/hydrohomies/Data/green/verbal_fluency_test_green.xlsx', 'stroop_test_green': '/commons/dsls/hydrohomies/Data/green/stroop_test_green.xlsx', 'stop_signal_test_green': '/commons/dsls/hydrohomies/Data/green/stop_signal_test_green.xlsx', 'flanker_test_green': '/commons/dsls/hydrohomies/Data/green/flanker_test_green.xlsx', 'digit_span_test_gree

### Cleaning (Digit Span Raw Data)

In [13]:
def clean_digit_span(raw_df):
    """Summarise raw digit-span rows into one record per result row.

    Rows whose column ``1`` (viewed as text) starts with digits are treated as
    result rows: column ``2`` is reported as the sequence length and column
    ``3`` as the number of errors.  Rows whose column ``1`` equals
    ``'clickedStim'`` supply the observed clicks, computed as the number of
    non-null cells in the row minus 2.

    NOTE(review): the ``- 2`` presumably discounts the two leading cells
    (columns ``0`` and ``1``); any additional non-null column in ``raw_df``
    inflates the observed click count -- confirm against the caller.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        Raw sheet rows with integer column labels (at least ``1``, ``2``, ``3``).

    Returns
    -------
    pandas.DataFrame
        Columns ``'seq length'``, ``'errors'``, ``'clicks expected'`` and
        ``'clicks observed'``.  Construction fails with ``ValueError`` when the
        number of result rows differs from the number of ``'clickedStim'`` rows.
    """
    row_labels = raw_df[1]

    # Result rows carry a numeric label; click rows carry the literal marker
    result_rows = raw_df[row_labels.astype(str).str.match(r'\d+')]
    click_rows = raw_df[row_labels == 'clickedStim']

    seq_lengths = result_rows[2].tolist()
    error_counts = result_rows[3].tolist()

    # One more click than the sequence length is expected per trial
    expected_clicks = (pd.to_numeric(seq_lengths) + 1).tolist()
    # Non-null cells per click row, minus the two leading non-click cells
    observed_clicks = (click_rows.count(axis=1) - 2).tolist()

    return pd.DataFrame({
        'seq length': seq_lengths,
        'errors': error_counts,
        'clicks expected': expected_clicks,
        'clicks observed': observed_clicks,
    })


### Data Integration For Each Test

In [14]:
def create_merged_df(config_dict):
    data_dict = {}
    
    # select all files 
    files = {name: file for name, file in config_dict.items()} # files = config_dic.copy()
    # read the files 
    for test, file in files.items():
        df_dict = pd.read_excel(file, sheet_name=None, header=None)

        for session, df in df_dict.items():
            # extracting the participant name and type name
            participant = test.split('_')[-1]
            test_name = test.split('_')[0]
            #extracting repeat number and making its column except for personal
            try:
                type, repeat = session.split('_')
                df.insert(0, 'repeat', repeat)
            except ValueError:
                type = session

            # inserting the type and participant columns
            df.insert(0, 'type', type)
            df.insert(0, 'participant', participant)

            # cleaning 'training trial rows' in digit_span data
            if test_name == 'digit_span':
                try:
                    df = clean_digit_span(df.iloc[3:])
                except:
                    continue   
            # concatenating data frames of each test
            if test_name not in data_dict:
                data_dict[test_name] = df
            else:
                data_dict[test_name] = pd.concat([data_dict[test_name], df])
    
    return data_dict

In [None]:
# Build the dictionary of merged per-test DataFrames from every workbook listed in config
data_dict = create_merged_df(config)

---

### Flanker Test Analysis

In [None]:
import matplotlib
# Apply matplotlib's 'ggplot' style sheet to the figures drawn from here on
matplotlib.style.use('ggplot')

def flanker_test(flanker_df): # Roya
    """Plot, per participant, the number of correct flanker answers by session type.

    Columns ``0``-``3`` are interpreted as pattern, expression, correctness and
    response time.  Correctness codes are labelled ``1`` -> "correct",
    ``2`` -> "incorrect", ``3`` -> "not-answer"; only "correct" trials are
    counted.  The result is drawn as a grouped bar chart with one bar for the
    ``dehydration`` session and one for the ``control`` session.

    The input frame is not modified.  Participants without any correct answer
    in a session are shown with a count of 0 instead of being dropped.

    Parameters
    ----------
    flanker_df : pandas.DataFrame
        Merged flanker data containing ``participant`` and ``type`` columns
        plus the raw columns ``0``-``3`` (or their renamed equivalents).

    Returns
    -------
    None
        The chart is drawn as a side effect.
    """
    column_names = {0: "pattern", 1: "expression", 2: "correctness", 3: "response-time"}
    outcome_labels = {1: "correct", 2: "incorrect", 3: "not-answer"}

    # rename() without inplace returns a new frame, so the caller's data stays untouched
    trials = flanker_df.rename(columns=column_names)
    is_correct = trials["correctness"].replace(outcome_labels).eq("correct")

    # to_numpy() sidesteps index alignment; the merged frame has repeated index labels
    tallies = trials[["participant", "type"]].assign(correct=is_correct.to_numpy())

    correct_counts = (
        tallies
        .groupby(["participant", "type"])["correct"]
        .sum()
        .unstack(fill_value=0)
        # keep exactly the two plotted sessions, in the original bar order
        .reindex(columns=["dehydration", "control"], fill_value=0)
        # no column-axis name, so the legend carries no extra title
        .rename_axis(columns=None)
    )

    correct_counts.plot(
        kind="bar",
        width=0.3,
        title=" correct answers",
        ylabel="number of correct answers",
    )

# data_dict is already built by the create_merged_df(config) call in the data
# integration section, so it is reused here rather than re-reading every workbook.
flanker_test(data_dict["flanker"])

### Stroop Test Analysis

In [None]:
def stroop_test(stroop_df): # Mahdiye
    """Placeholder for the Stroop test analysis (not implemented yet).

    The argument is currently ignored and the function returns None.
    """

### Stop Signal Analysis

In [None]:
def stop_test(stop_df): # Jacob
    """Placeholder for the stop-signal test analysis (not implemented yet).

    The argument is currently ignored and the function returns None.
    """

### Verbal Fluency Analysis

In [None]:
def verbal_test(verbal_df): # Jacob
    """Placeholder for the verbal fluency test analysis (not implemented yet).

    The argument is currently ignored and the function returns None.
    """

### Digit Span Analysis

In [22]:
# def digit_test(digit_df): # Karina
#     pass
# data_dict["digit"]

Unnamed: 0,participant,type,repeat,0,1,2,3,4,5,6,...,14,15,16,17,18,19,20,21,22,23
0,blue,control,1,1,clickedStim,14,13,5,,,...,,,,,,,,,,
1,blue,control,1,1,rts,907,892,1203,,,...,,,,,,,,,,
2,blue,control,1,1,0,2,0,//,8,7,...,,,,,,,,,,
3,blue,control,1,2,clickedStim,10,13,5,,,...,,,,,,,,,,
4,blue,control,1,2,rts,1453,723,931,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31,orange,dehydration,2,2,rts,1633,482,766,640.0,628.0,...,,,,,,,,,,
32,orange,dehydration,2,2,5,6,0,//,9.0,1.0,...,2.0,8.0,4.0,7.0,,,,,,
33,orange,dehydration,2,2,clickedStim,12,14,18,17.0,13.0,...,,,,,,,,,,
34,orange,dehydration,2,2,rts,984,398,1285,426.0,527.0,...,,,,,,,,,,


---