### Trial Level Accuracy

<b>Function</b>: Calculates the classifier accuracy for each trial on which the subject also rated how well they paid attention to the specified task.
<br>
#### Output
* trial_accuracy.csv (*trial-by-trial accuracies for subject*)
<br>
* ratings.csv (*trial-by-trial accuracies matched with ratings for subject*)
<br>
* all_subjects_rating.csv (*ratings files for all subjects in one*)
<br>
<br>
* rating_level_accuracy.csv (*accuracy collapsed by rating with rating count (number of times the rating was entered)*)
* rating_level_accuracy_clean.csv (*same as above with rating count for trials >= 3 consecutive events*)
<br>
#### Within Subjects Correlation - All Conditions
<br>
* all_subjects_within_subj_corr_n33.csv (*trial accuracy by rating: R, p, & z values for each subject*)
<br>
#### Within Subjects Correlation - By Condition
<br>
* trial accuracy by rating: R, p, & z values for each subject, separated by condition
* all_subjects_within_subj_corr_n9_breath.csv
* all_subjects_within_subj_corr_n6_self.csv
* all_subjects_within_subj_corr_n9_sounds.csv
* all_subjects_within_subj_corr_n9_feet.csv

In [None]:
import os

import numpy as np
import pandas as pd
import scipy.stats
import scipy.io as sio

#### Set analysis name

In [None]:
# Analysis name; the cells below use it as the sub-directory of root_dir
# that holds this analysis's results.
analysis = "phase1_demo"

#### Set Root, Regressor, and Eprime files directory

In [None]:
# Base directory of the EMBODY files; the regressor and E-Prime directories
# are sub-directories of it.
root_dir = "/Path/to/EMBODY/files"
regressor_dir = "/".join([root_dir, "regressors"])
eprime_files = "/".join([root_dir, "eprime_files"])

#### Set up subjects & subject task orders

In [None]:
# Subject numbers to process and, at the same list position, the regressor
# order name for each subject (the loops below pair subjects[i] with orders[i]).
subjects = [124]
orders = ['order2']

--------

### Import Necessary Files

<h4>Import MATLAB File</h4>

In [None]:
def import_matlab_file(subj):
    """Load a subject's pretty_results_step1.mat as a DataFrame.

    subj: subject number (int); it is formatted with %d into the results path.

    Changes the working directory to the subject's step1 results directory,
    reads the "prettyResult" struct from pretty_results_step1.mat and returns
    a DataFrame with the columns "block" and "classCorrect".
    """
    results_dir = "%s/%s/results/step1/%d" % (root_dir, analysis, subj)
    os.chdir(results_dir)

    pretty_result = sio.matlab.loadmat("pretty_results_step1.mat")["prettyResult"][0]

    # each field is nested two levels deep inside the loaded struct array
    columns = {}
    for field in ("block", "classCorrect"):
        columns[field] = pd.Series(pretty_result[field][0][0])

    return pd.DataFrame(columns)

<h4>Import EPrime Ratings File</h4>

In [None]:
def openTable(name):
    """Read an E-Prime export file and parse it into a table with asciitable.

    The file is decoded as UTF-16 and everything before the first occurrence
    of "ExperimentName" is discarded before the remaining text is parsed.

    name: filename (example, openTable("embody_mvpa.txt"))

    Returns the result of asciitable.read(text, numpy=False).
    Raises ValueError if "ExperimentName" does not occur in the file.
    """
    import io

    import asciitable

    # io.open decodes while reading on both Python 2 and Python 3 (a Python 3
    # str has no .decode()), and the context manager closes the file handle,
    # which the previous bare open(name).read() never did.
    with io.open(name, encoding="utf-16") as handle:
        text = handle.read()

    table_text = text[text.index("ExperimentName"):]

    return asciitable.read(table_text, numpy=False)

<h4>Import regressor files matched for each task order</h4>

In [None]:
def import_regressor_files(order):
    """Build a per-TR trial/condition table from an order's regressors.mat.

    order: name of the order sub-directory under regressor_dir; the notebook
        calls this with strings such as "order1".

    Changes the working directory to that sub-directory, loads the
    "regressors" matrix from regressors.mat, labels each row (TR) with a
    condition name and a running trial number, removes the "break" rows and
    returns a DataFrame with the columns "trial" and "condition", without its
    final row.
    """
    os.chdir("%s/%s" % (regressor_dir, order))

    # read file with regressors; after transposing there is one row per TR
    loaded = sio.matlab.loadmat("regressors.mat")
    data = pd.DataFrame(loaded["regressors"]).transpose()
    data["trial"] = None
    data = data.copy()
    data["condition"] = None

    # Positional columns used with .iloc below. These positions only line up
    # when the .mat matrix contributes five regressor columns (0-4).
    trial_col = 5
    condition_col = 6
    last_row = data.shape[0] - 1

    # label for regressor column k; the first flag equal to 1 decides the label
    labels = ("breath", "feet", "stop", "self", "sounds")

    #### INSERT CONDITION NAMES
    for row in data.itertuples():
        row_pos = row[0]
        if row_pos >= last_row:
            # the final row is left unlabelled
            continue

        flags = row[1:6]

        # a row summing to 0 represents a break, anything else an event
        if sum(flags) == 0:
            data.iloc[row_pos, trial_col] = None
            data.iloc[row_pos, condition_col] = "break"
            continue

        for flag, label in zip(flags, labels):
            if flag == 1:
                data.iloc[row_pos, condition_col] = label
                break

    #### INSERT TRIAL NUMBERS
    trial_number = 0
    for row_pos in range(last_row):
        current = data.iloc[row_pos, condition_col]
        upcoming = data.iloc[row_pos + 1, condition_col]

        # a break row followed by a different condition starts a new trial
        if current == "break" and current != upcoming:
            trial_number += 1

        data.iloc[row_pos, trial_col] = trial_number

    # the final row takes over the trial number of the row before it
    data.iloc[len(data) - 1, trial_col] = data.iloc[len(data) - 2, trial_col]

    data = data[data["condition"] != "break"].reset_index(drop=True)
    data = data[["trial", "condition"]]

    return data[:-1]

#### Set Regressor Orders

In [None]:
# Load the per-TR trial/condition table for each of the four task orders.
order1_regressor, order2_regressor, order3_regressor, order4_regressor = [
    import_regressor_files(order_name)
    for order_name in ("order1", "order2", "order3", "order4")
]

-----------

#### Set up functions

In [None]:
def accuracy_by_trial(subj, order):
    """Combine a subject's classifier results with an order's trial labels.

    subj: subject number (int), passed to import_matlab_file.
    order: order name passed to import_regressor_files, e.g. "order2".
        E.g., accuracy_by_trial(124, "order2")

    Returns a tuple (data, trial_accuracy):
        data: the classifier results merged on row index with the
            trial/condition table (block, classCorrect, trial, condition).
        trial_accuracy: one row per trial with the mean of classCorrect.
    """
    trial_labels = import_regressor_files(order)
    classifier_results = import_matlab_file(subj)

    data = classifier_results.merge(trial_labels, left_index=True, right_index=True)

    per_trial_mean = data.groupby(["trial"])["classCorrect"].mean()
    trial_accuracy = per_trial_mean.to_frame().reset_index()

    return data, trial_accuracy

In [None]:
def data_for_subj_ratings(subj, order):
    """Return one row per trial with trial accuracy and condition for the rated trials.

    subj: subject number (int).
    order: order name as accepted by accuracy_by_trial, e.g. "order2".
        E.g., data_for_subj_ratings(124, "order2")

    Returns a DataFrame with the columns trial, block, classCorrect (the
    trial-level mean accuracy) and condition, built from the rows at
    position 1080 onward of the merged per-TR table and de-duplicated.
    """
    # Call accuracy_by_trial once and reuse both results; each call re-reads
    # the regressor and classifier .mat files.
    data, trial_accuracy = accuracy_by_trial(subj, order)
    condition = data[["block", "trial", "condition"]]

    # Join trial condition information with trial level accuracy data,
    # keeping one row per row of `condition`.
    merged = trial_accuracy.merge(condition, how="right", on="trial")

    # reorder column names
    merged = merged[["trial", "block", "classCorrect", "condition"]]

    # Rows from position 1080 onward; the original comment describes these as
    # trial 40 to the last trial. NOTE(review): the offset is hard-coded, so
    # confirm it against the regressor files in use.
    rated_rows = merged.iloc[1080:]

    # Keep only one row for each trial (the others are duplicates) and make
    # the index sequential.
    return rated_rows.drop_duplicates().reset_index(drop=True)

In [None]:
def eprime_ratings_with_regressors(subj, order):
    """Import subject's eprime ratings file & merge with regressor & trial accuracy data.

    subj: subject number (int); selects the sub-directory of eprime_files
        and is part of the file name.
    order: order name (e.g. "order2"); part of the file name and passed on
        to data_for_subj_ratings.

    Changes the working directory to the subject's E-Prime directory, reads
    "embody_<order>_<subj>.txt" with openTable, and merges the remaining
    E-Prime rows by position with the output of data_for_subj_ratings.

    Returns a DataFrame with the columns trial, block, classCorrect,
    condition, Block, Condition, Duration and BodyRatings3.RESP, with
    "Stop" rows removed and "Block" filled with 4, 5 or 6 by row position.
    """
    ### SET PATH TO EPRIME FILES
    os.chdir('%s/%d' % (eprime_files, subj))

    eprime_df = pd.DataFrame(openTable("embody_%s_%s.txt" % (order, subj)))

    # .copy() so the assignments below act on an independent frame rather
    # than on a slice of eprime_df
    task_conditions = eprime_df[["Block", "Condition", "Duration", "BodyRatings3.RESP"]].copy()
    # NOTE(review): dividing by 1000 presumably converts ms to s -- confirm
    task_conditions.loc[:, "Duration"] = pd.to_numeric(task_conditions.loc[:, "Duration"]) / 1000

    # Remove baseline & Search conditions
    keep = ~task_conditions["Condition"].isin(["Baseline", "Search"])
    task_conditions = task_conditions[keep].copy()

    # "Block" is overwritten with 0 first, so its original values cannot
    # cause a row to be dropped; it is filled with block numbers further down.
    task_conditions.loc[:, "Block"] = 0

    # Drop rows with NaN values
    task_conditions = task_conditions.dropna(how="any")
    task_conditions = task_conditions.reset_index(drop=True)

    merged_data = data_for_subj_ratings(subj, order).merge(task_conditions, left_index=True, right_index=True)
    merged_data = merged_data[merged_data["Condition"] != "Stop"].copy()

    merged_data.loc[:, "BodyRatings3.RESP"] = pd.to_numeric(merged_data["BodyRatings3.RESP"], downcast='float')

    # add block numbers to df
    merged_data = merged_data.reset_index(drop=True)

    # Look the column up by name. The previous code wrote to positional
    # column 5, which in this merged frame is "Condition", so the condition
    # labels were overwritten and "Block" stayed 0.
    block_col = merged_data.columns.get_loc("Block")

    for row_pos in range(len(merged_data)):
        row_number = row_pos + 1

        if 1 <= row_number < 14:
            block_number = 4
        elif 14 <= row_number < 27:
            block_number = 5
        elif 27 <= row_number < 39:
            block_number = 6
        else:
            print("Index is out of range: %d" % row_pos)
            continue

        merged_data.iloc[row_pos, block_col] = block_number

    return merged_data

In [None]:
def remove_nan_pearsons_r(rating_array, accuracy_array, subj):
    """Count & remove any nan (not a number) values before Pearson's R correlation.

    rating_array, accuracy_array: array-likes whose values are paired by position.
    subj: subject number (int), used only in the printed messages.

    Prints a short report (the reported count covers nan values in
    rating_array only) and returns (rating_array, accuracy_array) as numpy
    arrays from which every pair containing a nan in either array is removed.
    """
    print("** Subject %d's data contains null values." % subj)
    print("** Dropping null values before correlation.")

    # number of nan values among the ratings
    nan_count = sum(1 for rating in rating_array if np.isnan(rating))

    print("** Removed %d null values." % nan_count)
    print("")

    # pair the two arrays by position, then drop every pair with a nan
    paired = pd.concat(
        [pd.Series(rating_array), pd.Series(accuracy_array)],
        axis=1,
        join="inner",
        keys=["rating", "accuracy"],
    )
    paired = paired.dropna(axis=0, how="any")

    return (np.array(paired["rating"]), np.array(paired["accuracy"]))

--------

### Begin Analysis

#### Write 'trial_accuracy.csv' for each subject.

In [None]:
print("Writing 'trial_accuracy.csv' for...")
print("")

for idx, subject in enumerate(subjects):

    print("subject %d" % subject)

    # accuracy_by_trial returns (data, trial_accuracy); only the per-trial
    # accuracy table is written out
    trial_accuracy = accuracy_by_trial(subject, orders[idx])[1]

    out_path = '%s/%s/results/step1/%d/trial_accuracy.csv' % (root_dir, analysis, subject)
    trial_accuracy.to_csv(out_path, header=True)


#### Writing ratings.csv for each subject.

In [None]:
print("writing 'ratings.csv' for...")

# columns pooled across subjects for taking overall ratings means
pooled_columns = ["classCorrect", "BodyRatings3.RESP", "condition"]

# Per-subject frames are collected here and concatenated once after the loop;
# DataFrame.append is no longer available in current pandas releases.
pooled_frames = []

for idx, subject in enumerate(subjects):

    # Match subject data with the E-Prime ratings data. This call loads the
    # trial accuracy data itself, so no separate data_for_subj_ratings call
    # is needed here.
    merged = eprime_ratings_with_regressors(subject, orders[idx])

    # write classifier & rating data to csv
    ratings_out = merged.loc[:, ["trial", "classCorrect", "condition", "Duration", "BodyRatings3.RESP"]]
    pooled_frames.append(ratings_out.loc[:, pooled_columns])

    print("subject %d" % subject)

    ratings_out.to_csv("%s/%s/results/step1/%d/ratings.csv" % (root_dir, analysis, subject),
                       header=True)

if pooled_frames:
    all_subject_ratings = pd.concat(pooled_frames)
    all_subject_ratings_breath = all_subject_ratings[all_subject_ratings["condition"] == "breath"]
    all_subject_ratings_self = all_subject_ratings[all_subject_ratings["condition"] == "self"]
else:
    # no subjects: keep the same empty frames the cell started from before
    all_subject_ratings = pd.DataFrame(columns=["classCorrect", "BodyRatings3.RESP"])
    all_subject_ratings_breath = pd.DataFrame(columns=pooled_columns)
    all_subject_ratings_self = pd.DataFrame(columns=pooled_columns)

#### Write out 'all_subject_ratings' dataframe to csv

In [None]:
# Write the ratings pooled across all subjects to the step1_compile results directory.
all_subject_ratings.to_csv('%s/%s/results/step1_compile/all_subjects_rating.csv' % (root_dir, analysis))

#### Within Subjects Correlations - All Conditions

##### Fisher r to z transformation

http://vassarstats.net/tabs_rz.html

For any particular value of r, the Pearson product-moment correlation coefficient, this section will perform the Fisher r-to-z transformation according to the formula
##### $z_r = \frac{1}{2}\left[\log_e(1+r) - \log_e(1-r)\right]$

If a value of N is entered (optional), it will also calculate the standard error of zr as
##### $SE_{z_r} = \frac{1}{\sqrt{N-3}}$

In [None]:
# create group dataframe for all subjects' within-subjects correlations
all_subjects_within_subject_corr_n33 = pd.DataFrame(columns=["R", "p"], index=subjects)

for subject in subjects:

    # read in ratings.csv for each subject
    ratings = pd.read_csv("%s/%s/results/step1/%d/ratings.csv" % (root_dir, analysis, subject))

    # create np arrays for pearsons r calculation
    ratings_np = np.array(ratings["BodyRatings3.RESP"])
    accuracy_np = np.array(ratings["classCorrect"])

    # calculate pearsons r and p values for within-subjects trial accuracy vs rating
    R, p = scipy.stats.pearsonr(ratings_np, accuracy_np)

    # a nan R is treated as a sign of nan values in the input: drop them and retry
    if np.isnan(R):
        ratings_np, accuracy_np = remove_nan_pearsons_r(ratings_np, accuracy_np, subject)
        R, p = scipy.stats.pearsonr(ratings_np, accuracy_np)

    # Add r and p values for each subject to the compiled dataframe. A single
    # .loc[row, column] call is used because the chained form .loc[row]["R"]
    # assigns into an intermediate row object and may never reach the frame.
    all_subjects_within_subject_corr_n33.loc[subject, "R"] = R
    all_subjects_within_subject_corr_n33.loc[subject, "p"] = p

# calculate fisher r-to-z transformation
all_subjects_within_subject_corr_n33["zr"] = all_subjects_within_subject_corr_n33["R"].map(lambda r: 0.5 * (np.log(1 + r) - np.log(1 - r)))

# write result to csv
all_subjects_within_subject_corr_n33.to_csv("%s/%s/results/step1_compile/all_subjects_within_subj_corr_n33.csv" % (root_dir, analysis))

#### Within Subjects Correlations - By Condition

In [None]:
def calculateWithinSubjectsCorrCondition(subj, condition):
    """Correlate a subject's ratings with trial accuracy within one condition.

    subj: subject number (int); selects the subject's ratings.csv.
    condition: value of the "condition" column to keep (str).

    Runs scipy.stats.pearsonr on "BodyRatings3.RESP" versus "classCorrect"
    for the selected rows. If R comes back as nan, pairs containing nan are
    removed with remove_nan_pearsons_r and the correlation is computed again.

    Returns (R, p).
    """
    # read in ratings file
    ratings_path = "%s/%s/results/step1/%d/ratings.csv" % (root_dir, analysis, subj)
    ratings = pd.read_csv(ratings_path)

    # isolate the rows of the requested condition as np arrays
    in_condition = ratings["condition"] == condition
    rating_values = np.array(ratings.loc[in_condition, "BodyRatings3.RESP"])
    accuracy_values = np.array(ratings.loc[in_condition, "classCorrect"])

    # within-subjects trial accuracy vs rating
    R, p = scipy.stats.pearsonr(rating_values, accuracy_values)
    if not np.isnan(R):
        return R, p

    rating_values, accuracy_values = remove_nan_pearsons_r(rating_values, accuracy_values, subj)
    R, p = scipy.stats.pearsonr(rating_values, accuracy_values)

    return R, p

In [None]:
def condition_within_subject_corr(subject_list, condition):
    """Collect every subject's within-subject correlation for one condition.

    subject_list: subject numbers (list).
    condition: condition name (str).

    Returns a DataFrame indexed by subject with the columns R, p and zr,
    where zr is the Fisher r-to-z transformation of R.
    """

    def fisher_z(r):
        return 0.5 * (np.log(1 + r) - np.log(1 - r))

    # empty output dataset, one row per subject
    corr_df = pd.DataFrame(columns=["R", "p"], index=subject_list)

    # fill in R and p for each subject
    for subj in subject_list:
        corr_df.loc[subj, "R"], corr_df.loc[subj, "p"] = calculateWithinSubjectsCorrCondition(subj, condition)

    # r to z transformation for all subjects
    corr_df["zr"] = corr_df["R"].map(fisher_z)

    return corr_df

In [None]:
# Per-subject rating/accuracy correlations for the "breath" condition, written to step1_compile.
breath_within_subject_corr_n9 = condition_within_subject_corr(subjects, "breath")
breath_within_subject_corr_n9.to_csv("%s/%s/results/step1_compile/all_subjects_within_subj_corr_n9_breath.csv" % (root_dir, analysis))

In [None]:
# Per-subject rating/accuracy correlations for the "self" condition, written to step1_compile.
self_within_subject_corr_n6 = condition_within_subject_corr(subjects, "self")
self_within_subject_corr_n6.to_csv("%s/%s/results/step1_compile/all_subjects_within_subj_corr_n6_self.csv" % (root_dir, analysis))

In [None]:
# Per-subject rating/accuracy correlations for the "feet" condition, written to step1_compile.
feet_within_subject_corr_n9 = condition_within_subject_corr(subjects, "feet")
feet_within_subject_corr_n9.to_csv("%s/%s/results/step1_compile/all_subjects_within_subj_corr_n9_feet.csv" % (root_dir, analysis))

In [None]:
# Per-subject rating/accuracy correlations for the "sounds" condition, written to step1_compile.
sounds_within_subject_corr_n9 = condition_within_subject_corr(subjects, "sounds")
sounds_within_subject_corr_n9.to_csv("%s/%s/results/step1_compile/all_subjects_within_subj_corr_n9_sounds.csv" % (root_dir, analysis))