# **LoopsResultsAnalysis**

## **1. Importing and First Proccesing**

In [1]:
import numpy as np
import pandas as pd
from scipy import stats
from DataCleaning import *
from ProcessingConfig import *

pd.options.display.max_columns = 50

In [2]:
raw_data = pd.read_excel(cleaning_config['raw_data_path'])
print(f'original shape: {raw_data.shape}')
print(f"threshold for outliers detection: {cleaning_config['filter_threshold']}")
raw_data = clean_data(raw_data)
print(f'final shape: {raw_data.shape}')

original shape: (13294, 32)
threshold for outliers detection: 2.25
-- drop_first_loop: 1630 rows were filtered out.
-- filter_trial_outliers: 211 rows were filtered out.
-- filter_step_outliers: 275 rows were filtered out.
final shape: (11178, 26)


In [3]:
n_subjects = raw_data['subject'].nunique()
print(f'number of subjects: {n_subjects}')

n_sessions = raw_data[['subject', 'trial_set']].drop_duplicates().shape[0]
print(f'number of sessions: {n_sessions}')

number of subjects: 15
number of sessions: 29


In [4]:
# enveloping t_test and f_test functions

def f_test(smp1, smp2, alpha):
    f_score = smp1.std() / smp2.std()
    df1, df2 = smp1.size - 1, smp2.size - 1
    return stats.f.cdf(f_score, df1, df2) >= alpha

def t_test_ind(smp1, smp2, alpha, alternative='two-sided'):
    equal_var = f_test(smp1, smp2, alpha)
    t_score, p_value = stats.ttest_ind(smp1, smp2, alternative=alternative, equal_var=equal_var)
    
    print(f'p_value: {p_value}')
    if p_value <= alpha:
        print(f'There is a significant difference between the samples! ({(1-alpha)*100}%).')
    else:
        print(f'It is not possible to determine whether there is an effect ({(1-alpha)*100}%).')
    return t_score, p_value

## **2. Priming Effect Testing**

In [5]:
alpha = 0.05

In [30]:
# note that 'loop_step' is an id of each step in the loop, ranging 0-len(loop).
only_first_line = raw_data['loop_step'] == 0
loop_priming_data = raw_data[only_first_line].copy()

loop_priming_data.to_excel(r'Results\LoopPrimingData.xlsx')

print(f'loop_priming_data has {loop_priming_data.shape[0]} rows.')

loop_priming_data has 2250 rows.


In [24]:
# mean response time and success rate, group by loop type and loop switching.
switching_diff = pd.DataFrame()

switching_diff['mean_response_time'] = loop_priming_data[loop_priming_data['correct']].groupby(['loop_type', 'loop_type_switch'])['rt'].mean()
switching_diff['mean_success_rate'] = loop_priming_data.groupby(['loop_type', 'loop_type_switch'])['correct'].mean()

switching_diff

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_response_time,mean_success_rate
loop_type,loop_type_switch,Unnamed: 2_level_1,Unnamed: 3_level_1
for,False,3135.508897,0.980803
for,True,3169.817204,0.977233
while,False,3402.6,0.965704
while,True,3480.482243,0.969203


In [25]:
# checking for priming effect on 'for' loops
same = loop_priming_data.loc[loop_priming_data['correct'] & ( ~ loop_priming_data['loop_type_switch']) & (loop_priming_data['loop_type'] == 'for'), 'rt']
different = loop_priming_data.loc[loop_priming_data['correct'] & (loop_priming_data['loop_type_switch']) & (loop_priming_data['loop_type'] == 'for'), 'rt']

t_score, p_value = t_test_ind(different, same, alpha=alpha, alternative='greater')

p_value: 0.35478490049617406
It is not possible to determine whether there is an effect (95.0%).


In [26]:
# checking for priming effect on 'while' loops
same = loop_priming_data.loc[loop_priming_data['correct'] & ( ~ loop_priming_data['loop_type_switch']) & (loop_priming_data['loop_type'] == 'while'), 'rt']
different = loop_priming_data.loc[loop_priming_data['correct'] & (loop_priming_data['loop_type_switch']) & (loop_priming_data['loop_type'] == 'while'), 'rt']

t_score, p_value = t_test_ind(different, same, alpha=alpha, alternative='greater')

p_value: 0.22297437329252862
It is not possible to determine whether there is an effect (95.0%).


## **3. Difference Between Loop Types**

In [27]:
all_loops_data = raw_data.copy()

In [28]:
# general difference between response time in 'while' loops versus 'for' loops
for_rt = all_loops_data.loc[all_loops_data['correct'] & (all_loops_data['loop_type'] == 'for'), 'rt']
while_rt = all_loops_data.loc[all_loops_data['correct'] & (all_loops_data['loop_type'] == 'while'), 'rt']

t_score, p_value = t_test_ind(while_rt, for_rt, alpha=alpha, alternative='greater')

p_value: 6.062364677586016e-16
There is a significant difference between the samples! (95.0%).


In [29]:
# difference between 'end loop' step response time in 'while' loops versus 'for' loops
loop_end_mask = (all_loops_data['response_needed']) & (all_loops_data['expected_response'].isnull()) & (all_loops_data['correct'])

for_end_loop_rt = all_loops_data.loc[loop_end_mask & (all_loops_data['loop_type'] == 'for'), 'rt']
while_end_loop_rt = all_loops_data.loc[loop_end_mask & (all_loops_data['loop_type'] == 'while'), 'rt']

t_score, p_value = t_test_ind(while_end_loop_rt, for_end_loop_rt, alpha=alpha, alternative='greater')

p_value: 4.907838664353287e-10
There is a significant difference between the samples! (95.0%).
