# **LoopsResultsAnalysis**

## **1. Importing and First Processing**

In [1]:
import pandas as pd
from scipy import stats
import plotly.express as px
from DataCleaning import *
from ProcessingConfig import *
from datetime import datetime as dt

pd.options.display.max_columns = 50

In [2]:
# Load the raw results from the path configured in cleaning_config.
raw_data = pd.read_excel(cleaning_config['raw_data_path'])
print(f'original shape: {raw_data.shape}')
# The same threshold is passed to filter_slow_subjects, filter_bad_subjects
# and filter_slow_steps below.
outliers_threshold = cleaning_config['filter_threshold']
print(f"threshold for outliers detection: {outliers_threshold}")

# NOTE(review): the return values of drop_columns / convert_types are not used,
# so they presumably modify raw_data in place -- confirm in DataCleaning.
drop_columns(raw_data, cleaning_config['unnecessary_columns'])
convert_types(raw_data, cleaning_config['type_conversions'])
# Each filtering step below rebinds raw_data to the step's result, so the order
# of these calls matters. After this cell, `raw_data` holds the *cleaned* data
# despite its name.
raw_data = filter_slow_subjects(raw_data, outliers_threshold)
raw_data = filter_bad_subjects(raw_data, outliers_threshold)
raw_data = drop_first_loop(raw_data)
raw_data = only_first_line(raw_data)
# Unlike the other filters, this one takes its threshold from analysis_config.
raw_data = filter_bad_trials(raw_data, threshold=analysis_config['trials_success_rate_threshold'])
raw_data = filter_slow_steps(raw_data, outliers_threshold)

# Save the cleaned data to a timestamped Excel file (day.month.year_hour-minute),
# so every run of this cell with a different minute writes a new file.
path = cleaning_config['results_path'] + f'_{dt.now().strftime("%d.%m.%Y_%H-%M")}.xlsx'
raw_data.to_excel(path)

print(f'final shape: {raw_data.shape}')

original shape: (17408, 32)
threshold for outliers detection: 2.25
filter_slow_subjects: No slow subjects detected.
filter_bad_subjects: No bad subjects detected (in terms of low success rate).
drop_first_loop: 2134 rows were filtered out.
only_first_line: 12081 rows were filtered out.
filter_bad_trials: No bad trials detected (in terms of low success rate).
filter_slow_steps: 87 slow steps were filtered out.
Here is a summary of slow steps rate per subjects: 
              slow steps rate (%)
subject                     
101A                    0.00
110A                    0.00
110B                    0.00
104A                    0.64
111A                    0.64
102A                    1.96
106B                    1.96
106A                    1.96
103A                    2.38
102B                    2.58
101B                    3.29
105A                    3.42
107A                    3.57
109B                    3.87
108A                    3.90
107B                    4.00
105B    

In [3]:
# Number of distinct values in the 'subject' column.
subject_ids = raw_data['subject']
n_subjects = subject_ids.nunique()
print(f'number of subjects: {n_subjects}')

# A session is counted as one distinct (subject, trial_set) combination.
session_keys = raw_data[['subject', 'trial_set']].drop_duplicates()
n_sessions = len(session_keys)
print(f'number of sessions: {n_sessions}')

number of subjects: 21
number of sessions: 38


## **2. Priming Effect Testing**

In [4]:
# Significance level passed to the hypothesis tests in this section.
alpha = 0.05

# Wrapper functions around the F-test (equality of variances) and the independent-samples t-test.

def f_test(smp1, smp2, alpha):
    """Two-sided F-test for equality of the variances of two samples.

    Parameters
    ----------
    smp1, smp2 : array-like exposing ``.var(ddof=...)`` and ``.size``
        (e.g. ``pandas.Series`` or ``numpy.ndarray``).
    alpha : float
        Significance level of the test.

    Returns
    -------
    bool
        True when the null hypothesis of equal variances is NOT rejected at
        level ``alpha`` (the samples may be treated as having equal variance),
        False otherwise.
    """
    # The F statistic is the ratio of the unbiased sample *variances*
    # (a ratio of standard deviations does not follow an F distribution).
    f_score = smp1.var(ddof=1) / smp2.var(ddof=1)
    df1, df2 = smp1.size - 1, smp2.size - 1

    # Either sample may have the larger variance, so the test must be
    # two-sided: double the smaller of the two tail probabilities.
    lower_tail = stats.f.cdf(f_score, df1, df2)
    upper_tail = stats.f.sf(f_score, df1, df2)
    p_value = 2 * min(lower_tail, upper_tail)
    return p_value >= alpha

def t_test_ind(smp1, smp2, alpha, alternative='two-sided'):
    """Run an independent-samples t-test and print a short verdict.

    The ``equal_var`` flag handed to ``scipy.stats.ttest_ind`` is taken from
    ``f_test(smp1, smp2, alpha)``.

    Parameters
    ----------
    smp1, smp2 : the two samples to compare.
    alpha : float
        Significance level, used both for the variance pre-test and for the
        printed verdict.
    alternative : str
        Forwarded unchanged to ``scipy.stats.ttest_ind``.

    Returns
    -------
    tuple
        ``(t_score, p_value)`` as produced by ``scipy.stats.ttest_ind``.
    """
    outcome = stats.ttest_ind(
        smp1,
        smp2,
        alternative=alternative,
        equal_var=f_test(smp1, smp2, alpha),
    )
    t_score, p_value = outcome

    print(f'p_value: {p_value}')
    verdict = (
        f'There is a significant difference between the samples! ({(1-alpha)*100}%).'
        if p_value <= alpha
        else f'It is not possible to determine whether there is an effect ({(1-alpha)*100}%).'
    )
    print(verdict)
    return t_score, p_value

In [5]:
# mean response time of correct trials, grouped by loop type and loop switching.
correct_trials = raw_data[raw_data['correct']]
switching_diff_basic = (
    correct_trials
    .groupby(['loop_type', 'loop_type_switch'])['rt']
    .mean()
    .to_frame('mean_response_time')
)

switching_diff_basic

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_response_time
loop_type,loop_type_switch,Unnamed: 2_level_1
for,False,3290.793194
for,True,3301.628989
while,False,3580.155827
while,True,3652.074224


In [6]:
# checking for a correlation between switching and response time.
# NOTE: unlike the t-tests in this section, this uses all rows of raw_data,
# not only the correct trials.
r, p_val = stats.pearsonr(raw_data['loop_type_switch'], raw_data['rt'])
# use the section-wide significance level instead of a hard-coded 0.05
significance = 'significant' if p_val < alpha else 'not significant'
print(f'Pearson correlation between loop type switching and response time is {significance} (p = {round(p_val, 3)}), with a value of r = {round(r, 3)}')

Pearson correlation between loop type switching and response time is not significant (p = 0.259), with a value of r = 0.02


In [7]:
# priming effect on 'for' loops: compare rt of correct 'for' trials
# with loop_type_switch True ("different") against False ("same").
for_correct = raw_data['correct'] & (raw_data['loop_type'] == 'for')
for_switched = raw_data['loop_type_switch']

same = raw_data.loc[for_correct & ~for_switched, 'rt']
different = raw_data.loc[for_correct & for_switched, 'rt']

# one-sided test: the alternative is that "different" has the greater mean rt
t_score, p_value = t_test_ind(different, same, alpha=alpha, alternative='greater')

p_value: 0.4507441623576721
It is not possible to determine whether there is an effect (95.0%).


In [8]:
# priming effect on 'while' loops: compare rt of correct 'while' trials
# with loop_type_switch True ("different") against False ("same").
while_correct = raw_data['correct'] & (raw_data['loop_type'] == 'while')
while_switched = raw_data['loop_type_switch']

same = raw_data.loc[while_correct & ~while_switched, 'rt']
different = raw_data.loc[while_correct & while_switched, 'rt']

# one-sided test: the alternative is that "different" has the greater mean rt
t_score, p_value = t_test_ind(different, same, alpha=alpha, alternative='greater')

p_value: 0.2272831032848695
It is not possible to determine whether there is an effect (95.0%).
