# **LoopsResultsAnalysis**

## **1. Importing and First Processing**

In [14]:
import pandas as pd
from scipy import stats
import plotly.express as px
from DataCleaning import *
from ProcessingConfig import *
from datetime import datetime as dt

# Let pandas display up to 50 columns before truncating wide DataFrames.
pd.options.display.max_columns = 50

In [15]:
# Load the raw results workbook from the path given in cleaning_config and
# report its size before any cleaning.
raw_data = pd.read_excel(cleaning_config['raw_data_path'])
print(f'original shape: {raw_data.shape}')
# The same threshold is handed to the subject-level and step-level filters below.
outliers_threshold = cleaning_config['filter_threshold']
print(f"threshold for outliers detection: {outliers_threshold}")

# The return values of these two helpers are ignored, so they presumably modify
# raw_data in place — NOTE(review): confirm in DataCleaning.
drop_columns(raw_data, cleaning_config['unnecessary_columns'])
convert_types(raw_data, cleaning_config['type_conversions'])
# Each filter below returns a frame that feeds the next one, so the order of
# these calls determines which rows every later filter sees.
filtered_data = filter_slow_subjects(raw_data, outliers_threshold)
filtered_data = filter_bad_subjects(filtered_data, outliers_threshold)
filtered_data = drop_first_loop(filtered_data)
filtered_data = only_first_line(filtered_data)
filtered_data = filter_bad_trials(filtered_data, threshold=analysis_config['trials_success_rate_threshold'])
filtered_data = filter_slow_steps(filtered_data, outliers_threshold)

# Export the cleaned data to an Excel file whose name is the configured results
# path followed by a day.month.year_hour-minute timestamp.
path = cleaning_config['results_path'] + f'_{dt.now().strftime("%d.%m.%Y_%H-%M")}.xlsx'
filtered_data.to_excel(path)

print(f'final shape: {filtered_data.shape}')

# Number of distinct values in the 'subject' column.
n_subjects = filtered_data['subject'].nunique()
print(f'number of subjects: {n_subjects}')

# A session is counted as a distinct (subject, trial_set) pair.
n_sessions = filtered_data[['subject', 'trial_set']].drop_duplicates().shape[0]
print(f'number of sessions: {n_sessions}')

original shape: (18787, 32)
threshold for outliers detection: 2.25
filter_slow_subjects: No slow subjects detected.
filter_bad_subjects: No bad subjects detected (in terms of low success rate).
drop_first_loop: 2307 rows were filtered out.
only_first_line: 13035 rows were filtered out.
filter_bad_trials: No bad trials detected (in terms of low success rate).
filter_slow_steps: 95 slow steps were filtered out.
Here is a summary of slow steps rate per subjects: 
              slow steps rate (%)
subject                     
101A                    0.00
110A                    0.00
111A                    0.64
104A                    0.64
111B                    1.19
106A                    1.96
102A                    1.96
106B                    1.96
103A                    2.38
102B                    2.58
101B                    3.29
105B                    3.31
105A                    3.42
107A                    3.57
109B                    3.87
108A                    3.90
110B    

## **2. General Priming Effect Testing**

In [16]:
# significance level passed to the t-tests below
alpha = 0.05

# wrapper functions around the F-test and the independent-samples t-test

def f_test(smp1, smp2, alpha):
    """Two-sided F-test for equality of the variances of two samples.

    Parameters
    ----------
    smp1, smp2 : pandas.Series or numpy.ndarray
        The two samples; each must expose ``.size`` and ``.var(ddof=...)``.
    alpha : float
        Significance level of the test.

    Returns
    -------
    bool
        True when the hypothesis of equal variances can NOT be rejected at
        level ``alpha`` (i.e. a pooled-variance t-test is acceptable), False
        otherwise. If the statistic is undefined (e.g. a sample with fewer
        than two observations) the comparison evaluates to False.
    """
    df1, df2 = smp1.size - 1, smp2.size - 1

    # The F statistic is the ratio of the unbiased sample *variances*;
    # a ratio of standard deviations does not follow the F distribution.
    f_score = smp1.var(ddof=1) / smp2.var(ddof=1)

    # Two-sided p-value: the variances may differ in either direction, so a
    # very large ratio must count as evidence against equality just like a
    # very small one.
    lower_tail = stats.f.cdf(f_score, df1, df2)
    upper_tail = stats.f.sf(f_score, df1, df2)
    p_value = 2 * min(lower_tail, upper_tail)

    return p_value >= alpha

def t_test_ind(smp1, smp2, alpha, alternative='two-sided'):
    """Run an independent-samples t-test and print a short verdict.

    Whether the pooled-variance or the unequal-variance form of the test is
    used is decided by ``f_test(smp1, smp2, alpha)``.

    Parameters
    ----------
    smp1, smp2 : array-like
        The two samples to compare.
    alpha : float
        Significance level, used both for the variance check and the verdict.
    alternative : str
        Forwarded to ``scipy.stats.ttest_ind``.

    Returns
    -------
    tuple
        ``(t_score, p_value)`` as returned by ``scipy.stats.ttest_ind``.
    """
    pooled_variance = f_test(smp1, smp2, alpha)
    t_score, p_value = stats.ttest_ind(
        smp1,
        smp2,
        equal_var=pooled_variance,
        alternative=alternative,
    )

    # Pick the verdict first; anything that is not "p_value <= alpha"
    # (including an undefined p-value) falls through to the second message.
    verdict = (
        f'There is a significant difference between the samples! ({(1-alpha)*100}%).'
        if p_value <= alpha
        else f'It is not possible to determine whether there is an effect ({(1-alpha)*100}%).'
    )

    print(f'p_value: {p_value}')
    print(verdict)
    return t_score, p_value

In [17]:
# Mean response time of correctly answered steps, broken down by loop type
# and by whether the loop type switched.
switching_diff_basic = (
    filtered_data.loc[filtered_data['correct']]
    .groupby(['loop_type', 'loop_type_switch'])['rt']
    .mean()
    .to_frame(name='mean_response_time')
)

switching_diff_basic

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_response_time
loop_type,loop_type_switch,Unnamed: 2_level_1
for,False,3222.683252
for,True,3230.482716
while,False,3513.130818
while,True,3562.445557


In [25]:
# Pearson correlation between loop-type switching and response time, judged
# against the notebook-wide significance level `alpha` (instead of a second,
# hard-coded 0.05) so that all tests share one threshold.
# NOTE(review): unlike the surrounding cells, this one is not restricted to
# correctly answered steps — confirm that including incorrect steps is intended.
r, p_val = stats.pearsonr(filtered_data['loop_type_switch'], filtered_data['rt'])
significance = 'significant' if p_val < alpha else 'not significant'
print(f"""Pearson correlation between loop type switching and response time is {significance} (p = {round(p_val, 3)})
, with value of r = {round(r, 3)}""")

Pearson correlation between loop type switching and response time is not significant (p = 0.33)
, with value of r = 0.017


In [19]:
# Priming effect on 'for' loops: one-sided test of whether correctly answered
# steps have a larger response time after a loop-type switch than without one.
for_correct_mask = filtered_data['correct'] & (filtered_data['loop_type'] == 'for')
switch_mask = filtered_data['loop_type_switch']

same = filtered_data.loc[for_correct_mask & ~switch_mask, 'rt']
different = filtered_data.loc[for_correct_mask & switch_mask, 'rt']

t_score, p_value = t_test_ind(different, same, alpha=alpha, alternative='greater')

p_value: 0.462883013839917
It is not possible to determine whether there is an effect (95.0%).


In [20]:
# Priming effect on 'while' loops: one-sided test of whether correctly answered
# steps have a larger response time after a loop-type switch than without one.
while_correct_mask = filtered_data['correct'] & (filtered_data['loop_type'] == 'while')
switch_mask = filtered_data['loop_type_switch']

same = filtered_data.loc[while_correct_mask & ~switch_mask, 'rt']
different = filtered_data.loc[while_correct_mask & switch_mask, 'rt']

t_score, p_value = t_test_ind(different, same, alpha=alpha, alternative='greater')

p_value: 0.2977062946331592
It is not possible to determine whether there is an effect (95.0%).


## **3. Different Influences on the Priming Effect**

#### **3.1. Mistake in Previous Step**

In [21]:
# Lookup table over the raw (unfiltered) rows, keyed by
# (subject, trial, step_num) and reduced to the 'correct' and 'text1' columns.
indexed_raw_data = raw_data.set_index(['subject', 'trial', 'step_num'])[['correct', 'text1']]

def is_prev_correct(step, lookup=None):
    """Return whether the step preceding ``step`` was answered correctly.

    For the first step of a trial (``step_num`` 0) the preceding step is the
    last step of the previous trial of the same subject.

    Parameters
    ----------
    step : pandas.Series
        A row holding at least 'subject', 'trial' and 'step_num'.
    lookup : pandas.DataFrame, optional
        Frame indexed by (subject, trial, step_num) with a 'correct' column.
        Defaults to the notebook-level ``indexed_raw_data``.

    Returns
    -------
    The 'correct' value stored in ``lookup`` for the preceding step.

    Raises
    ------
    KeyError
        If the preceding step (or, for the first step of a trial, the
        preceding trial) is not present in ``lookup``.
    """
    if lookup is None:
        lookup = indexed_raw_data

    subject = step.loc['subject']
    trial = step.loc['trial']
    prev_step_num = step.loc['step_num'] - 1

    if prev_step_num < 0:
        # Step back into the previous trial. 'step_num' lives in the index of
        # the lookup frame (not in a column), so the last step of that trial
        # is read from the remaining index level; that step itself — not the
        # one before it — is the predecessor we want.
        trial -= 1
        prev_step_num = lookup.xs((subject, trial)).index.max()

    return lookup.loc[(subject, trial, prev_step_num), 'correct']

# Evaluate is_prev_correct once per row of filtered_data (axis=1) and store the
# result as a new column; the lookup itself runs against indexed_raw_data,
# which is built from raw_data rather than from the filtered frame.
filtered_data['is_prev_correct'] = filtered_data.apply(is_prev_correct, axis=1)

In [22]:
# Mean response time and number of correctly answered steps, broken down by
# loop type, loop-type switching and correctness of the previous step.
grouped_correct_steps = filtered_data[filtered_data['correct']].groupby(
    ['loop_type', 'loop_type_switch', 'is_prev_correct']
)

switching_diff_prev_correct = pd.DataFrame({
    'mean_response_time': grouped_correct_steps['rt'].mean(),
    'number_of_steps': grouped_correct_steps['step_num'].count(),
})

switching_diff_prev_correct

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean_response_time,number_of_steps
loop_type,loop_type_switch,is_prev_correct,Unnamed: 3_level_1,Unnamed: 4_level_1
for,False,False,2295.5,2
for,False,True,3224.939173,822
for,True,False,1928.0,2
for,True,True,3233.706683,808
while,False,False,1692.0,1
while,False,True,3515.424433,794
while,True,False,3463.0,1
while,True,True,3562.570175,798
