# **LoopsResultsAnalysis**

## **1. Importing and First Processing**

In [3]:
import pandas as pd
from scipy import stats
import plotly.express as px
import ProcessingConfig as config
import ProcessingFuncs as process
import AnalysisFuncs as analyze

# Render up to 50 columns when a DataFrame is displayed, so wide frames are not truncated.
pd.set_option('display.max_columns', 50)

In [4]:
# Load the raw experiment export; its location comes from the project cleaning config.
raw_data = pd.read_excel(config.cleaning_config['raw_data_path'])

# Switches for the first-lines cleaning pass; every one of them is enabled here.
first_lines_cleaning_flags = dict(
    drop_assign_steps=True,
    drop_first_loop_steps=True,
    only_first_lines=True,
    filter_subjects=True,
    filter_trials=True,
    filter_steps=True,
)
first_lines_data = process.clean_data(raw_data, **first_lines_cleaning_flags)

# Persist the cleaned frame through the project helper before analysing it.
process.save_in_excel(first_lines_data)

# Summarise what is left after cleaning (the recorded output reports sessions and subjects).
analyze.get_sample_size(first_lines_data)

original shape: (21074, 32)
threshold for outliers detection: 3 (iqr).
drop_assign: 552 rows were filtered out.
drop_first_loop: 2589 rows were filtered out.
filter_slow_subjects: No slow subjects detected.
filter_bad_subjects: No bad subjects detected (in terms of low success rate).
is_first_line: 14068 lines were filtered.
filter_bad_trials: No bad trials detected (in terms of low success rate).
filter_slow_steps: 59 slow steps were filtered out.
Here is a summary of slow steps rate per subjects: 
     subject              101A  111A  104A  102A  106A  107A  102B  109B  106B  \
slow steps rate (%)   0.0   0.0  0.64  0.65  0.65  0.71  1.29  1.29  1.31   

subject              112A  110B  107B  103A  111B  105A  104B  101B  105B  \
slow steps rate (%)  1.32  1.32  1.33  1.35   1.4  2.05  2.19  2.63  2.65   

subject              109A  113A  108A  108B  112B  
slow steps rate (%)  2.74   2.8  3.25  3.42  4.52  
final shape: (3806, 26)
There are 46 sessions from 23 subjects.


## **2. General Priming Effect Testing**

In [6]:
# Mean response time of correct steps, split only by whether the loop type switched.
(
    first_lines_data
    .loc[first_lines_data['correct']]
    .groupby(['loop_type_switch'])['rt']
    .mean()
    .to_frame()
)

Unnamed: 0_level_0,rt
loop_type_switch,Unnamed: 1_level_1
False,3309.468237
True,3314.314662


In [7]:
# Mean response time and number of correct steps for every
# (loop type, switched / not switched) combination.
correct_by_loop_and_switch = (
    first_lines_data
    .loc[first_lines_data['correct']]
    .groupby(['loop_type', 'loop_type_switch'])
)

switching_diff_basic = pd.DataFrame({
    'mean_response_time': correct_by_loop_and_switch['rt'].mean(),
    'steps_counter': correct_by_loop_and_switch['step_num'].count(),
})

switching_diff_basic

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_response_time,steps_counter
loop_type,loop_type_switch,Unnamed: 2_level_1,Unnamed: 3_level_1
for,False,3208.036717,926
for,True,3155.099562,914
while,False,3413.83,900
while,True,3474.758545,907


In [8]:
# Flatten the grouped index so plotly can address the grouping keys as ordinary columns.
switch_basic_for_fig = switching_diff_basic.reset_index()
px.bar(
    switch_basic_for_fig,
    x='loop_type',
    y='mean_response_time',
    color='loop_type_switch',
    barmode='group',
    title='Priming Effect Per Loop Type',
    labels={
        'loop_type': 'after switch loop type',
        'mean_response_time': 'mean response time (ms)',
        'loop_type_switch': 'switching loop type',
    },
)

In [9]:
# Check for a significant Pearson correlation between loop-type switching and
# response time. The project helper prints the verdict together with r and p
# (see the recorded output of this cell).
analyze.test_rt_switch_corr(first_lines_data)

Pearson correlation between loop type switching and response time 
          is not significant (p = 0.521), with value of r = 0.01


## **3. Different Influences on Priming Effect**

#### **3.1. Arithmetic Types and Loop Ends**

In [10]:
# NOTE(review): the return value is not captured and the following cell groups
# first_lines_data by 'arithmetic_type', so this call presumably fills that
# column in place - confirm in AnalysisFuncs.
analyze.get_arithmetics(first_lines_data, arithmetics_col='arithmetic_type')

In [11]:
# Mean response time and number of correct steps for every
# (first-step arithmetic, switched / not switched) combination.
correct_by_arithmetic_and_switch = (
    first_lines_data
    .loc[first_lines_data['correct']]
    .groupby(['arithmetic_type', 'loop_type_switch'])
)

switching_diff_arithmetics = pd.DataFrame({
    'mean_response_time': correct_by_arithmetic_and_switch['rt'].mean(),
    'number_of_steps': correct_by_arithmetic_and_switch['step_num'].count(),
})

switching_diff_arithmetics

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_response_time,number_of_steps
arithmetic_type,loop_type_switch,Unnamed: 2_level_1,Unnamed: 3_level_1
"['*', 'round_down']",False,6001.773333,75
"['*', 'round_down']",True,5631.722222,72
"['*', 'round_up']",False,4470.97479,119
"['*', 'round_up']",True,4461.310924,119
['*'],False,2877.572626,358
['*'],True,2917.419444,360
['+'],False,2935.202268,529
['+'],True,2929.001869,535
['-'],False,3010.696296,270
['-'],True,3053.733083,266


In [12]:
# Grouped bars: one pair (switched / not switched) per first-step arithmetic.
# The y-axis label carries the "(ms)" unit, matching the other priming figures
# in this notebook, which label the same 'rt'-derived column that way.
px.bar(
    switching_diff_arithmetics.reset_index(),
    x='arithmetic_type',
    y='mean_response_time',
    color='loop_type_switch',
    barmode='group',
    title='First Step Arithmetic Influence on Priming',
    labels={
        'mean_response_time': 'mean response time (ms)',
        'arithmetic_type': 'first step arithmetic',
        'loop_type_switch': 'switching loop type',
    },
)

#### **3.2. Mistake in Previous Step**

In [13]:
# Flag, for every first-line step, whether the step before it was answered correctly.
# NOTE(review): the result is not captured and the following cell groups
# first_lines_data by 'is_prev_correct', so the helper presumably adds that
# column to the frame in place (using raw_data as the lookup source) - confirm.
analyze.is_prev_correct(raw_data, first_lines_data)

There are 9 steps whose previous step is incorrect.


In [33]:
# Mean response time and number of correct steps for every
# (switched / not switched, previous step correct / incorrect) combination.
# NOTE(review): the recorded output shows only 3 steps in each
# "previous step incorrect" group, so those means are very noisy.
correct_by_switch_and_prev = (
    first_lines_data
    .loc[first_lines_data['correct']]
    .groupby(['loop_type_switch', 'is_prev_correct'])
)

switching_diff_prev_correct = pd.DataFrame({
    'mean_response_time': correct_by_switch_and_prev['rt'].mean(),
    'number_of_steps': correct_by_switch_and_prev['step_num'].count(),
})

switching_diff_prev_correct

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_response_time,number_of_steps
loop_type_switch,is_prev_correct,Unnamed: 2_level_1,Unnamed: 3_level_1
False,False,2094.333333,3
False,True,3311.46791,1823
True,False,2439.666667,3
True,True,3315.757976,1818


In [34]:
# Flatten the grouped index so plotly can address the grouping keys as ordinary columns.
switch_prev_for_fig = switching_diff_prev_correct.reset_index()
px.bar(
    switch_prev_for_fig,
    x='is_prev_correct',
    y='mean_response_time',
    color='loop_type_switch',
    barmode='group',
    title='Previous Step Mistake Influence On Priming',
    labels={
        'is_prev_correct': 'previous step correctness',
        'mean_response_time': 'mean response time (ms)',
        'loop_type_switch': 'switching loop type',
    },
)

#### **3.3. Learning and Priming**

In [16]:
# Build the frame used for the learning analysis. The following cell groups it
# by 'n_session' (values 1 and 2 in the recorded output).
# NOTE(review): presumably n_session is the ordinal of the session within each
# subject - confirm in AnalysisFuncs.
first_lines_learning = analyze.get_n_session(first_lines_data)

In [17]:
# Mean response time and number of correct steps for every
# (switched / not switched, session number) combination.
correct_by_switch_and_session = (
    first_lines_learning
    .loc[first_lines_learning['correct']]
    .groupby(['loop_type_switch', 'n_session'])
)

switching_diff_learning = pd.DataFrame({
    'mean_response_time': correct_by_switch_and_session['rt'].mean(),
    'number_of_steps': correct_by_switch_and_session['step_num'].count(),
})

switching_diff_learning

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_response_time,number_of_steps
loop_type_switch,n_session,Unnamed: 2_level_1,Unnamed: 3_level_1
False,1,3806.123471,899
False,2,2827.814455,927
True,1,3823.534231,891
True,2,2826.449462,930


In [18]:
# Grouped bars: one pair (switched / not switched) per session.
# Labels are aligned with the other priming figures in this notebook:
# the legend reads 'switching loop type' (it was 'switching loops types' here only)
# and the y-axis states the "(ms)" unit.
px.bar(
    switching_diff_learning.reset_index(),
    x='n_session',
    y='mean_response_time',
    color='loop_type_switch',
    barmode='group',
    title='Difference in Priming Between Sessions',
    labels={
        'mean_response_time': 'mean response time (ms)',
        'n_session': '# session',
        'loop_type_switch': 'switching loop type',
    },
)

## **4. Different Influences on Response Time Generally**

In [19]:
# Second cleaning pass over the same raw data, this time with only_first_lines disabled.
# NOTE(review): drop_assign_steps / drop_first_loop_steps are not passed here, yet the
# recorded log shows both filters ran, so they presumably default to True - confirm.
# NOTE(review): the recorded log reports 25 raw columns here versus 32 in the first
# pass, so raw_data was apparently modified between the two calls - verify that a
# fresh Restart & Run All reproduces these results.
all_lines_cleaning_flags = dict(
    only_first_lines=False,
    filter_subjects=True,
    filter_trials=True,
    filter_steps=True,
)
all_lines_data = process.clean_data(raw_data, **all_lines_cleaning_flags)

original shape: (21074, 25)
threshold for outliers detection: 3 (iqr).
drop_assign: 552 rows were filtered out.
drop_first_loop: 2589 rows were filtered out.
filter_slow_subjects: No slow subjects detected.
filter_bad_subjects: No bad subjects detected (in terms of low success rate).
is_first_line: There are 3865 first lines over all.
filter_bad_trials: No bad trials detected (in terms of low success rate).
filter_slow_steps: 203 slow steps were filtered out.
Here is a summary of slow steps rate per subjects: 
     subject              104A  110B  107A  111B  111A  101A  113A  102B  107B  \
slow steps rate (%)  0.23  0.67  0.71  1.13  1.13  1.18  1.21  1.39  1.41   

subject              106B  102A  105A  106A  112A  104B  109A  108A  109B  \
slow steps rate (%)  1.42  1.69  1.88  2.27   2.3  2.35  2.53  2.55  2.68   

subject              112B  105B  103A  101B  108B  
slow steps rate (%)  2.75  2.84  3.02  4.26  5.32  
final shape: (17730, 26)


#### **4.1. Arithmetic Types and Loop Ends**

In [20]:
# NOTE(review): the return value is not captured and the following cell groups
# all_lines_data by 'arithmetic_type', so this call presumably fills that
# column in place - confirm in AnalysisFuncs.
analyze.get_arithmetics(all_lines_data, arithmetics_col='arithmetic_type')

In [21]:
# Per-arithmetic success rate, mean response time and number of steps.
# The rename maps each aggregated source column to its report name explicitly.
performance_per_arithmetic = (
    all_lines_data
    .groupby('arithmetic_type')
    .agg({'correct': 'mean', 'rt': 'mean', 'step_num': 'count'})
    .rename(columns={
        'correct': 'success_rate',
        'rt': 'mean_rt',
        'step_num': 'size',
    })
)

performance_per_arithmetic

Unnamed: 0_level_0,success_rate,mean_rt,size
arithmetic_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"['*', 'round_down']",0.865,4577.9075,400
"['*', 'round_up']",0.865607,3746.721098,692
['*'],0.961353,1939.915459,2484
['+'],0.966592,2016.886605,3113
['-'],0.963224,1673.90556,3399
['/'],0.971301,1576.301336,2021
['average'],0.967836,2683.374854,1710
['loop_end'],0.982357,689.812836,3911


In [22]:
# Mean response time per arithmetic, slowest first.
# The y-axis label states the "(ms)" unit, as the priming figures earlier in
# this notebook do for the same 'rt' measurements.
px.bar(
    performance_per_arithmetic.sort_values(by='mean_rt', ascending=False).reset_index(),
    x='arithmetic_type',
    y='mean_rt',
    title='Mean Response Time per Arithmetic',
    labels={
        'mean_rt': 'mean response time (ms)',
        'arithmetic_type': 'arithmetic',
    },
)

#### **4.2. Mistake in Previous Step**

In [23]:
# Drop steps that have no previous step (step_num == 0).
# .copy() hands the helper an independent frame: the next cell reads
# 'is_prev_correct' back from non_first_steps, so the helper evidently writes
# into the frame it receives, and writing into a boolean-mask selection can
# raise pandas' SettingWithCopyWarning.
non_first_steps = all_lines_data[all_lines_data['step_num'] != 0].copy()
# NOTE(review): in the recorded outputs the row count after this filter
# (687 + 17043) equals the full all_lines_data size (17730), so the filter
# removed nothing on this data - confirm that step_num is 0-based.
analyze.is_prev_correct(raw_data, non_first_steps)

There are 687 steps whose previous step is incorrect.


In [24]:
# Response-time mean, standard deviation and group size, split by whether
# the preceding step was answered correctly.
after_mistake_diff = (
    non_first_steps
    .groupby('is_prev_correct')['rt']
    .agg(['mean', 'std', 'count'])
)
after_mistake_diff

Unnamed: 0_level_0,mean,std,count
is_prev_correct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,1644.435226,1873.711951,687
True,1792.713959,1637.38658,17043


In [25]:
# One bar per previous-step outcome (incorrect / correct).
# The y-axis label states the "(ms)" unit, as the priming figures earlier in
# this notebook do for the same 'rt' measurements.
px.bar(
    after_mistake_diff.reset_index(),
    x='is_prev_correct',
    y='mean',
    title='Mean Response Time According to Previous Step Success',
    labels={
        'mean': 'mean response time (ms)',
        'is_prev_correct': 'previous step success',
    },
)

## **5. Learning Effect Testing**

In [26]:
# Build the all-lines frame used for the learning analysis; the following
# cells group it by 'n_session'.
# NOTE(review): presumably n_session is the ordinal of the session within each
# subject - confirm in AnalysisFuncs.
all_lines_learning = analyze.get_n_session(all_lines_data)

In [27]:
# Mean response time per session, kept as a one-column frame.
rt_by_session_for_mean = all_lines_learning.groupby('n_session')[['rt']]
learning_diff_mean = rt_by_session_for_mean.mean()

# Session numbers become strings (presumably so the bar chart treats them as categories).
session_labels = learning_diff_mean.index.astype(str)
learning_diff_mean.index = session_labels

In [28]:
# One bar per session showing the mean response time.
# The y-axis label states the "(ms)" unit, as the priming figures earlier in
# this notebook do for the same 'rt' measurements.
px.bar(
    learning_diff_mean.reset_index(),
    x='n_session',
    y='rt',
    title='General Difference in Response Time Between Sessions',
    labels={
        'rt': 'mean response time (ms)',
        'n_session': '# session',
    },
)

In [29]:
# 90th-percentile response time per session, i.e. how slow the slow steps are.
rt_by_session_for_quantile = all_lines_learning.groupby('n_session')[['rt']]
learning_diff_90_per = rt_by_session_for_quantile.quantile(0.9)

# Session numbers become strings (presumably so the bar chart treats them as categories).
percentile_session_labels = learning_diff_90_per.index.astype(str)
learning_diff_90_per.index = percentile_session_labels

In [30]:
# One bar per session showing the 90th-percentile response time.
# The y-axis label states the "ms" unit, as the priming figures earlier in
# this notebook do for the same 'rt' measurements.
px.bar(
    learning_diff_90_per.reset_index(),
    x='n_session',
    y='rt',
    title='Difference in Slow Steps Response Time Between Sessions',
    labels={
        'rt': 'response time (ms, 90th percentile)',
        'n_session': '# session',
    },
)