# **LoopsResultsAnalysis**

## **1. Importing and First Processing**

In [3]:
import pandas as pd
from scipy import stats
import plotly.express as px
import ProcessingConfig as config
import ProcessingFuncs as process
import AnalysisFuncs as analyze

# Let displayed DataFrames show up to 50 columns before pandas truncates them.
pd.set_option('display.max_columns', 50)

In [4]:
# Read the raw data spreadsheet from the path stored in config.cleaning_config.
raw_data = pd.read_excel(config.cleaning_config['raw_data_path'])

# "First lines" dataset: the drop_assign_steps / drop_first_loop_steps /
# only_first_lines flags are on, and all subject, trial and step filters are off.
# NOTE(review): the log prints "original shape: (21074, 32)" for this call but
# "(21074, 25)" for the later clean_data calls on the same raw_data, which
# suggests clean_data changes raw_data in place -- confirm in ProcessingFuncs.
first_lines_data = process.clean_data(
    raw_data,
    drop_assign_steps=True,
    drop_first_loop_steps=True,
    only_first_lines=True,
    filter_subjects=False,
    filter_trials=False,
    filter_steps=False,
)
# Presumably persists the cleaned frame to an Excel file named after `name` -- confirm.
process.save_in_excel(first_lines_data, name='first_lines_data')

# Last expression of the cell: reports the number of sessions and subjects.
analyze.get_sample_size(first_lines_data)

original shape: (21074, 32)
threshold for outliers detection: 2.25 (iqr).
drop_assign: 552 rows were filtered out.
drop_first_loop: 2589 rows were filtered out.
is_first_line: 14068 lines were filtered.
final shape: (3865, 26)
There are 46 sessions from 23 subjects.


In [5]:
# Stricter "best first lines" dataset: same row-selection flags as the plain
# first-lines dataset, but with subject, trial and step filtering switched on
# and the two thresholds passed explicitly.
best_first_lines = process.clean_data(
    raw_data,
    outliers_threshold=2.25,
    trials_success_threshold=0.9,
    drop_assign_steps=True,
    drop_first_loop_steps=True,
    only_first_lines=True,
    filter_subjects=True,
    filter_trials=True,
    filter_steps=True,
)
# Presumably persists the cleaned frame to an Excel file named after `name` -- confirm.
process.save_in_excel(best_first_lines, name='best_first_lines')

# Last expression of the cell: reports the sample size of the filtered data.
analyze.get_sample_size(best_first_lines)

original shape: (21074, 25)
threshold for outliers detection: 2.25 (iqr).
drop_assign: 552 rows were filtered out.
drop_first_loop: 2589 rows were filtered out.
filter_slow_subjects: No slow subjects detected.
filter_bad_subjects: No bad subjects detected (in terms of low success rate).
is_first_line: 14068 lines were filtered.
filter_bad_trials: 146 bad trials were filtered (in terms of low success rate):
    subject  trial  trial_success_rate
102    110B      2            0.571429
139    113A      8            0.571429
116    112A   1002            0.714286
32     102A   1003            0.714286
34     102A   1007            0.714286
..      ...    ...                 ...
49     104B      8            0.857143
48     104A     10            0.857143
47     104A      2            0.857143
45     104A      4            0.857143
145    113A   1005            0.857143

[146 rows x 3 columns]
filter_slow_steps: 75 slow steps were filtered out.
Here is a summary of slow steps rate per subje

## **2. General Priming Effect Testing**

### **2.1. First Lines Data**

In [6]:
# Mean response time of correctly answered first lines, split only by whether
# the loop type switched.
(
    first_lines_data[first_lines_data['correct']]
    .groupby(['loop_type_switch'])['rt']
    .mean()
    .to_frame()
)

Unnamed: 0_level_0,rt
loop_type_switch,Unnamed: 1_level_1
False,3400.887324
True,3445.910666


In [7]:
# Mean response time and number of steps per (loop type, switch) combination,
# computed on correctly answered first lines with a single groupby.
switching_diff_basic = (
    first_lines_data[first_lines_data['correct']]
    .groupby(['loop_type', 'loop_type_switch'])
    .agg(
        mean_response_time=('rt', 'mean'),
        steps_counter=('step_num', 'count'),
    )
)

switching_diff_basic

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_response_time,steps_counter
loop_type,loop_type_switch,Unnamed: 2_level_1,Unnamed: 3_level_1
for,False,3223.37931,928
for,True,3303.547414,928
while,False,3580.328976,918
while,True,3589.668118,919


In [8]:
# Turn the (loop_type, loop_type_switch) index into ordinary columns so they
# can be referenced by name in the chart.
switch_basic_for_fig = switching_diff_basic.reset_index()
px.bar(
    switch_basic_for_fig,
    x='loop_type',
    y='mean_response_time',
    color='loop_type_switch',
    barmode='group',
    title='Priming Effect Per Loop Type (All First Lines)',
    labels={
        'loop_type': 'after switch loop type',
        'mean_response_time': 'mean response time (ms)',
        'loop_type_switch': 'switching loop type',
    },
)

In [9]:
# Test for a significant Pearson correlation between loop-type switching and
# response time. No columns are passed; the printed report shows the helper
# then correlates loop_type_switch with rt.
# NOTE(review): unlike the mean tables in this section, the frame is passed
# without the `correct` filter -- confirm whether the helper filters internally.
analyze.test_rt_switch_corr(first_lines_data)

Pearson correlation between loop_type_switch and rt 
            is not significant (p = 0.261398), with value of r = 0.0181


0.018069513950245877

### **2.2. Best First Lines**

In [10]:
# Mean response time of correctly answered best first lines, split only by
# whether the loop type switched.
(
    best_first_lines[best_first_lines['correct']]
    .groupby(['loop_type_switch'])['rt']
    .mean()
    .to_frame()
)

Unnamed: 0_level_0,rt
loop_type_switch,Unnamed: 1_level_1
False,3238.65315
True,3252.44228


In [11]:
# Mean response time and number of steps per (loop type, switch) combination,
# computed on correctly answered best first lines with a single groupby.
switching_diff_basic_best = (
    best_first_lines[best_first_lines['correct']]
    .groupby(['loop_type', 'loop_type_switch'])
    .agg(
        mean_response_time=('rt', 'mean'),
        steps_counter=('step_num', 'count'),
    )
)

switching_diff_basic_best

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_response_time,steps_counter
loop_type,loop_type_switch,Unnamed: 2_level_1,Unnamed: 3_level_1
for,False,3133.140805,696
for,True,3119.216954,696
while,False,3345.859854,685
while,True,3386.826087,690


In [12]:
# Reuses the switch_basic_for_fig name for the best-first-lines summary, with
# the grouping index flattened into columns for the chart.
switch_basic_for_fig = switching_diff_basic_best.reset_index()
px.bar(
    switch_basic_for_fig,
    x='loop_type',
    y='mean_response_time',
    color='loop_type_switch',
    barmode='group',
    title='Priming Effect Per Loop Type (Best First Lines)',
    labels={
        'loop_type': 'after switch loop type',
        'mean_response_time': 'mean response time (ms)',
        'loop_type_switch': 'switching loop type',
    },
)

In [13]:
# Test for a significant Pearson correlation between loop-type switching and
# response time on the filtered ("best") first lines. No columns are passed;
# the printed report shows the helper then correlates loop_type_switch with rt.
# NOTE(review): the frame is passed without the `correct` filter used by the
# mean tables -- confirm whether the helper filters internally.
analyze.test_rt_switch_corr(best_first_lines)

Pearson correlation between loop_type_switch and rt 
            is not significant (p = 0.826768), with value of r = 0.0042


0.004162298137488303

## **3. Factors Influencing the Priming Effect**

### **3.1. Arithmetic Types and Loop Ends**

#### **3.1.1. First Lines Data**

In [14]:
# Called without using a return value; the following cells group
# first_lines_data by 'arithmetic_type', so this helper presumably prepares
# that column in place -- confirm in AnalysisFuncs.
analyze.get_arithmetics(
    first_lines_data,
    arithmetics_col='arithmetic_type',
)

In [15]:
# Mean response time and number of steps per (arithmetic type, switch)
# combination, computed on correctly answered first lines with a single groupby.
switching_diff_arithmetics = (
    first_lines_data[first_lines_data['correct']]
    .groupby(['arithmetic_type', 'loop_type_switch'])
    .agg(
        mean_response_time=('rt', 'mean'),
        number_of_steps=('step_num', 'count'),
    )
)

switching_diff_arithmetics

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_response_time,number_of_steps
arithmetic_type,loop_type_switch,Unnamed: 2_level_1,Unnamed: 3_level_1
"['*', 'round_down']",False,6479.317073,82
"['*', 'round_down']",True,6225.75,80
"['*', 'round_up']",False,4551.710744,121
"['*', 'round_up']",True,4606.368852,122
['*'],False,2891.013928,359
['*'],True,2917.419444,360
['+'],False,3007.631579,532
['+'],True,3008.843866,538
['-'],False,3045.0369,271
['-'],True,3099.522388,268


In [16]:
# Grouped bars: one pair (no switch / switch) per first-step arithmetic type.
px.bar(
    switching_diff_arithmetics.reset_index(),
    x='arithmetic_type',
    y='mean_response_time',
    color='loop_type_switch',
    barmode='group',
    title='First Step Arithmetic Influence on Priming (All First Lines)',
    labels={
        'mean_response_time': 'mean response time',
        'arithmetic_type': 'first step arithmetic',
        'loop_type_switch': 'switching loop type',
    },
)

In [82]:
# Restrict to rows whose arithmetic type is exactly the string "['average']"
# and test the switch / response-time correlation on that subset.
# NOTE(review): this cell carries execution count 82, out of sequence with the
# neighbouring cells (16, 17), and the grouped table for correct first lines in
# this section lists no "['average']" type -- the stored result may come from a
# different kernel state. Re-run from a fresh kernel and confirm the subset is
# not empty.
average_data = first_lines_data.loc[
    first_lines_data['arithmetic_type'].eq("['average']")
]
analyze.test_rt_switch_corr(average_data)

Pearson correlation between loop_type_switch and rt 
            is not significant (p = 0.050932), with value of r = 0.0831


0.0831296179330276

#### **3.1.2. Best First Lines**

In [17]:
# Called without using a return value; the following cells group
# best_first_lines by 'arithmetic_type', so this helper presumably prepares
# that column in place -- confirm in AnalysisFuncs.
analyze.get_arithmetics(
    best_first_lines,
    arithmetics_col='arithmetic_type',
)

In [18]:
# Mean response time and number of steps per (arithmetic type, switch)
# combination, computed on correctly answered best first lines.
switching_diff_arithmetics_best = (
    best_first_lines[best_first_lines['correct']]
    .groupby(['arithmetic_type', 'loop_type_switch'])
    .agg(
        mean_response_time=('rt', 'mean'),
        number_of_steps=('step_num', 'count'),
    )
)

switching_diff_arithmetics_best

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_response_time,number_of_steps
arithmetic_type,loop_type_switch,Unnamed: 2_level_1,Unnamed: 3_level_1
"['*', 'round_down']",False,5578.183673,49
"['*', 'round_down']",True,5615.057692,52
"['*', 'round_up']",False,4304.387097,93
"['*', 'round_up']",True,4267.393617,94
['*'],False,2900.173432,271
['*'],True,2895.220588,272
['+'],False,2908.910891,404
['+'],True,2857.801008,397
['-'],False,2998.927273,220
['-'],True,3073.29148,223


In [19]:
# Grouped bars: one pair (no switch / switch) per first-step arithmetic type,
# for the filtered ("best") first lines.
px.bar(
    switching_diff_arithmetics_best.reset_index(),
    x='arithmetic_type',
    y='mean_response_time',
    color='loop_type_switch',
    barmode='group',
    title='First Step Arithmetic Influence on Priming (Best First Lines)',
    labels={
        'mean_response_time': 'mean response time',
        'arithmetic_type': 'first step arithmetic',
        'loop_type_switch': 'switching loop type',
    },
)

### **3.2. Mistake in Previous Step**

#### **3.2.1. First Lines Data**

In [20]:
# Prints how many steps follow an incorrect step. The next cell groups
# first_lines_data by an `is_prev_correct` column that no cell in this notebook
# assigns, so this helper presumably adds that column in place (using raw_data
# to look up the previous step) -- confirm in AnalysisFuncs.
analyze.is_prev_correct(raw_data, first_lines_data)

There are 10 steps whose previous step is incorrect.


In [21]:
# Mean, standard deviation and count of response times per
# (switch, previous-step correctness) combination, on correctly answered first
# lines, computed with a single groupby.
switching_diff_prev_correct = (
    first_lines_data[first_lines_data['correct']]
    .groupby(['loop_type_switch', 'is_prev_correct'])
    .agg(
        mean_response_time=('rt', 'mean'),
        response_time_std=('rt', 'std'),
        number_of_steps=('step_num', 'count'),
    )
)

switching_diff_prev_correct

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_response_time,response_time_std,number_of_steps
loop_type_switch,is_prev_correct,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
False,False,2094.333333,540.727596,3
False,True,3403.014107,1976.114906,1843
True,False,4154.25,3504.755662,4
True,True,3444.373304,2186.040038,1843


#### Note the very small number of steps whose previous step was incorrect (only 10 in total; 7 of them appear in the table above, which is restricted to correct responses).

In [22]:
# Flatten the grouping index into columns, then plot mean response time by
# previous-step correctness, with one bar per switch condition.
switch_prev_for_fig = switching_diff_prev_correct.reset_index()
px.bar(
    switch_prev_for_fig,
    x='is_prev_correct',
    y='mean_response_time',
    color='loop_type_switch',
    barmode='group',
    title='Previous Step Mistake Influence On Priming (Mean Response Time)',
    labels={
        'is_prev_correct': 'previous step correctness',
        'mean_response_time': 'mean response time (ms)',
        'loop_type_switch': 'switching loop type',
    },
)

In [23]:
# Same layout as the mean chart, but the bar height is the response-time
# standard deviation of each group.
switch_prev_for_fig = switching_diff_prev_correct.reset_index()
px.bar(
    switch_prev_for_fig,
    x='is_prev_correct',
    y='response_time_std',
    color='loop_type_switch',
    barmode='group',
    title='Previous Step Mistake Influence On Priming (Response Time STD)',
    labels={
        'is_prev_correct': 'previous step correctness',
        'response_time_std': 'response time std (ms)',
        'loop_type_switch': 'switching loop type',
    },
)

#### **3.2.2. Best First Lines**

In [24]:
# Prints how many steps in best_first_lines follow an incorrect step; the
# count is used here only to decide whether this subset is worth analysing.
# Presumably also adds an `is_prev_correct` column in place -- confirm in AnalysisFuncs.
analyze.is_prev_correct(raw_data, best_first_lines)

There are 1 steps whose previous step is incorrect.


##### There is only 1 step whose previous step is incorrect, so there is nothing to analyze.

### **3.3. Learning and Priming**

#### **3.3.1. First Lines Data**

In [25]:
# The returned frame is grouped by an `n_session` column in the next cell, so
# this helper presumably returns the data with a per-subject session number
# attached -- confirm in AnalysisFuncs.
first_lines_learning = analyze.get_n_session(first_lines_data)

In [26]:
# Mean response time and number of steps per (switch, session number)
# combination, computed on correctly answered first lines with a single groupby.
switching_diff_learning = (
    first_lines_learning[first_lines_learning['correct']]
    .groupby(['loop_type_switch', 'n_session'])
    .agg(
        mean_response_time=('rt', 'mean'),
        number_of_steps=('step_num', 'count'),
    )
)

switching_diff_learning

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_response_time,number_of_steps
loop_type_switch,n_session,Unnamed: 2_level_1,Unnamed: 3_level_1
False,1,3963.542576,916
False,2,2846.702151,930
True,1,4045.443593,913
True,2,2859.857602,934


In [27]:
# Grouped bars: one pair (no switch / switch) per session number.
px.bar(
    switching_diff_learning.reset_index(),
    x='n_session',
    y='mean_response_time',
    color='loop_type_switch',
    barmode='group',
    title='Difference in Priming Between Sessions (All first Lines)',
    labels={
        'mean_response_time': 'mean response time',
        'n_session': '# session',
        'loop_type_switch': 'switching loops types',
    },
)

#### **3.3.2. Best First Lines**

In [28]:
# The returned frame is grouped by an `n_session` column in the next cell, so
# this helper presumably returns the data with a per-subject session number
# attached -- confirm in AnalysisFuncs.
best_first_lines_learning = analyze.get_n_session(best_first_lines)

In [29]:
# Mean response time and number of steps per (switch, session number)
# combination, computed on correctly answered best first lines.
switching_diff_learning_best = (
    best_first_lines_learning[best_first_lines_learning['correct']]
    .groupby(['loop_type_switch', 'n_session'])
    .agg(
        mean_response_time=('rt', 'mean'),
        number_of_steps=('step_num', 'count'),
    )
)

switching_diff_learning_best

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_response_time,number_of_steps
loop_type_switch,n_session,Unnamed: 2_level_1,Unnamed: 3_level_1
False,1,3776.865079,630
False,2,2787.157124,751
True,1,3813.096063,635
True,2,2778.387483,751


In [30]:
# Grouped bars: one pair (no switch / switch) per session number, for the
# filtered ("best") first lines.
px.bar(
    switching_diff_learning_best.reset_index(),
    x='n_session',
    y='mean_response_time',
    color='loop_type_switch',
    barmode='group',
    title='Difference in Priming Between Sessions (Best First Lines)',
    labels={
        'mean_response_time': 'mean response time',
        'n_session': '# session',
        'loop_type_switch': 'switching loops types',
    },
)

## **4. Factors Influencing Response Time in General**

In [31]:
# Dataset with every line of each loop (only_first_lines=False) and all
# subject/trial/step filters enabled. The drop_* flags and thresholds are left
# at clean_data's defaults; the log below shows assign and first-loop rows
# being dropped and a 2.25 IQR outlier threshold.
all_lines_data = process.clean_data(
    raw_data,
    only_first_lines=False,
    filter_subjects=True,
    filter_trials=True,
    filter_steps=True,
)

original shape: (21074, 25)
threshold for outliers detection: 2.25 (iqr).
drop_assign: 552 rows were filtered out.
drop_first_loop: 2589 rows were filtered out.
filter_slow_subjects: No slow subjects detected.
filter_bad_subjects: No bad subjects detected (in terms of low success rate).
is_first_line: There are 3865 first lines over all.
filter_bad_trials: No bad trials detected (in terms of low success rate).
filter_slow_steps: 381 slow steps were filtered out.
Here is a summary of slow steps rate per subjects: 
     subject              104A  107A  107B  113A  111A  101A  102A  110B  111B  \
slow steps rate (%)  1.64  1.89  2.12  2.18  2.25  2.36  2.42  2.44  2.71   

subject              106B  105A  109A  108A  102B  106A  109B  112A  104B  \
slow steps rate (%)  2.84  3.29  3.69   3.7  3.94  4.09  4.25   4.6  4.71   

subject              105B  103A  112B  108B  101B  
slow steps rate (%)  4.96  5.34  5.96   8.1   8.3  
final shape: (17552, 26)


### **4.1. Arithmetic Types and Loop Ends**

In [32]:
# Called without using a return value; the next cell groups all_lines_data by
# 'arithmetic_type', so this helper presumably prepares that column in place --
# confirm in AnalysisFuncs.
analyze.get_arithmetics(
    all_lines_data,
    arithmetics_col='arithmetic_type',
)

In [33]:
# Per arithmetic type: share of correct steps, mean response time and number
# of steps. Named aggregation sets the output column names directly instead of
# renaming them afterwards.
performance_per_arithmetic = all_lines_data.groupby('arithmetic_type').agg(
    success_rate=('correct', 'mean'),
    mean_rt=('rt', 'mean'),
    size=('step_num', 'count'),
)

performance_per_arithmetic

Unnamed: 0_level_0,success_rate,mean_rt,size
arithmetic_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"['*', 'round_down']",0.869688,4177.184136,353
"['*', 'round_up']",0.866058,3536.453577,657
['*'],0.961974,1911.875809,2472
['+'],0.967335,1982.409767,3092
['-'],0.963695,1652.725797,3388
['/'],0.972181,1551.535519,2013
['average'],0.970185,2584.288014,1677
['loop_end'],0.983077,670.552564,3900


In [34]:
# Bar chart of mean response time per arithmetic type, ordered from slowest to
# fastest.
px.bar(
    performance_per_arithmetic.sort_values(by='mean_rt', ascending=False).reset_index(),
    x='arithmetic_type',
    y='mean_rt',
    title='Mean Response Time per Arithmetic',
    labels={
        'mean_rt': 'mean response time',
        'arithmetic_type': 'arithmetic',
    },
)

### **4.2. Mistake in Previous Step**

In [35]:
# Keep only steps that have a previous step (step_num != 0).
# .copy() makes non_first_steps an independent frame: the helper below appears
# to add an `is_prev_correct` column to the frame it receives (later cells read
# that column without any assignment here), and writing into a boolean-mask
# slice of all_lines_data can raise SettingWithCopyWarning.
# NOTE(review): the grouped counts computed from this frame (672 + 16880) equal
# the 17552 rows of all_lines_data, so this filter seems to remove nothing --
# confirm whether step_num ever equals 0 after cleaning.
non_first_steps = all_lines_data[all_lines_data['step_num'] != 0].copy()
analyze.is_prev_correct(raw_data, non_first_steps)

There are 672 steps whose previous step is incorrect.


In [36]:
# Response-time mean, standard deviation and count, split by whether the
# previous step was answered correctly.
after_mistake_diff = (
    non_first_steps
    .groupby('is_prev_correct')['rt']
    .agg(['mean', 'std', 'count'])
)
after_mistake_diff

Unnamed: 0_level_0,mean,std,count
is_prev_correct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,1513.06994,1651.884713,672
True,1736.295201,1530.234329,16880


In [37]:
# One bar per previous-step outcome; bar height is the mean response time.
px.bar(
    after_mistake_diff.reset_index(),
    x='is_prev_correct',
    y='mean',
    title='Mean Response Time According to Previous Step Success',
    labels={
        'mean': 'mean response time',
        'is_prev_correct': 'previous step success',
    },
)

In [38]:
# Pearson correlation between previous-step correctness and response time,
# using the same helper as the switching tests with explicit column names.
analyze.test_rt_switch_corr(
    non_first_steps,
    col1='is_prev_correct',
    col2='rt',
)

Pearson correlation between is_prev_correct and rt 
            is significant (p = 0.000219), with value of r = 0.0279


0.027894409891428277

## **5. Learning Effect Testing**

### **5.1. General Learning Among All Subjects**

In [39]:
# The returned frame is grouped by an `n_session` column in the cells below, so
# this helper presumably returns the data with a per-subject session number
# attached -- confirm in AnalysisFuncs.
all_lines_learning = analyze.get_n_session(all_lines_data)

In [40]:
# Mean response time per session number, kept as a one-column DataFrame.
learning_diff_mean = (
    all_lines_learning
    .groupby('n_session')[['rt']]
    .agg('mean')
)
# Session numbers become strings so the chart built from this frame receives
# them as text labels rather than numbers.
learning_diff_mean.index = learning_diff_mean.index.astype(str)

In [41]:
# One bar per session; bar height is the mean response time over all subjects.
px.bar(
    learning_diff_mean.reset_index(),
    x='n_session',
    y='rt',
    title='General Difference in Response Time Between Sessions',
    labels={
        'rt': 'mean response time',
        'n_session': '# session',
    },
)

In [42]:
# Pearson correlation between session number and response time, using the
# same helper as the switching tests with explicit column names.
analyze.test_rt_switch_corr(
    all_lines_learning,
    col1='n_session',
    col2='rt',
)

Pearson correlation between n_session and rt 
            is significant (p = 0.0), with value of r = -0.1415


-0.14149527543885657

### **5.2. Learning Within Subject**

In [77]:
# One row per subject, one column per session number (1 and 2), holding the
# mean response time of that subject in that session.
learning_per_subject = (
    all_lines_learning
    .groupby(['subject', 'n_session'])['rt']
    .mean()
    .unstack()
)
# Relative improvement from session 1 to session 2, as a fraction:
# 1 - (session 2 mean / session 1 mean). Positive means faster in session 2.
learning_per_subject['learning_rate'] = 1 - learning_per_subject[2].div(learning_per_subject[1])

In [79]:
# Distribution of per-subject improvement between sessions.
# learning_rate is stored as a fraction (1 - session 2 mean / session 1 mean),
# while the axis label announces percent, so the values are scaled by 100 on a
# copy made only for plotting; learning_per_subject itself is left unchanged.
px.histogram(
    learning_per_subject.assign(
        learning_rate=learning_per_subject['learning_rate'] * 100
    ),
    x='learning_rate',
    title='Performance Improvement Between Sessions',
    labels={
        'learning_rate': 'improvement in mean response time (%)',
    },
)