# **LoopsResultsAnalysis**

## **1. Importing and First Processing**

In [1]:
import pandas as pd
from scipy import stats
import plotly.express as px
import ProcessingConfig as config
import ProcessingFuncs as process
import AnalysisFuncs as analyze

# Show up to 50 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 50)

In [2]:
# Load the raw experiment export from the path given in the cleaning config.
raw_data = pd.read_excel(config.cleaning_config['raw_data_path'])

# Clean the raw data with every listed option switched on, restricted to
# first lines (only_first_lines=True).
first_lines_data = process.clean_data(
    raw_data,
    drop_assign_steps=True,
    drop_first_loop_steps=True,
    only_first_lines=True,
    filter_subjects=True,
    filter_trials=True,
    filter_steps=True,
)
# Persist the cleaned frame through the project's Excel helper.
process.save_in_excel(first_lines_data)

# Last expression of the cell: its result is what the cell displays.
analyze.get_sample_size(first_lines_data)

original shape: (20617, 32)
threshold for outliers detection: 2.25
drop_assign: 540 rows were filtered out.
drop_first_loop: 2532 rows were filtered out.
filter_slow_subjects: No slow subjects detected.
filter_bad_subjects: No bad subjects detected (in terms of low success rate).
is_first_line: 16296 lines were filtered.
filter_bad_trials: No bad trials detected (in terms of low success rate).
filter_slow_steps: 135 slow steps were filtered out.
Here is a summary of slow steps rate per subjects: 
     subject              101A  110A  104A  111A  102A  106A  106B  111B  102B  \
slow steps rate (%)   0.0   0.0  1.14   1.7  1.71  1.74  1.75  2.08  2.84   

subject              101B  109B  107B  110B  105B  105A  107A  108A  103A  \
slow steps rate (%)  2.91  3.43  3.53  3.55  3.59  3.68  3.77   4.0  4.14   

subject              112A  109A  108B  104B  112B  113A  103B  
slow steps rate (%)  4.17  4.85  5.49  5.92  7.29  7.29  8.64  
final shape: (4186, 26)
There are 45 sessions from 25 s

## **2. General Priming Effect Testing**

In [19]:
# Mean response time of correct steps, split only by whether the loop type switched.
(
    first_lines_data
    .loc[first_lines_data['correct']]
    .groupby(['loop_type_switch'])['rt']
    .mean()
    .to_frame()
)

Unnamed: 0_level_0,rt
loop_type_switch,Unnamed: 1_level_1
False,3242.707146
True,3379.240297


In [18]:
# Mean response time and number of correct steps, grouped by loop type and
# by whether the loop type switched.
# The correct-step filter and the grouping are built once, and both statistics
# come from a single named aggregation, rather than repeating the whole
# filter + groupby expression for every column of an initially empty frame.
switching_diff_basic = (
    first_lines_data[first_lines_data['correct']]
    .groupby(['loop_type', 'loop_type_switch'])
    .agg(
        mean_response_time=('rt', 'mean'),
        steps_counter=('step_num', 'count'),
    )
)

switching_diff_basic

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_response_time,steps_counter
loop_type,loop_type_switch,Unnamed: 2_level_1,Unnamed: 3_level_1
for,False,3105.324675,1155
for,True,3250.145928,884
while,False,3383.627886,1126
while,True,3510.714286,868


In [4]:
# Check for a significant Pearson correlation between loop-type switching and
# response time; the helper reports the p-value and the correlation coefficient r.
analyze.test_rt_switch_corr(first_lines_data)

Pearson correlation between loop type switching and response time 
          is significant (p = 0.002), with value of r = 0.047


## **3. Different Influences on the Priming Effect**

#### **3.1. Mistake in Previous Step**

In [5]:
# Flag, for each step in first_lines_data, whether its previous step was correct.
# NOTE(review): the helper appears to add an `is_prev_correct` column to the frame
# it receives (the next cell groups first_lines_data by that column), so this cell
# must run before it — confirm against AnalysisFuncs.
analyze.is_prev_correct(raw_data, first_lines_data)

There are 548 steps whose previous step is incorrect.


In [41]:
# Mean response time and number of correct steps, grouped by loop switching
# and by whether the previous step was correct.
# Relies on the `is_prev_correct` column already being present on first_lines_data.
# The correct-step filter and the grouping are built once, and both statistics
# come from a single named aggregation, rather than repeating the whole
# filter + groupby expression for every column of an initially empty frame.
switching_diff_prev_correct = (
    first_lines_data[first_lines_data['correct']]
    .groupby(['loop_type_switch', 'is_prev_correct'])
    .agg(
        mean_response_time=('rt', 'mean'),
        number_of_steps=('step_num', 'count'),
    )
)

switching_diff_prev_correct

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_response_time,number_of_steps
loop_type_switch,is_prev_correct,Unnamed: 2_level_1,Unnamed: 3_level_1
False,False,2873.40113,531
False,True,3354.765143,1750
True,False,2439.666667,3
True,True,3380.851915,1749


## **4. Different Influences on Response Time in General**

In [7]:
# Clean the same raw data again, this time without restricting to first lines.
# drop_assign_steps / drop_first_loop_steps are not passed here, so
# clean_data's own defaults apply for them.
all_lines_data = process.clean_data(
    raw_data,
    only_first_lines=False,
    filter_subjects=True,
    filter_trials=True,
    filter_steps=True,
)

original shape: (20617, 25)
threshold for outliers detection: 2.25
drop_assign: 540 rows were filtered out.
drop_first_loop: 2532 rows were filtered out.
filter_slow_subjects: No slow subjects detected.
filter_bad_subjects: No bad subjects detected (in terms of low success rate).
is_first_line: There are 4321 first lines over all.
filter_bad_trials: No bad trials detected (in terms of low success rate).
filter_slow_steps: 437 slow steps were filtered out.
Here is a summary of slow steps rate per subjects: 
     subject              110A  113A  111B  104A  107B  110B  112B  111A  107A  \
slow steps rate (%)  0.65  1.09  1.53  1.75  1.96  2.17  2.19   2.4  2.41   

subject              102A  106B  112A  109A  101A  108A  102B  104B  109B  \
slow steps rate (%)  2.83  3.02  3.51   3.7  3.71  3.73  4.53  4.58  4.79   

subject              105A  105B  106A  103A  103B  108B  101B  
slow steps rate (%)  4.82   5.0  5.46  5.65  6.36  7.66  8.94  
final shape: (20180, 26)


#### **4.1. Step Type - Arithmetic vs Loop Ends**

In [33]:
# Mark a step as a loop end when its loop_step equals the loop's n_iterations.
all_lines_data['is_loop_end'] = all_lines_data['loop_step'].eq(all_lines_data['n_iterations'])

In [37]:
# Success rate, mean response time and step count for loop-end vs other steps.
(
    all_lines_data
    .groupby('is_loop_end')
    .agg(
        correct=('correct', 'mean'),
        rt=('rt', 'mean'),
        step_num=('step_num', 'count'),
    )
)

Unnamed: 0_level_0,correct,rt,step_num
is_loop_end,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,0.938993,1756.816482,18129
True,0.967333,1954.725987,2051


#### **4.2. Mistake in Previous Step**

In [8]:
# Keep only steps that have a previous step (step_num 0 has none).
# .copy() makes this an independent frame: analyze.is_prev_correct assigns an
# `is_prev_correct` column on the frame it receives, and doing that on a bare
# boolean-mask slice of all_lines_data raises pandas' SettingWithCopyWarning.
non_first_steps = all_lines_data[all_lines_data['step_num'] != 0].copy()
analyze.is_prev_correct(raw_data, non_first_steps)

There are 1222 steps whose previous step is incorrect.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['is_prev_correct'] = data.apply(get_prev_correct, axis=1)


In [9]:
# Response-time mean, standard deviation and step count, split by whether
# the previous step was correct.
(
    non_first_steps
    .groupby('is_prev_correct')['rt']
    .agg(['mean', 'std', 'count'])
)

Unnamed: 0_level_0,mean,std,count
is_prev_correct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,2099.185761,1575.092844,1222
True,1755.031037,1560.585644,18913
