# **LoopsResultsAnalysis**

## **1. Importing and First Processing**

In [11]:
import pandas as pd
from scipy import stats
import plotly.express as px
from DataCleaning import *
from ProcessingConfig import *
from datetime import datetime as dt

import sys

# Make the project's source and script folders importable.
# Entries of sys.path must be directories (or zip archives): a path that points
# at a .py file is ignored by the import system, so the *parent directory* of
# preprocess_2.py is added rather than the file itself.
# The membership check keeps sys.path from growing when this cell is re-run.
for _extra_path in (
    '../../../msa-1/src/msa',
    '../../../msa-1/src/msa/loops1',
    '../../../msa-1/scripts/loops1',
):
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)

import preprocess_2

pd.options.display.max_columns = 50

ModuleNotFoundError: No module named 'loops1'

In [3]:
# Load the raw export and report the configured outlier threshold.
raw_data = pd.read_excel(cleaning_config['raw_data_path'])
print(f'original shape: {raw_data.shape}')
outliers_threshold = cleaning_config['filter_threshold']
print(f"threshold for outliers detection: {outliers_threshold}")

# Column dropping and type conversion: return values are not used here, so these
# helpers are presumably in-place — TODO confirm against DataCleaning.
drop_columns(raw_data, cleaning_config['unnecessary_columns'])
convert_types(raw_data, cleaning_config['type_conversions'])

# Row filters, applied in this exact order; each step receives the frame
# returned by the previous one.
raw_data = (
    raw_data
    .pipe(filter_slow_subjects, outliers_threshold)
    .pipe(filter_bad_subjects, outliers_threshold)
    .pipe(drop_first_loop)
    .pipe(only_first_line)
    .pipe(filter_bad_trials, threshold=0.5)
    .pipe(filter_slow_steps, outliers_threshold)
)

# Persist the cleaned data under a timestamped file name.
run_stamp = dt.now().strftime("%d.%m.%Y_%H-%M")
path = cleaning_config['results_path'] + f'_{run_stamp}.xlsx'
raw_data.to_excel(path)

print(f'final shape: {raw_data.shape}')

original shape: (17408, 32)
threshold for outliers detection: 2.25
filter_slow_subjects: No slow subjects detected.
filter_bad_subjects: No bad subjects detected (in terms of low success rate).
drop_first_loop: 2134 rows were filtered out.
only_first_line: 12081 rows were filtered out.
filter_bad_trials: 84 bad trials were filtered (in terms of low success rate):
   subject  trial  trial_success_rate
0     101A      7                 0.0
60    111A   1012                 0.0
59    110B   1001                 0.0
58    110B   1009                 0.0
57    110B   1006                 0.0
..     ...    ...                 ...
24    107A   1002                 0.0
23    107A   1011                 0.0
22    107A   1007                 0.0
30    107A   1006                 0.0
83    111A      8                 0.0

[84 rows x 3 columns]
filter_slow_steps: 81 slow steps were filtered out.
Here is a summary of slow steps rate per subjects: 
              slow steps rate (%)
subject          

In [4]:
# Number of distinct (non-missing) subject identifiers left after cleaning.
n_subjects = len(raw_data['subject'].dropna().unique())
print(f'number of subjects: {n_subjects}')

# A session is counted as one distinct (subject, trial_set) combination.
n_sessions = len(raw_data.drop_duplicates(subset=['subject', 'trial_set']))
print(f'number of sessions: {n_sessions}')

number of subjects: 17
number of sessions: 31


## **2. Priming Effect Testing**

In [5]:
# Significance level passed to the statistical tests in this section.
alpha = 0.05

# Wrapper functions around the F-test and the independent-samples t-test.

def f_test(smp1, smp2, alpha):
    """Two-sided F-test for equality of variances of two samples.

    Parameters
    ----------
    smp1, smp2 : array-like exposing ``.var(ddof=...)`` and ``.size``
        The two samples to compare (e.g. pandas Series or NumPy arrays).
    alpha : float
        Significance level of the test.

    Returns
    -------
    bool
        True when equality of variances is NOT rejected at level ``alpha``
        (i.e. the samples may be treated as having equal variance),
        False otherwise.
    """
    # The F statistic is the ratio of sample *variances* (not standard deviations).
    f_score = smp1.var(ddof=1) / smp2.var(ddof=1)
    df1, df2 = smp1.size - 1, smp2.size - 1

    # Two-sided p-value: the variances can differ in either direction, so take
    # the smaller tail and double it. Using only the lower tail would never
    # reject when smp1 has the (much) larger variance.
    lower_tail = stats.f.cdf(f_score, df1, df2)
    upper_tail = stats.f.sf(f_score, df1, df2)
    p_value = 2 * min(lower_tail, upper_tail)

    return bool(p_value >= alpha)

def t_test_ind(smp1, smp2, alpha, alternative='two-sided'):
    """Run an independent two-sample t-test and print a short verdict.

    The result of ``f_test(smp1, smp2, alpha)`` is forwarded to
    ``scipy.stats.ttest_ind`` as ``equal_var``.

    Parameters
    ----------
    smp1, smp2 : array-like
        The two samples to compare.
    alpha : float
        Significance level, used both for the variance check and the verdict.
    alternative : str, optional
        Alternative hypothesis passed through to ``scipy.stats.ttest_ind``.

    Returns
    -------
    tuple
        ``(t_score, p_value)`` as returned by ``scipy.stats.ttest_ind``.
    """
    variances_match = f_test(smp1, smp2, alpha)
    outcome = stats.ttest_ind(
        smp1,
        smp2,
        equal_var=variances_match,
        alternative=alternative,
    )
    t_score, p_value = outcome

    print(f'p_value: {p_value}')
    verdict = (
        f'There is a significant difference between the samples! ({(1-alpha)*100}%).'
        if p_value <= alpha
        else f'It is not possible to determine whether there is an effect ({(1-alpha)*100}%).'
    )
    print(verdict)
    return t_score, p_value

In [6]:
# Summary table indexed by (loop_type, loop_type_switch):
#   - mean_response_time: mean 'rt' over rows where 'correct' is truthy
#   - mean_success_rate:  mean of 'correct' over all rows
switching_diff = (
    raw_data.loc[raw_data['correct']]
    .groupby(['loop_type', 'loop_type_switch'])['rt']
    .mean()
    .to_frame('mean_response_time')
    .assign(
        mean_success_rate=raw_data.groupby(['loop_type', 'loop_type_switch'])['correct'].mean()
    )
)

switching_diff

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_response_time,mean_success_rate
loop_type,loop_type_switch,Unnamed: 2_level_1,Unnamed: 3_level_1
for,False,3280.274038,0.973479
for,True,3265.996753,0.973144
while,False,3585.572148,0.9536
while,True,3689.630182,0.9648


In [7]:
# Priming effect on 'for' loops: are correct responses slower right after a
# loop-type switch ("different") than after a repetition ("same")?
correct_for_rows = raw_data['correct'] & (raw_data['loop_type'] == 'for')
switched = raw_data['loop_type_switch']

same = raw_data.loc[correct_for_rows & ~switched, 'rt']
different = raw_data.loc[correct_for_rows & switched, 'rt']

# One-sided test: H1 is mean(different) > mean(same).
t_score, p_value = t_test_ind(different, same, alpha=alpha, alternative='greater')

p_value: 0.5576595681879694
It is not possible to determine whether there is an effect (95.0%).


In [8]:
# Priming effect on 'while' loops: are correct responses slower right after a
# loop-type switch ("different") than after a repetition ("same")?
correct_while_rows = raw_data['correct'] & (raw_data['loop_type'] == 'while')
switched = raw_data['loop_type_switch']

same = raw_data.loc[correct_while_rows & ~switched, 'rt']
different = raw_data.loc[correct_while_rows & switched, 'rt']

# One-sided test: H1 is mean(different) > mean(same).
t_score, p_value = t_test_ind(different, same, alpha=alpha, alternative='greater')

p_value: 0.17272443115918634
It is not possible to determine whether there is an effect (95.0%).
