# **LoopsResultsExploration**

## **1. Importing and First Processing**

In [62]:
import pandas as pd
from DataCleaning import *

pd.options.display.max_columns = 50

In [63]:
# Read the spreadsheet named in the cleaning config and pass it straight
# through clean_data; the result keeps the name `raw_data`.
raw_data = clean_data(pd.read_excel(cleaning_config['raw_data_path']))

In [64]:
# Summarize the cleaned frame: column names, non-null counts and dtypes.
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5071 entries, 0 to 5070
Data columns (total 27 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   index                   5071 non-null   int64 
 1   subject                 5071 non-null   object
 2   step_num                5071 non-null   int64 
 3   uid                     5071 non-null   int64 
 4   trial_start_time        5071 non-null   int64 
 5   rt                      5071 non-null   int32 
 6   response                5071 non-null   object
 7   loop_step               5071 non-null   int32 
 8   trial_order             5071 non-null   int64 
 9   var_mapping             5071 non-null   object
 10  trial_set               5071 non-null   int64 
 11  trial                   5071 non-null   int64 
 12  core_program            5071 non-null   int64 
 13  variant                 5071 non-null   object
 14  step_id                 5071 non-null   int64 
 15  text

In [65]:
# Preview the first rows of the cleaned data.
raw_data.head()

Unnamed: 0,index,subject,step_num,uid,trial_start_time,rt,response,loop_step,trial_order,var_mapping,trial_set,trial,core_program,variant,step_id,text1,response_needed,expected_response,is_loop,loop_type,loop_type_switch,n_iterations,n_loop_lines,step_type,prev_loop_type,correct,same_as_prev_loop_type
0,7,107B,7,30,38002,818,2,1,1,1=a;2=i,2,4,2,FWWWWFFW,3,while a >= 2:\n a /= 2,True,"[4, 2, 1]",True,while,True,3,1,loop,0,False,False
1,8,107B,8,30,38811,302,1,2,1,1=a;2=i,2,4,2,FWWWWFFW,3,while a >= 2:\n a /= 2,True,"[4, 2, 1]",True,while,True,3,1,loop,0,False,False
2,11,107B,11,31,43376,69,5,1,1,1=a;2=i,2,4,2,FWWWWFFW,4,while a <= 7:\n a += 2,True,"[3, 5, 7, 9]",True,while,False,4,1,loop,0,False,False
3,12,107B,12,31,44123,237,7,2,1,1=a;2=i,2,4,2,FWWWWFFW,4,while a <= 7:\n a += 2,True,"[3, 5, 7, 9]",True,while,False,4,1,loop,0,False,False
4,13,107B,13,31,44963,327,9,3,1,1=a;2=i,2,4,2,FWWWWFFW,4,while a <= 7:\n a += 2,True,"[3, 5, 7, 9]",True,while,False,4,1,loop,0,False,False


## **2. Exploring Outliers**

### 2.1. Response-Time Between Subjects

In [66]:
# One row per distinct (subject, step_num, rt) combination; the trailing copy
# keeps this frame independent of raw_data.
response_times = raw_data[['subject', 'step_num', 'rt']].drop_duplicates().copy()

# Per-subject standard deviation and mean of the response time, with the
# columns named 'rt_std' and 'rt_mean'.
response_time_agg = (
    response_times
    .groupby('subject')['rt']
    .agg(['std', 'mean'])
    .add_prefix('rt_')
)

response_time_agg.sort_values('rt_std')

Unnamed: 0_level_0,rt_std,rt_mean
subject,Unnamed: 1_level_1,Unnamed: 2_level_1
107B,1107.481888,1337.406818
104A,1154.958519,1452.411364
109B,1205.228505,1131.595455
108A,1257.528715,1740.395455
104B,1451.739484,1394.095455
105B,1544.058469,2513.627273
101B,1572.542826,2130.194805
106A,1602.449832,1917.463636
102B,1782.107346,2276.977273
109A,1809.132157,1995.15


In [67]:
def is_outlier(x, x_q1, x_q3, x_iqr, threshold):
    """Tell whether ``x`` lies at least ``threshold`` IQRs outside [x_q1, x_q3].

    Parameters
    ----------
    x : number
        The value being tested.
    x_q1, x_q3 : number
        First and third quartiles of the reference distribution.
    x_iqr : number
        Inter-quartile range used to scale the distance from the quartiles.
    threshold : number
        Minimum distance, in IQR units, for a value to count as an outlier.
        The comparison is inclusive (``>=``).

    Returns
    -------
    bool
        True when ``x`` is below ``x_q1`` or above ``x_q3`` by at least
        ``threshold * x_iqr``.
    """
    if x_iqr == 0:
        # A zero IQR would divide by zero (ZeroDivisionError for plain Python
        # numbers, inf/NaN plus a warning for NumPy scalars). Any value
        # strictly outside the quartiles is infinitely many IQRs away, so it
        # is an outlier; a value between them is not.
        return x < x_q1 or x > x_q3

    below = (x_q1 - x) / x_iqr >= threshold
    above = (x - x_q3) / x_iqr >= threshold
    return below or above

In [68]:
# Quartiles and IQR of the per-subject mean response time.
subject_rt_means = response_time_agg['rt_mean']
mean_q1 = subject_rt_means.quantile(0.25)
mean_q3 = subject_rt_means.quantile(0.75)
mean_iqr = mean_q3 - mean_q1

# Flag subjects whose mean is at least 1.5 IQRs outside the quartiles.
mean_outlier_mask = subject_rt_means.apply(
    lambda value: is_outlier(value, mean_q1, mean_q3, mean_iqr, 1.5)
)
mean_outliers = response_time_agg.loc[mean_outlier_mask].index

if mean_outliers.empty:
    print("No Outliers detected! at least in terms of mean response time within subject.")
else:
    print(f'Seems that: {list(mean_outliers)} are outliers in terms of mean response time within subject.')

No Outliers detected! at least in terms of mean response time within subject.


In [69]:
# Quartiles and IQR of the per-subject response-time standard deviation.
subject_rt_stds = response_time_agg['rt_std']
std_q1 = subject_rt_stds.quantile(0.25)
std_q3 = subject_rt_stds.quantile(0.75)
std_iqr = std_q3 - std_q1

# Flag subjects whose spread is at least 1.5 IQRs outside the quartiles.
std_outlier_mask = subject_rt_stds.apply(
    lambda value: is_outlier(value, std_q1, std_q3, std_iqr, 1.5)
)
std_outliers = response_time_agg.loc[std_outlier_mask].index

if std_outliers.empty:
    print("No Outliers detected! at least in terms of response time variance within subject.")
else:
    print(f'Seems that: {list(std_outliers)} are outliers in terms of response time variance within subject.')

Seems that: ['103B'] are outliers in terms of response time variance within subject.


#### *It seems that in terms of response time variance within subject, **103B** is an outlier, and **108B** is arguably also an outlier.*

In [70]:
# Distance of each subject's rt_std above the third quartile, in IQR units.
for subject_id in ('103B', '108B'):
    grade = (response_time_agg.loc[subject_id, 'rt_std'] - std_q3) / std_iqr
    print(f"{subject_id}'s grade: {grade}")

103B's grade: 1.856285494638352
108B's grade: 1.470637240522526


#### *I would not filter 103B and 108B out, because they are not so extreme.*

### 2.2. Mistakes Rate (%) Between Subjects

### 2.3. Program Mistakes Rate (%) Within Subject 

### 2.4. Single Steps Response-Time Within Subject

In [71]:
# First and third quartile of the response time for every subject,
# one row per subject.
rt_by_subject = response_times.groupby('subject')['rt']
quantiles_per_subject = rt_by_subject.quantile([0.25, 0.75]).unstack()
quantiles_per_subject.columns = ['q1', 'q3']

# Inter-quartile range per subject.
quantiles_per_subject = quantiles_per_subject.assign(
    iqr=quantiles_per_subject['q3'] - quantiles_per_subject['q1']
)

quantiles_per_subject.head(3)

Unnamed: 0_level_0,q1,q3,iqr
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
101B,1207.0,2550.5,1343.5
102A,1245.0,3497.75,2252.75
102B,1045.0,2832.5,1787.5


In [72]:
def is_subjective_outlier(step):
    """Tell whether a step's response time is an outlier for its own subject.

    ``step`` must provide the keys ``'rt'`` and ``'subject'``. The subject's
    q1, q3 and iqr are looked up in ``quantiles_per_subject`` and the response
    time is tested against them with a threshold of 2 IQRs.
    """
    bounds = quantiles_per_subject.loc[step['subject']]
    return is_outlier(step['rt'], bounds['q1'], bounds['q3'], bounds['iqr'], 2)

In [73]:
# Evaluate every step row against its own subject's quartiles, then keep
# only the rows that were flagged.
step_columns = response_times[['subject', 'rt', 'step_num']]
subjective_outlier_mask = step_columns.apply(is_subjective_outlier, axis=1)
outlier_steps = response_times.loc[subjective_outlier_mask]

In [78]:
# Show each outlier step next to its subject's q1 / q3 / iqr.
outlier_steps.join(quantiles_per_subject, on='subject', how='left')

Unnamed: 0,subject,step_num,rt,q1,q3,iqr
208,107B,441,4779,543.50,1825.75,1282.25
425,104B,430,5978,380.25,1829.50,1449.25
548,106B,229,8350,824.25,3001.25,2177.00
660,105B,7,8432,1464.00,3129.75,1665.75
710,105B,107,7764,1464.00,3129.75,1665.75
...,...,...,...,...,...,...
4913,106A,135,6048,882.50,2499.00,1616.50
4929,106A,167,6214,882.50,2499.00,1616.50
4964,106A,240,13808,882.50,2499.00,1616.50
5001,106A,317,11442,882.50,2499.00,1616.50
