In [40]:
import pandas as pd # for manipulating data frames
import pingouin as pg # for running statistics
#import matplotlib.pyplot as pyplot
import plotly.express as px

In [41]:
data = pd.read_csv('logs/logs_merged.csv', sep=',')
data

Unnamed: 0,DesignName,ParticipantID,TrialID,Block1,Block2,VV,OC,visualSearchTime,ErrorCount
0,Preattentive Variables,1,1,1,1,Shadow,Medium,1552,0
1,Preattentive Variables,1,2,1,2,Shadow,Low,1134,0
2,Preattentive Variables,1,3,1,3,Shadow,High,1140,0
3,Preattentive Variables,1,4,1,4,Shadow,Low,768,0
4,Preattentive Variables,1,5,1,5,Shadow,Medium,904,0
...,...,...,...,...,...,...,...,...,...
265,Preattentive Variables,6,266,3,11,Shadow,Low,858,0
266,Preattentive Variables,6,267,3,12,Shadow,Medium,743,0
267,Preattentive Variables,6,268,3,13,Shadow,Low,648,0
268,Preattentive Variables,6,269,3,14,Shadow,High,676,0


In [42]:
data.describe(include = 'all')

Unnamed: 0,DesignName,ParticipantID,TrialID,Block1,Block2,VV,OC,visualSearchTime,ErrorCount
count,270,270.0,270.0,270.0,270.0,270,270,270.0,270.0
unique,1,,,,,3,3,,
top,Preattentive Variables,,,,,Shadow,Low,,
freq,270,,,,,90,90,,
mean,,3.5,135.5,2.0,8.0,,,3073.048148,0.051852
std,,1.710997,78.086491,0.818013,4.328517,,,4067.195605,0.238288
min,,1.0,1.0,1.0,1.0,,,558.0,0.0
25%,,2.0,68.25,1.0,4.0,,,986.25,0.0
50%,,3.5,135.5,2.0,8.0,,,1504.0,0.0
75%,,5.0,202.75,3.0,12.0,,,3420.5,0.0


In [43]:
data.iloc[2]

DesignName          Preattentive Variables
ParticipantID                            1
TrialID                                  3
Block1                                   1
Block2                                   3
VV                                  Shadow
OC                                    High
visualSearchTime                      1140
ErrorCount                               0
Name: 2, dtype: object

In [44]:
data.dtypes

DesignName          object
ParticipantID        int64
TrialID              int64
Block1               int64
Block2               int64
VV                  object
OC                  object
visualSearchTime     int64
ErrorCount           int64
dtype: object

In [45]:
data['ParticipantID'] = data['ParticipantID'].astype('str')
data.dtypes

DesignName          object
ParticipantID       object
TrialID              int64
Block1               int64
Block2               int64
VV                  object
OC                  object
visualSearchTime     int64
ErrorCount           int64
dtype: object

In [46]:
### Applying them to a column gives a result of type series
data.visualSearchTime.mean()

3073.0481481481484

In [47]:
### We can get a breakdown by condition using the groupby function
data.groupby('VV').visualSearchTime.mean()

VV
Both      6437.455556
Motion    1511.677778
Shadow    1270.011111
Name: visualSearchTime, dtype: float64

In [48]:
### Applying those aggregating functions to a dataframe gives a result of type dataframe
data.groupby('VV').mean() # result is a dataframe

Unnamed: 0_level_0,TrialID,Block1,Block2,visualSearchTime,ErrorCount
VV,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Both,135.5,2.0,8.0,6437.455556,0.122222
Motion,135.5,2.0,8.0,1511.677778,0.011111
Shadow,135.5,2.0,8.0,1270.011111,0.022222


In [49]:
### aggregate is a more elaborate aggregate function
# the line below is equivalent to: data.groupby('Lens').mean()
# data.groupby('Lens').aggregate('mean') 
# but here aggregate is used to specify how to aggregate different columns
data.groupby('VV').aggregate({'TrialID': 'sum', 'visualSearchTime': 'mean'})

Unnamed: 0_level_0,TrialID,visualSearchTime
VV,Unnamed: 1_level_1,Unnamed: 2_level_1
Both,12195,6437.455556
Motion,12195,1511.677778
Shadow,12195,1270.011111


In [50]:
OCAsStr = data['OC'].copy().astype('str')
# make a copy of column ID and change its type from float to str
VVAsStr = data['VV'].copy().astype('str')
# now that we have strings, we can concatenate them using function 'cat'
data['Condition: OC, VV'] = OCAsStr.str.cat(VVAsStr, sep=", ")
data

Unnamed: 0,DesignName,ParticipantID,TrialID,Block1,Block2,VV,OC,visualSearchTime,ErrorCount,"Condition: OC, VV"
0,Preattentive Variables,1,1,1,1,Shadow,Medium,1552,0,"Medium, Shadow"
1,Preattentive Variables,1,2,1,2,Shadow,Low,1134,0,"Low, Shadow"
2,Preattentive Variables,1,3,1,3,Shadow,High,1140,0,"High, Shadow"
3,Preattentive Variables,1,4,1,4,Shadow,Low,768,0,"Low, Shadow"
4,Preattentive Variables,1,5,1,5,Shadow,Medium,904,0,"Medium, Shadow"
...,...,...,...,...,...,...,...,...,...,...
265,Preattentive Variables,6,266,3,11,Shadow,Low,858,0,"Low, Shadow"
266,Preattentive Variables,6,267,3,12,Shadow,Medium,743,0,"Medium, Shadow"
267,Preattentive Variables,6,268,3,13,Shadow,Low,648,0,"Low, Shadow"
268,Preattentive Variables,6,269,3,14,Shadow,High,676,0,"High, Shadow"


In [51]:
data.groupby('ParticipantID').count()

Unnamed: 0_level_0,DesignName,TrialID,Block1,Block2,VV,OC,visualSearchTime,ErrorCount,"Condition: OC, VV"
ParticipantID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,45,45,45,45,45,45,45,45,45
2,45,45,45,45,45,45,45,45,45
3,45,45,45,45,45,45,45,45,45
4,45,45,45,45,45,45,45,45,45
5,45,45,45,45,45,45,45,45,45
6,45,45,45,45,45,45,45,45,45


In [52]:
fig = px.histogram(data, x='Condition: OC, VV', color='ParticipantID')
fig.show()

In [53]:
fig = px.histogram(data, x='visualSearchTime')
fig.show()

In [54]:
fig = px.histogram(data, x='visualSearchTime', marginal='box')
fig.show()


In [55]:
fig = px.histogram(data, x='visualSearchTime', color='OC', marginal='box')
fig.show()

In [56]:
fig = px.histogram(data, x='visualSearchTime', color='VV', marginal='box')
fig.show()

In [57]:
fig = px.histogram(data, x='ErrorCount', color='OC')
fig.show()

In [58]:
fig = px.histogram(data, x='ErrorCount', color='VV')
fig.show()

In [59]:
fig = px.histogram(data, x='visualSearchTime', color='OC', marginal='box', log_x=True, log_y=False)
fig.show()
#not sure why Y axis is changed here?

Inferential statistics

In [64]:
# filter data for each VV then do correlation table for below
bothData = data[(data['VV']=='Both')]
shadowData= data[(data['VV']=='Shadow')]
motionData = data[(data['VV']=='Motion')] 
motionData

Unnamed: 0,DesignName,ParticipantID,TrialID,Block1,Block2,VV,OC,visualSearchTime,ErrorCount,"Condition: OC, VV"
15,Preattentive Variables,1,16,2,1,Motion,Medium,1605,0,"Medium, Motion"
16,Preattentive Variables,1,17,2,2,Motion,Low,985,0,"Low, Motion"
17,Preattentive Variables,1,18,2,3,Motion,High,2126,0,"High, Motion"
18,Preattentive Variables,1,19,2,4,Motion,Low,1142,0,"Low, Motion"
19,Preattentive Variables,1,20,2,5,Motion,Medium,910,0,"Medium, Motion"
...,...,...,...,...,...,...,...,...,...,...
235,Preattentive Variables,6,236,1,11,Motion,Low,1437,0,"Low, Motion"
236,Preattentive Variables,6,237,1,12,Motion,Medium,1421,0,"Medium, Motion"
237,Preattentive Variables,6,238,1,13,Motion,Low,1220,0,"Low, Motion"
238,Preattentive Variables,6,239,1,14,Motion,High,2434,0,"High, Motion"


In [65]:
correlation_table1 = pg.pairwise_corr(motionData, columns=['OC','visualSearchTime'])
correlation_table1

Unnamed: 0,X,Y,method,alternative,n,r,CI95%,p-unc,BF10,power
0,visualSearchTime,TrialID,pearson,two-sided,90,-0.02596,"[-0.23, 0.18]",0.8081,0.136,0.056631
1,visualSearchTime,Block1,pearson,two-sided,90,0.001515,"[-0.21, 0.21]",0.988693,0.132,0.049846
2,visualSearchTime,Block2,pearson,two-sided,90,0.148652,"[-0.06, 0.35]",0.162022,0.345,0.289211
3,visualSearchTime,ErrorCount,pearson,two-sided,90,-0.0024,"[-0.21, 0.2]",0.982087,0.132,0.049881


In [66]:
correlation_table2 = pg.pairwise_corr(shadowData, columns=['OC','visualSearchTime'])
correlation_table2

Unnamed: 0,X,Y,method,alternative,n,r,CI95%,p-unc,BF10,power
0,visualSearchTime,TrialID,pearson,two-sided,90,-0.118798,"[-0.32, 0.09]",0.264759,0.243,0.200947
1,visualSearchTime,Block1,pearson,two-sided,90,-0.147724,"[-0.34, 0.06]",0.164688,0.341,0.286191
2,visualSearchTime,Block2,pearson,two-sided,90,-0.268314,"[-0.45, -0.06]",0.01056,3.285,0.731727
3,visualSearchTime,ErrorCount,pearson,two-sided,90,-0.069665,"[-0.27, 0.14]",0.514108,0.162,0.100058


In [67]:
correlation_table3 = pg.pairwise_corr(bothData, columns=['OC','visualSearchTime'])
correlation_table3

Unnamed: 0,X,Y,method,alternative,n,r,CI95%,p-unc,BF10,power
0,visualSearchTime,TrialID,pearson,two-sided,90,-0.062944,"[-0.27, 0.15]",0.555608,0.156,0.090642
1,visualSearchTime,Block1,pearson,two-sided,90,-0.111893,"[-0.31, 0.1]",0.293729,0.227,0.18334
2,visualSearchTime,Block2,pearson,two-sided,90,-0.017323,"[-0.22, 0.19]",0.871265,0.133,0.052847
3,visualSearchTime,ErrorCount,pearson,two-sided,90,-0.128493,"[-0.33, 0.08]",0.227447,0.27,0.227507


In [None]:
r2 = correlation_table['r'] * correlation_table['r']
r2
#run anova test for each hypothesis