# Importing the data

In [10]:
%matplotlib inline
import numpy as np
import pandas as pd
import bambi, os, glob
import statsmodels as sm
import statsmodels.formula.api as smf

In [11]:
# Load every per-subject CSV from ../data/usable/extracted (relative to the
# current working directory) and stack them into one trial-level frame `df`.
dataDir = os.path.abspath(os.path.join(os.getcwd(), '..', 'data', 'usable', 'extracted'))
print(dataDir)
allSubjFilePaths = glob.glob(os.path.join(dataDir, 'FC_cfs*clean.csv'))
if not allSubjFilePaths:
    # Fail early with a readable message instead of an IndexError below.
    raise IOError('no FC_cfs*clean.csv files found in ' + dataDir)
print(os.path.basename(allSubjFilePaths[0]))
subjNum = len(allSubjFilePaths)
print('number of subjects: ' + str(subjNum))

# Column names imposed on every subject file (the files' own headers are replaced).
colNames = ['subjId', 'domEyeR', 'threshStHi', 'threshStLo', 'thresh', 'trialN',
            'sentId', 'sentPx', 'congr', 'fam', 'locTop', 'cued', 'crct', 'broken', 'st']

# Collect the per-subject frames in a list and concatenate once: appending to a
# DataFrame inside the loop re-copies all previously loaded rows each time.
subjFrames = []
for curSubjFileNum, curSubjFilePath in enumerate(allSubjFilePaths):
    print('current subject file number = ' + str(curSubjFileNum))
    ds = pd.read_csv(curSubjFilePath)
    ds.columns = colNames
    subjFrames.append(ds)

# No ignore_index: each file keeps its own row labels, exactly as the original
# append loop did, so index labels repeat across subjects.
df = pd.concat(subjFrames)
print(df.shape)
df.head(5)

/Users/Egor/Dropbox/Projects/fc/fc/data/usable/extracted
FC_cfs_sub14_20160912_1256_clean.csv
number of subjects: 16
current subject file number = 0
current subject file number = 1
current subject file number = 2
current subject file number = 3
current subject file number = 4
current subject file number = 5
current subject file number = 6
current subject file number = 7
current subject file number = 8
current subject file number = 9
current subject file number = 10
current subject file number = 11
current subject file number = 12
current subject file number = 13
current subject file number = 14
current subject file number = 15
(3840, 15)


Unnamed: 0,subjId,domEyeR,threshStHi,threshStLo,thresh,trialN,sentId,sentPx,congr,fam,locTop,cued,crct,broken,st
0,14,1,0.31181,0.23408,0.27294,1,5,133,1,0,1,1,1,1,1.4901
1,14,1,0.31181,0.23408,0.27294,2,28,108,1,0,0,0,1,1,1.2797
2,14,1,0.31181,0.23408,0.27294,5,27,104,0,0,1,1,1,1,1.5523
3,14,1,0.31181,0.23408,0.27294,6,21,109,0,0,1,1,1,1,1.4809
4,14,1,0.31181,0.23408,0.27294,8,18,117,0,0,1,1,1,1,1.3432


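## Centered data set

The binary predictors (`cued`, `fam`, `congr`, and the derived `topSubj`) are recoded from 0/1 to -1/+1.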

In [12]:
# Derive `topSubj` and recode the binary predictors from 0/1 to -1/+1.
#
# topSubj is 1 where `locTop` equals `cued` in 0/1 coding. `cued` is mapped
# back to 0/1 for the comparison so that re-running this cell (when `cued` is
# already -1/+1) yields the same result as the first run.
cuedAsZeroOne = df['cued'].replace(-1, 0)
df['topSubj'] = 0
df.loc[df['locTop'] == cuedAsZeroOne, 'topSubj'] = 1

# NOTE: `dfc` is an alias of `df`, not a copy, so the recoding below is visible
# through both names. Later cells that read `df` therefore see the -1/+1 values.
dfc = df
for colName in ['cued', 'fam', 'congr', 'topSubj']:
    # Only zeros are rewritten; every other value is left as it is.
    dfc.loc[dfc[colName] == 0, colName] = -1
df.head(5)

Unnamed: 0,subjId,domEyeR,threshStHi,threshStLo,thresh,trialN,sentId,sentPx,congr,fam,locTop,cued,crct,broken,st,topSubj
0,14,1,0.31181,0.23408,0.27294,1,5,133,1,-1,1,1,1,1,1.4901,1
1,14,1,0.31181,0.23408,0.27294,2,28,108,1,-1,0,-1,1,1,1.2797,1
2,14,1,0.31181,0.23408,0.27294,5,27,104,-1,-1,1,1,1,1,1.5523,1
3,14,1,0.31181,0.23408,0.27294,6,21,109,-1,-1,1,1,1,1,1.4809,1
4,14,1,0.31181,0.23408,0.27294,8,18,117,-1,-1,1,1,1,1,1.3432,1


# Statistical models

## Mixed linear regression

In [4]:
# Linear mixed model: `st` regressed on the full factorial of congr, fam and
# cued, with observations grouped by subject.
# NOTE(review): re_formula='sentId' puts a random slope on the numeric sentence
# id within each subject -- confirm this is intended rather than treating
# sentId as a grouping factor.
md = smf.mixedlm(
    formula='st ~ congr * fam * cued',
    data=dfc,
    groups=dfc['subjId'],
    re_formula='sentId',
)
mdf = md.fit()
print(mdf.summary())



               Mixed Linear Model Regression Results
Model:                 MixedLM    Dependent Variable:    st        
No. Observations:      3840       Method:                REML      
No. Groups:            16         Scale:                 0.3789    
Min. group size:       240        Likelihood:            -3730.6076
Max. group size:       240        Converged:             Yes       
Mean group size:       240.0                                       
-------------------------------------------------------------------
                         Coef.  Std.Err.   z    P>|z| [0.025 0.975]
-------------------------------------------------------------------
Intercept                 1.531    0.155  9.901 0.000  1.228  1.834
congr                     0.003    0.010  0.313 0.754 -0.016  0.023
fam                       0.004    0.010  0.414 0.679 -0.015  0.024
congr:fam                 0.021    0.010  2.160 0.031  0.002  0.041
cued                     -0.034    0.010 -3.440 0.001 -0.053 -0

## Running the Bayesian mixed model

In [5]:
# Build the bambi model object on the trial-level frame. If the centering cell
# has been run, `df` is the same object as `dfc` (that cell assigns dfc = df),
# so the predictors passed here are already recoded to -1/+1.
bmd = bambi.Model(df)
# The fit and plot calls below are disabled, so this cell only constructs the
# model and draws no samples. They are kept for reference.
#modelFitted = bmd.fit('st ~ congr * fam * cued', samples=200,
#                        random=['1|subjId'], njobs=1)
#modelFitted.plot(burn_in=50)