# Importing the data

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import bambi, os, glob
import statsmodels as sm
import statsmodels.formula.api as smf

In [2]:
# Load every subject's cleaned CSV from ../data/usable/extracted (relative to
# the current working directory) and stack them into one trial-level frame, df.
columnNames = ['subjId', 'domEyeR', 'threshStHi', 'threshStLo', 'thresh', 'trialN',
               'sentId', 'sentPx', 'congr', 'fam', 'locTop', 'cued', 'crct', 'broken', 'st']

dataDir = os.path.abspath(os.path.join(os.getcwd(), '..', 'data/usable/extracted'))
print(dataDir)

# glob returns matches in arbitrary order; sorting keeps the row order of df
# reproducible across machines and runs.
allSubjFilePaths = sorted(glob.glob(os.path.join(dataDir, 'FC_cfs*clean.csv')))
if not allSubjFilePaths:
    # Fail with a readable message instead of an IndexError on the [0] below.
    raise IOError('no FC_cfs*clean.csv files found in ' + dataDir)
print(os.path.basename(allSubjFilePaths[0]))

subjNum = len(allSubjFilePaths)
print('number of subjects: ' + str(subjNum))

# Collect the per-subject frames and concatenate once: appending to a growing
# DataFrame inside the loop re-copies all accumulated rows on every iteration.
subjFrames = []
for curSubjFileNum, curSubjFilePath in enumerate(allSubjFilePaths):
    print('current subject file number = ' + str(curSubjFileNum))
    ds = pd.read_csv(curSubjFilePath)
    # Assigning the names (rather than passing names= to read_csv) raises if a
    # file does not have exactly these 15 columns.
    ds.columns = columnNames
    subjFrames.append(ds)

# ignore_index gives the stacked frame unique row labels instead of repeating
# each file's own 0..n-1 index once per subject.
df = pd.concat(subjFrames, ignore_index=True)
print(df.shape)
df.head(5)

/Users/egor/Dropbox/Projects/fc/fc/data/usable/extracted
FC_cfs_sub01_20160915_1402_clean.csv
number of subjects: 15
current subject file number = 0
current subject file number = 1
current subject file number = 2
current subject file number = 3
current subject file number = 4
current subject file number = 5
current subject file number = 6
current subject file number = 7
current subject file number = 8
current subject file number = 9
current subject file number = 10
current subject file number = 11
current subject file number = 12
current subject file number = 13
current subject file number = 14
(3600, 15)


Unnamed: 0,subjId,domEyeR,threshStHi,threshStLo,thresh,trialN,sentId,sentPx,congr,fam,locTop,cued,crct,broken,st
0,1,1,0.24862,0.2398,0.24421,1,10,124,1,0,0,0,1,1,1.1426
1,1,1,0.24862,0.2398,0.24421,2,15,121,0,1,1,1,1,1,1.7793
2,1,1,0.24862,0.2398,0.24421,3,28,100,0,0,0,0,1,1,1.3451
3,1,1,0.24862,0.2398,0.24421,4,28,100,0,0,1,1,1,1,1.4134
4,1,1,0.24862,0.2398,0.24421,5,2,133,0,1,0,0,1,1,1.5021


## Centered data set (binary predictors recoded from 0/1 to -1/+1)

In [3]:
# Build dfc: the same trials as df, with the binary predictors cued, fam and
# congr recoded from 0/1 to -1/+1.
# Work on a copy: a plain `dfc = df` only creates a second name for the same
# object, so the recoding would silently rewrite df as well.
dfc = df.copy()
for predictor in ['cued', 'fam', 'congr']:
    # Boolean-mask .loc assignment replaces the deprecated .ix indexer.
    dfc.loc[dfc[predictor] == 0, predictor] = -1

# Statistical models

## Mixed linear regression

In [4]:
# Linear mixed model of st on the full congr x fam x cued factorial, fitted to
# the centered frame. Observations are grouped by subjId and sentId is passed
# as the random-effects formula.
fixedEffectsFormula = 'st ~ congr * fam * cued'
md = smf.mixedlm(formula=fixedEffectsFormula, data=dfc,
                 groups=dfc['subjId'], re_formula='sentId')
mdf = md.fit()
print(mdf.summary())



               Mixed Linear Model Regression Results
Model:                 MixedLM    Dependent Variable:    st        
No. Observations:      3600       Method:                REML      
No. Groups:            15         Scale:                 0.2837    
Min. group size:       240        Likelihood:            -2978.6575
Max. group size:       240        Converged:             Yes       
Mean group size:       240.0                                       
-------------------------------------------------------------------
                         Coef.  Std.Err.   z    P>|z| [0.025 0.975]
-------------------------------------------------------------------
Intercept                 1.496    0.138 10.850 0.000  1.226  1.766
congr                    -0.003    0.009 -0.353 0.724 -0.021  0.014
fam                       0.004    0.009  0.484 0.628 -0.013  0.022
congr:fam                 0.017    0.009  1.883 0.060 -0.001  0.034
cued                     -0.030    0.009 -3.396 0.001 -0.048 -0

## Running the Bayesian mixed model

In [5]:
# Bayesian version of the same fixed-effects formula, with the random term
# '1|subjId'. The model is built from dfc, the centered frame also used by the
# mixedlm fit, so both models are explicitly tied to the same predictor coding.
# NOTE(review): the recorded run of this cell failed with
# "get_dummies() got an unexpected keyword argument 'drop_first'". That keyword
# was added to pandas.get_dummies in pandas 0.18.0, so the installed pandas is
# presumably older -- confirm the version and upgrade before re-running.
bmd = bambi.Model(dfc)
modelFitted = bmd.fit('st ~ congr * fam * cued', samples=200,
                      random=['1|subjId'], njobs=1)
# The first 50 samples are passed as burn_in to the plot.
modelFitted.plot(burn_in=50)

TypeError: get_dummies() got an unexpected keyword argument 'drop_first'