# Initial commands

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import redcaputils
import statsmodels.formula.api as smf
%matplotlib inline
# Lift the column cap so wide frames are displayed with every column visible.
pd.options.display.max_columns = None

In [2]:
# Read the two exports (patients first, then controls) and stack them row-wise.
# ignore_index is left at its default, so each file's own row labels are kept
# and index values may repeat in the combined frame.
patients, controls = (
    pd.read_csv(csv_path) for csv_path in ("patients.csv", "controls.csv")
)
df = pd.concat([patients, controls], axis=0)

# Regression models

In [3]:
# Export column name -> short name used in the regression formulas below.
# Most entries just drop the `_v2` suffix; `redcap_event_name` becomes the
# `PATIENT` flag and `f_phq_suma` becomes `f_phq`.
cols_rename = {
    'redcap_event_name': 'PATIENT',
    'bdi_v2': 'bdi',
    'stai_i_v2': 'stai_i',
    'stai_ii_v2': 'stai_ii',
    'fss_v2': 'fss',
    'ess_v2': 'ess',
    'vas_now_v2': 'vas_now',
    'vas_4wk_aver_v2': 'vas_4wk_aver',
    'scc_v2': 'scc',
    'eq5d3l_v2': 'eq5d3l',
    'eq5d3l_vas_v2': 'eq5d3l_vas',
    'bfi_e_v2': 'bfi_e',
    'bfi_p_v2': 'bfi_p',
    'bfi_s_v2': 'bfi_s',
    'bfi_n_v2': 'bfi_n',
    'bfi_o_v2': 'bfi_o',
    'sf_12_v2': 'sf_12',
    'f_phq_suma': 'f_phq',
}

# String codes -> 0/1.
# NOTE(review): presumably 'F'/'M' are sex codes and `enrollment_arm_1` marks
# the patient arm (the models below filter on PATIENT == 1) — confirm against
# the REDCap project.
values_changes = {'F': 0, 'M': 1, 'enrollment_arm_1': 1, 'enrollment_arm_2': 0}

# Build the input from the untouched `patients` / `controls` frames instead of
# feeding `df` back into itself: the cell then yields the same `df` no matter
# how many times it is executed, rather than re-converting an already
# converted frame on a second run.
# NOTE(review): redcaputils.convert_dataframe is a project helper not visible
# here; presumably it applies the column renames and value replacements given
# by the two dicts — confirm.
df = redcaputils.convert_dataframe(
    pd.concat([patients, controls]),
    columns_conversion_dict=cols_rename,
    values_conversion_dict=values_changes,
)

In [4]:
# OLS of `sf_12` on the remaining covariates, fitted on rows with PATIENT == 1.
# NOTE(review): the recorded output of this cell reports 24 observations and
# 22 model degrees of freedom, i.e. a single residual degree of freedom. The
# fit is close to saturated, so R-squared and the coefficient p-values should
# be read with great caution.
sf12_predictors = [
    'age', 'pohlavi', 'duration', 's_fmdrs_sum', 'rls_dg',
    'bdi', 'stai_i', 'stai_ii', 'fss', 'ess',
    'vas_now', 'vas_4wk_aver', 'scc', 'eq5d3l', 'eq5d3l_vas',
    'bfi_e', 'bfi_p', 'bfi_s', 'bfi_n', 'bfi_o',
    'f_phq', 'f_phq_somatic',
]
# One term per line, each continuation line starting with "+ ".
sf12_formula = "sf_12 ~\n" + "\n+ ".join(sf12_predictors) + "\n"

patients_only = df.loc[df['PATIENT'] == 1]
model = smf.ols(formula=sf12_formula, data=patients_only)
result = model.fit()
result.summary()

0,1,2,3
Dep. Variable:,sf_12,R-squared:,0.997
Model:,OLS,Adj. R-squared:,0.941
Method:,Least Squares,F-statistic:,17.62
Date:,"Tue, 30 May 2017",Prob (F-statistic):,0.186
Time:,21:18:39,Log-Likelihood:,-5.8866
No. Observations:,24,AIC:,57.77
Df Residuals:,1,BIC:,84.87
Df Model:,22,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-2.1966,28.028,-0.078,0.950,-358.332,353.939
age,0.2576,0.118,2.175,0.274,-1.247,1.762
pohlavi,9.5727,7.045,1.359,0.404,-79.944,99.089
duration,0.1354,0.174,0.778,0.579,-2.075,2.345
s_fmdrs_sum,0.1015,0.331,0.307,0.811,-4.102,4.305
rls_dg,-0.2703,3.634,-0.074,0.953,-46.449,45.908
bdi,-0.4496,0.231,-1.947,0.302,-3.384,2.484
stai_i,0.1493,0.142,1.050,0.484,-1.658,1.957
stai_ii,-0.4834,0.249,-1.945,0.302,-3.642,2.675

0,1,2,3
Omnibus:,2.333,Durbin-Watson:,1.734
Prob(Omnibus):,0.311,Jarque-Bera (JB):,1.941
Skew:,0.583,Prob(JB):,0.379
Kurtosis:,2.237,Cond. No.,9530.0


In [5]:
# OLS of `s_fmdrs_sum` on the remaining covariates (including `sf_12`), fitted
# on rows with PATIENT == 1.
# NOTE(review): the recorded output of this cell reports 24 observations and
# 22 model degrees of freedom, i.e. a single residual degree of freedom. The
# fit is close to saturated, so R-squared and the coefficient p-values should
# be read with great caution.
fmdrs_predictors = [
    'age', 'pohlavi', 'duration', 'sf_12', 'rls_dg',
    'bdi', 'stai_i', 'stai_ii', 'fss', 'ess',
    'vas_now', 'vas_4wk_aver', 'scc', 'eq5d3l', 'eq5d3l_vas',
    'bfi_e', 'bfi_p', 'bfi_s', 'bfi_n', 'bfi_o',
    'f_phq', 'f_phq_somatic',
]
# One term per line, each continuation line starting with "+ ".
fmdrs_formula = "s_fmdrs_sum ~\n" + "\n+ ".join(fmdrs_predictors) + "\n"

patients_only = df.loc[df['PATIENT'] == 1]
model = smf.ols(formula=fmdrs_formula, data=patients_only)
result = model.fit()
result.summary()

0,1,2,3
Dep. Variable:,s_fmdrs_sum,R-squared:,0.987
Model:,OLS,Adj. R-squared:,0.696
Method:,Least Squares,F-statistic:,3.39
Date:,"Tue, 30 May 2017",Prob (F-statistic):,0.407
Time:,21:18:40,Log-Likelihood:,-31.358
No. Observations:,24,AIC:,108.7
Df Residuals:,1,BIC:,135.8
Df Model:,22,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-49.9361,64.100,-0.779,0.579,-864.408,764.535
age,-0.3071,0.760,-0.404,0.755,-9.959,9.345
pohlavi,-25.8415,22.633,-1.142,0.458,-313.424,261.741
duration,-0.1014,0.629,-0.161,0.898,-8.092,7.889
sf_12,0.8475,2.763,0.307,0.811,-34.261,35.956
rls_dg,-6.7367,8.097,-0.832,0.558,-109.617,96.143
bdi,0.7556,1.250,0.604,0.654,-15.127,16.639
stai_i,-0.3414,0.489,-0.699,0.612,-6.550,5.867
stai_ii,0.9506,1.251,0.760,0.586,-14.942,16.843

0,1,2,3
Omnibus:,1.554,Durbin-Watson:,2.157
Prob(Omnibus):,0.46,Jarque-Bera (JB):,0.989
Skew:,-0.496,Prob(JB):,0.61
Kurtosis:,2.919,Cond. No.,8030.0


In [6]:
# OLS of `eq5d3l` on the remaining covariates (including `sf_12` and
# `s_fmdrs_sum`), fitted on rows with PATIENT == 1.
# NOTE(review): the recorded output of this cell reports 24 observations and
# 22 model degrees of freedom, i.e. a single residual degree of freedom. The
# fit is close to saturated, so R-squared and the coefficient p-values should
# be read with great caution.
eq5d3l_predictors = [
    'age', 'pohlavi', 'duration', 'sf_12', 'rls_dg',
    'bdi', 'stai_i', 'stai_ii', 'fss', 'ess',
    'vas_now', 'vas_4wk_aver', 'scc', 's_fmdrs_sum', 'eq5d3l_vas',
    'bfi_e', 'bfi_p', 'bfi_s', 'bfi_n', 'bfi_o',
    'f_phq', 'f_phq_somatic',
]
# One term per line, each continuation line starting with "+ ".
eq5d3l_formula = "eq5d3l ~\n" + "\n+ ".join(eq5d3l_predictors) + "\n"

patients_only = df.loc[df['PATIENT'] == 1]
model = smf.ols(formula=eq5d3l_formula, data=patients_only)
result = model.fit()
result.summary()

0,1,2,3
Dep. Variable:,eq5d3l,R-squared:,0.986
Model:,OLS,Adj. R-squared:,0.68
Method:,Least Squares,F-statistic:,3.222
Date:,"Tue, 30 May 2017",Prob (F-statistic):,0.417
Time:,21:18:40,Log-Likelihood:,6.3855
No. Observations:,24,AIC:,33.23
Df Residuals:,1,BIC:,60.32
Df Model:,22,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,8.3363,14.655,0.569,0.671,-177.872,194.545
age,-0.1399,0.097,-1.450,0.384,-1.367,1.087
pohlavi,-4.1660,5.784,-0.720,0.603,-77.654,69.322
duration,-0.0600,0.118,-0.510,0.700,-1.556,1.436
sf_12,0.5080,0.319,1.594,0.357,-3.541,4.557
rls_dg,0.8228,2.025,0.406,0.754,-24.903,26.549
bdi,0.2089,0.220,0.952,0.516,-2.581,2.999
stai_i,-0.0640,0.106,-0.605,0.654,-1.409,1.281
stai_ii,0.2271,0.234,0.971,0.509,-2.745,3.199

0,1,2,3
Omnibus:,2.103,Durbin-Watson:,2.413
Prob(Omnibus):,0.349,Jarque-Bera (JB):,1.704
Skew:,-0.628,Prob(JB):,0.427
Kurtosis:,2.647,Cond. No.,8860.0
