# Logistic regression model

## Recreating the logistic regression from the paper using scikit-learn

In this notebook a logistic regression is fitted to the DBD and DCD approach data to determine whether the scikit-learn LR model gives results similar to the SAS model.

In [60]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
%matplotlib inline

In [61]:
#import data
import pandas as pd

# Load the complete dataset (all rows included) from the SAS file
df = pd.read_sas("Data/alldata3.sas7bdat")

# DBD cohort, filtered in two steps:
#   1. eli_DBD == 1 and FAMILY_APPROACHED == 2            -> 6931 DBD apps
#   2. drop the excluded codes to match the paper cohort  -> 6060 DBD apps
dbd_apps = (
    df.loc[df["eli_DBD"].eq(1) & df["FAMILY_APPROACHED"].eq(2)]
    .loc[
        lambda apps: apps["eth_grp"].ne(5)
        & apps["FORMAL_APR_WHEN"].ne(4)
        & apps["donation_mentioned"].ne(-1)
        & apps["FAMILY_WITNESS_BSDT"].ne(9)
        & apps["GENDER"].ne(9)
    ]
)

# DCD cohort, filtered in two steps:
#   1. eli_DCD == 1 and FAMILY_APPROACHED == 2            -> 9965 DCD apps
#   2. drop the excluded codes / missing cod_neuro to
#      match the paper cohort                             -> 9405 DCD apps
dcd_apps = (
    df.loc[df["eli_DCD"].eq(1) & df["FAMILY_APPROACHED"].eq(2)]
    .loc[
        lambda apps: apps["GENDER"].ne(9)
        & apps["cod_neuro"].notna()
        & apps["eth_grp"].ne(5)
        & apps["donation_mentioned"].ne(-1)
        & ~apps["DTC_WD_TRTMENT_PRESENT"].isin([8, 9])
    ]
)


  rslt[name] = self._byte_chunk[jb, :].view(dtype=self.byte_order + "d")
  rslt[name] = self._string_chunk[js, :]


In [62]:
# Columns used to create DBD model in paper.
# The last entry (FAMILY_CONSENT) is the outcome; every other entry is a
# categorical predictor.
dbd_cols = ["wish", "FORMAL_APR_WHEN", "donation_mentioned", "app_nature", "eth_grp", "religion_grp", "GENDER", "FAMILY_WITNESS_BSDT", "DTC_PRESENT_BSD_CONV", 
            "acorn_new", "adult","FAMILY_CONSENT"]

# NOTE: a bare `dbd_apps[dbd_cols].astype(int)` used to sit here. `astype`
# returns a new frame and the result was never assigned, so it had no effect
# and has been removed. The codes therefore stay as floats, which is why the
# dummy columns are named e.g. "wish_2.0". Assign the cast result back before
# calling get_dummies if integer-suffixed names are wanted.
dbd_model_data = dbd_apps[dbd_cols]

# One-hot encode every predictor (all columns except the outcome);
# drop_first=True drops the first level of each predictor.
dbd_model_data2 = pd.get_dummies(data=dbd_model_data,columns=dbd_cols[:-1],drop_first=True)

# Split the encoded frame into the design matrix and the outcome vector
dbd_features = dbd_model_data2.drop("FAMILY_CONSENT",axis=1)
dbd_consents = dbd_model_data2["FAMILY_CONSENT"]

# List the encoded feature names with their position, so the odds ratios
# printed by position later can be matched back to a feature
dbd_feature_names = dbd_features.columns.tolist()
for i,feat in enumerate(dbd_feature_names):
    print(i, feat)

0 wish_2.0
1 wish_3.0
2 wish_4.0
3 wish_5.0
4 FORMAL_APR_WHEN_2.0
5 FORMAL_APR_WHEN_3.0
6 donation_mentioned_2.0
7 donation_mentioned_3.0
8 donation_mentioned_4.0
9 app_nature_2.0
10 app_nature_3.0
11 eth_grp_2.0
12 eth_grp_3.0
13 eth_grp_4.0
14 religion_grp_2.0
15 religion_grp_3.0
16 religion_grp_4.0
17 religion_grp_5.0
18 religion_grp_9.0
19 GENDER_2.0
20 FAMILY_WITNESS_BSDT_2.0
21 DTC_PRESENT_BSD_CONV_2.0
22 acorn_new_2.0
23 acorn_new_3.0
24 acorn_new_4.0
25 acorn_new_5.0
26 acorn_new_6.0
27 adult_1.0


In [63]:
# Logistic regression with no penalty term, solved with newton-cg.
# NOTE(review): newer scikit-learn releases spell this `penalty=None` and
# reject the string 'none' - confirm against the installed version.
LR_model = LogisticRegression(solver='newton-cg', penalty='none')

# Fit on the DBD design matrix; `fit` hands back the fitted estimator
DBD_LR = LR_model.fit(dbd_features, dbd_consents)

# Exponentiate the fitted coefficients to obtain odds ratios.
# NOTE(review): for a binary outcome the coefficients describe the second
# entry of DBD_LR.classes_ - confirm that code is the "consent" outcome.
odds_ratios_dbd = np.exp(DBD_LR.coef_)

# Print each odds ratio next to its position in the feature list
for position, odds_ratio in enumerate(odds_ratios_dbd[0]):
    print(position, odds_ratio)

0 23.80722980760914
1 7.576653765431177
2 18.482599727943164
3 1.5464623044578556
4 0.43437745890520324
5 0.3996890769021021
6 1.2585124281607571
7 1.781352143184595
8 2.0113375632390436
9 0.8764721474506368
10 0.27228412924441264
11 0.4983566150649152
12 0.2891905071618778
13 0.8380086713563211
14 0.17052823635951458
15 1.3116429685043627
16 1.015498704467321
17 0.6896515682327877
18 0.7845047623914746
19 0.7905014126343302
20 0.7846916244032003
21 1.394171856272005
22 0.9334310592493109
23 0.9323263580660089
24 0.7148737987456711
25 0.7526260215983743
26 0.7351785513380329
27 0.658979306010143


In [64]:
# Columns used to create DCD model in paper.
# The last entry (FAMILY_CONSENT) is the outcome; every other entry is a
# categorical predictor.
dcd_cols = ["wish", "donation_mentioned", 
            "app_nature", "eth_grp", "religion_grp", "GENDER", "DTC_WD_TRTMENT_PRESENT", 
            "acorn_new", "adult","cod_neuro","FAMILY_CONSENT"]

# NOTE: a bare `dcd_apps[dbd_cols].astype(int)` used to sit here. It selected
# the *DBD* column list (copy-paste slip) and its result was never assigned,
# so it had no effect beyond making this cell depend on `dbd_cols` from an
# earlier cell. It has been removed. The codes stay as floats, which is why
# the dummy columns are named e.g. "wish_2.0".
dcd_model_data = dcd_apps[dcd_cols]

# One-hot encode every predictor (all columns except the outcome);
# drop_first=True drops the first level of each predictor.
dcd_model_data2 = pd.get_dummies(data=dcd_model_data,columns=dcd_cols[:-1],drop_first=True)

# Split the encoded frame into the design matrix and the outcome vector
dcd_features = dcd_model_data2.drop("FAMILY_CONSENT",axis=1)
dcd_consents = dcd_model_data2["FAMILY_CONSENT"]

# List the encoded feature names with their position, so the odds ratios
# printed by position later can be matched back to a feature
dcd_feature_names = dcd_features.columns.tolist()
for i,feat in enumerate(dcd_feature_names):
    print(i, feat)

0 wish_2.0
1 wish_3.0
2 wish_4.0
3 wish_5.0
4 donation_mentioned_2.0
5 donation_mentioned_3.0
6 donation_mentioned_4.0
7 app_nature_2.0
8 app_nature_3.0
9 eth_grp_2.0
10 eth_grp_3.0
11 eth_grp_4.0
12 religion_grp_2.0
13 religion_grp_3.0
14 religion_grp_4.0
15 religion_grp_5.0
16 religion_grp_9.0
17 GENDER_2.0
18 DTC_WD_TRTMENT_PRESENT_2.0
19 acorn_new_2.0
20 acorn_new_3.0
21 acorn_new_4.0
22 acorn_new_5.0
23 acorn_new_6.0
24 adult_1.0
25 cod_neuro_1.0


In [65]:
# Fit the DCD model on a *fresh* estimator that copies LR_model's settings.
# `fit` returns the estimator it was called on, so calling LR_model.fit(...)
# here would make DCD_LR the same object as LR_model (and as any model
# previously returned by LR_model.fit), silently overwriting that model's
# coefficients with the DCD ones.
DCD_LR = LogisticRegression(**LR_model.get_params()).fit(dcd_features,dcd_consents)

# Exponentiate the fitted coefficients to obtain odds ratios
odds_ratios_dcd = np.exp(DCD_LR.coef_) 

# Print each odds ratio next to its position in the feature list
for i,OR in enumerate(odds_ratios_dcd[0]):
    print(i, OR)

0 10.14682075623482
1 5.622985008122633
2 17.977705392561827
3 1.474097788321959
4 1.5581170613545272
5 2.457709883837136
6 2.5294620773138803
7 0.9974311742114284
8 0.2584502680627293
9 0.7853178828580134
10 0.469329465412041
11 1.0792626365158449
12 0.1251994824620407
13 0.6504965386512473
14 1.2963125246762466
15 0.6775928744718072
16 0.6720535494174026
17 0.861854935728391
18 1.4278616534678883
19 0.9758525136546797
20 0.9249834087443919
21 0.9485153185013969
22 0.8184484743131082
23 0.8476033871458712
24 1.2587168336464936
25 1.0841089703658375
