# Linear SVC model

## Assessing the predictive ability of a linear SVC model
This notebook uses a train/test split of the DBD and DCD datasets to fit a linear SVC model for each cohort and assess its predictive ability on the held-out test data.

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.metrics as mets
from sklearn.base import clone
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
%matplotlib inline

In [2]:
# Load the full dataset; no rows are excluded at this stage
df = pd.read_sas("Data/alldata3.sas7bdat")

# --- DBD approaches ---
# Rows with eli_DBD == 1 and FAMILY_APPROACHED == 2 (6931 DBD apps)
dbd_apps = df.loc[df["eli_DBD"].eq(1) & df["FAMILY_APPROACHED"].eq(2)]

# Drop the coded values below to match the cohort in the paper (6060 DBD apps)
dbd_apps = dbd_apps.loc[
    dbd_apps["eth_grp"].ne(5)
    & dbd_apps["FORMAL_APR_WHEN"].ne(4)
    & dbd_apps["donation_mentioned"].ne(-1)
    & dbd_apps["FAMILY_WITNESS_BSDT"].ne(9)
    & dbd_apps["GENDER"].ne(9)
]

# --- DCD approaches ---
# Rows with eli_DCD == 1 and FAMILY_APPROACHED == 2 (9965 DCD apps)
dcd_apps = df.loc[df["eli_DCD"].eq(1) & df["FAMILY_APPROACHED"].eq(2)]

# Drop the coded values below, and rows with a missing cod_neuro, to match
# the cohort in the paper (9405 DCD apps)
dcd_apps = dcd_apps.loc[
    dcd_apps["GENDER"].ne(9)
    & dcd_apps["cod_neuro"].notna()
    & dcd_apps["eth_grp"].ne(5)
    & dcd_apps["donation_mentioned"].ne(-1)
    & ~dcd_apps["DTC_WD_TRTMENT_PRESENT"].isin([8, 9])
]

# Columns used to create DBD model in paper; the last entry (FAMILY_CONSENT)
# is the outcome, every other entry is a categorical predictor.
dbd_cols = ["wish", "FORMAL_APR_WHEN", "donation_mentioned", "app_nature", "eth_grp", "religion_grp", "GENDER", "FAMILY_WITNESS_BSDT", "DTC_PRESENT_BSD_CONV",
            "acorn_new", "adult","FAMILY_CONSENT"]

# Cast the category codes to int and KEEP the result. Previously the cast was
# evaluated and thrown away, so the codes stayed as floats (dummy columns such
# as "wish_2.0", class labels 1.0/2.0). astype(int) also raises if a selected
# column still holds a missing value, instead of letting get_dummies encode
# that row as all zeros (i.e. as the dropped reference level).
dbd_model_data = dbd_apps[dbd_cols].astype(int)

# One-hot encode every predictor, dropping the first level of each as the
# reference category; the outcome column is left untouched.
dbd_model_data2 = pd.get_dummies(data=dbd_model_data,columns=dbd_cols[:-1],drop_first=True)

# Split into the design matrix and the outcome vector
dbd_features = dbd_model_data2.drop("FAMILY_CONSENT",axis=1)
dbd_consents = dbd_model_data2["FAMILY_CONSENT"]

# Columns used to create DCD model in paper; the last entry (FAMILY_CONSENT)
# is the outcome, every other entry is a categorical predictor.
dcd_cols = ["wish", "donation_mentioned",
            "app_nature", "eth_grp", "religion_grp", "GENDER", "DTC_WD_TRTMENT_PRESENT",
            "acorn_new", "adult","cod_neuro","FAMILY_CONSENT"]

# Cast the category codes to int and KEEP the result. Previously this line
# selected the DBD column list and discarded the cast, so it had no effect on
# the DCD data at all.
# NOTE(review): astype(int) raises if any selected column still holds a
# missing value. cod_neuro is filtered with notna() upstream, but
# DTC_WD_TRTMENT_PRESENT is only filtered for codes 8/9 -- confirm it has no
# NaN, otherwise decide explicitly how those rows should be handled.
dcd_model_data = dcd_apps[dcd_cols].astype(int)

# One-hot encode every predictor, dropping the first level of each as the
# reference category; the outcome column is left untouched.
dcd_model_data2 = pd.get_dummies(data=dcd_model_data,columns=dcd_cols[:-1],drop_first=True)

# Split into the design matrix and the outcome vector
dcd_features = dcd_model_data2.drop("FAMILY_CONSENT",axis=1)
dcd_consents = dcd_model_data2["FAMILY_CONSENT"]

# Create training and testing datasets for the DBD and DCD approaches.
# 33% of each cohort is held out; random_state fixes the shuffle so the
# split is the same on every run.

# DBD test data: 1382 consents, 618 non-consents
(DBD_X_train, DBD_X_test,
 DBD_y_train, DBD_y_test) = train_test_split(
    dbd_features,
    dbd_consents,
    test_size=0.33,
    random_state=10,
)

# DCD test data: 1865 consents, 1239 non-consents
(DCD_X_train, DCD_X_test,
 DCD_y_train, DCD_y_test) = train_test_split(
    dcd_features,
    dcd_consents,
    test_size=0.33,
    random_state=10,
)

  rslt[name] = self._byte_chunk[jb, :].view(dtype=self.byte_order + "d")
  rslt[name] = self._string_chunk[js, :]


In [4]:
# Linear SVC estimator that the cells below fit to the training data.
# random_state is fixed so repeated runs give the same fitted model; it only
# influences the result when LinearSVC uses its dual solver, which shuffles
# the data with a pseudo-random generator.
SVC_model = LinearSVC(random_state=10)

In [7]:
# Fit the DBD model on a clone of SVC_model. fit() returns the estimator it
# was called on, so fitting SVC_model directly would make DBD_SVC an alias of
# the shared object and any later refit of SVC_model would silently replace
# the DBD model. clone() copies the hyperparameters into a fresh estimator.
DBD_SVC = clone(SVC_model).fit(DBD_X_train, DBD_y_train)

# Predicted consent class for each held-out DBD approach
DBD_preds = DBD_SVC.predict(DBD_X_test)

# Confusion matrix: rows are the true classes, columns the predicted classes
mets.confusion_matrix(DBD_y_test, DBD_preds)

array([[ 220,  398],
       [  96, 1286]])

In [8]:
# DBD model: per-class precision / recall / F1 on the test data
dbd_report = mets.classification_report(DBD_y_test, DBD_preds)
print(dbd_report)

# Cohen's kappa between true and predicted classes, rounded to 2 d.p.
dbd_kappa = round(mets.cohen_kappa_score(DBD_y_test, DBD_preds), 2)
print(f"Cohen kappa: {dbd_kappa!s}")

              precision    recall  f1-score   support

         1.0       0.70      0.36      0.47       618
         2.0       0.76      0.93      0.84      1382

    accuracy                           0.75      2000
   macro avg       0.73      0.64      0.65      2000
weighted avg       0.74      0.75      0.73      2000

Cohen kappa: 0.33


In [10]:
# Fit the DCD model on a clone of SVC_model. fit() returns the estimator it
# was called on, so fitting SVC_model directly would refit the shared object
# in place and make DCD_SVC the same object as every other model fitted
# through SVC_model. clone() copies the hyperparameters into a fresh estimator.
DCD_SVC = clone(SVC_model).fit(DCD_X_train, DCD_y_train)

# Predicted consent class for each held-out DCD approach
DCD_preds = DCD_SVC.predict(DCD_X_test)

# Confusion matrix: rows are the true classes, columns the predicted classes
mets.confusion_matrix(DCD_y_test, DCD_preds)

array([[ 775,  464],
       [ 399, 1466]])

In [11]:
# DCD model: per-class precision / recall / F1 on the test data
dcd_report = mets.classification_report(DCD_y_test, DCD_preds)
print(dcd_report)

# Cohen's kappa between true and predicted classes, rounded to 2 d.p.
dcd_kappa = round(mets.cohen_kappa_score(DCD_y_test, DCD_preds), 2)
print(f"Cohen kappa: {dcd_kappa!s}")

              precision    recall  f1-score   support

         1.0       0.66      0.63      0.64      1239
         2.0       0.76      0.79      0.77      1865

    accuracy                           0.72      3104
   macro avg       0.71      0.71      0.71      3104
weighted avg       0.72      0.72      0.72      3104

Cohen kappa: 0.42
