In [25]:
# Import libraries
import numpy as np
import pandas as pd
import pymysql
from sklearn import (metrics, linear_model)
from sklearn.model_selection import train_test_split

In [3]:
# Connect to PIC
con = pymysql.connect(host='localhost',
                             user='user',
                             password='password',
                             db='pic',
                             cursorclass=pymysql.cursors.DictCursor)
cur=con.cursor()

In [7]:
query = \
"""
WITH surgerybp as (
SELECT svs.subject_id, svs.hadm_id, svs.oper_id, 
    svs.item_no,svs.monitortime,svs.itemid,svs.value,
    a.admittime,a.dischtime,a.hospital_expire_flag,
    CASE WHEN svs.monitortime IS NOT NULL 
        AND svs.monitortime <= a.dischtime
        AND svs.monitortime >= a.admittime THEN 0 
        ELSE 1 END AS drop_flag
FROM surgery_vital_signs svs
LEFT JOIN admissions a
ON svs.hadm_id = a.hadm_id
)
SELECT *
FROM surgerybp
where drop_flag=0;
"""

query_output = pd.read_sql_query(query,con)
print(query_output.head())

   subject_id  hadm_id  oper_id  item_no         monitortime itemid  value  \
0        4531   104527        1        9 2073-09-01 14:50:00    SV1    148   
1        4531   104527        1       10 2073-09-01 14:55:00    SV1    145   
2        4531   104527        1       12 2073-09-01 15:05:00    SV1    150   
3        4531   104527        1       13 2073-09-01 15:05:00    SV1    147   
4        4846   104782        1       15 2109-11-01 16:50:00    SV1    137   

            admittime           dischtime  hospital_expire_flag  drop_flag  
0 2073-08-28 14:26:27 2073-09-29 12:02:00                     0          0  
1 2073-08-28 14:26:27 2073-09-29 12:02:00                     0          0  
2 2073-08-28 14:26:27 2073-09-29 12:02:00                     0          0  
3 2073-08-28 14:26:27 2073-09-29 12:02:00                     0          0  
4 2109-10-17 14:13:19 2109-11-15 10:24:00                     0          0  


In [13]:
surgery_id=query_output.sort_values('oper_id').drop_duplicates(subset=['hadm_id'], keep='first')[['hadm_id','oper_id','hospital_expire_flag']]
print(surgery_id.head())

        hadm_id  oper_id  hospital_expire_flag
0        104527        1                     0
737597   105811        1                     0
737614   106006        1                     0
737620   105999        1                     0
737625   105998        1                     0


In [22]:
#blood pressure variability
def SBPvariability(x,y):
    vital_BP=query_output[(query_output.hadm_id==x)&(query_output.oper_id==y)&(query_output.itemid=='SV3')]
    vital_BP.drop_duplicates(subset=['monitortime'],inplace=True)
    vital_BP.sort_values("item_no",inplace=True)
    vital_BP.reset_index(drop=True,inplace=True)
    n=vital_BP.shape[0]-1
    slope=0
    if n>3:
        global s
        s=0
        for i in range(0,n):
            t=(vital_BP.loc[i+1,'monitortime']-vital_BP.loc[i,'monitortime']).total_seconds()/60
            s=abs(vital_BP.loc[i+1,'value']-vital_BP.loc[i,'value'])/t
            slope=slope+s
        slope=slope/(n+1)
        std = np.array(vital_BP.value).std()
        mean= np.array(vital_BP.value).mean()
    else:
        slope=np.nan
        std=np.nan
        mean=np.nan
    return slope,std,mean
surgery_id['SBPVariability']=list(map(lambda x,y:SBPvariability(x,y),surgery_id['hadm_id'],surgery_id['oper_id']))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [23]:
surgery_id['Slope']=surgery_id['SBPVariability'].astype(str).str.split(',').str[0]
surgery_id['Std']=surgery_id['SBPVariability'].astype(str).str.split(',').str[1]
surgery_id['Mean']=surgery_id['SBPVariability'].astype(str).str.split(',').str[2]
surgery_id['Slope']=surgery_id['Slope'].apply(lambda x:x[1:])
surgery_id['Mean']=surgery_id['Mean'].apply(lambda x:x[:len(x)-1])
surgery_id=surgery_id[surgery_id['Std']!=' nan']
for c in ['Slope','Std','Mean']:
    surgery_id[c]=surgery_id[c].apply(lambda x:round(float(x),2))

In [31]:
#logistic regression model predict mortality
X=surgery_id[['Slope','Std','Mean']]
y=surgery_id[['hospital_expire_flag']]
a_train, a_test, b_train, b_test = train_test_split(X, y, test_size=0.3, random_state=42)
lr=linear_model.LogisticRegression()
lr.fit(a_train,b_train)
predict_lr=lr.predict(a_test)
print('\nAccuracy: {}'\
    .format( metrics.accuracy_score(b_test, predict_lr)))


Accuracy: 0.9805499664654594


  y = column_or_1d(y, warn=True)
