## AutoML Prediction Process
###  1) Read data
###  2) Load Model
###  3) Run predictions

In [1]:
from copy import copy

import numpy as np
from numpy import inf
import pandas as pd
from datetime import datetime
import functools
import matplotlib.pyplot as plt  

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn import svm 
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier  #RF and GBM algorithm
from sklearn.linear_model import ElasticNet, SGDClassifier
from sklearn.model_selection import GridSearchCV   #Perforing grid search
from sklearn import preprocessing, neighbors, metrics
import sklearn
# Older scikit-learn releases expose the splitting helpers in
# sklearn.cross_validation, newer ones in sklearn.model_selection.
# Compare the numeric (major, minor) version rather than the raw strings:
# string comparison is lexicographic and mis-orders e.g. '0.9' vs '0.20'.
import re
_sk_match = re.match(r"(\d+)\.(\d+)", sklearn.__version__)
# If the version string is unparseable, assume a modern release.
_sk_version = tuple(int(part) for part in _sk_match.groups()) if _sk_match else (0, 20)
if _sk_version < (0, 20):
    from sklearn.cross_validation import train_test_split, KFold, StratifiedKFold, PredefinedSplit
else:
    from sklearn.model_selection import train_test_split, KFold, StratifiedKFold, PredefinedSplit

import scipy.stats as st
from sklearn.metrics import mean_absolute_error, accuracy_score, log_loss, make_scorer, auc, roc_auc_score

import joblib
import yaml

import xgboost as xgb

%matplotlib inline

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
import automl
from automl import StackLayer, TextElasticNetBinary, MissingDataHandler

In [3]:
# import importlib
# importlib.reload(automl)

In [4]:
# Dataset reference: https://www.hindawi.com/journals/bmri/2014/781670/
input_data = pd.read_csv(filepath_or_buffer="diabetes_data.csv")
# Show five randomly sampled rows as a quick sanity check of the load.
input_data.sample(n=5)

Unnamed: 0,rowID,race,gender,age,weight,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,payer_code,...,glipizide.metformin,glimepiride.pioglitazone,metformin.rosiglitazone,metformin.pioglitazone,change,diabetesMed,readmitted,diag_1_desc,diag_2_desc,diag_3_desc
2833,2834,AfricanAmerican,Female,[60-70),?,Elective,Discharged to home,Physician Referral,1,MC,...,No,No,No,No,No,No,False,Coronary atherosclerosis of unspecified type o...,Angina decubitus,"Respiratory abnormality, unspecified"
3085,3086,Caucasian,Male,[60-70),?,Urgent,Discharged to home,Physician Referral,2,MC,...,No,No,No,No,No,No,False,Paroxysmal supraventricular tachycardia,Coronary atherosclerosis of unspecified type o...,Malignant essential hypertension
1405,1406,Caucasian,Male,[40-50),?,Not Available,Discharged/transferred to home with home healt...,,11,?,...,No,No,No,No,Ch,Yes,False,Cellulitis and abscess of face,"Femoral hernia with obstruction, unilateral or...","Postoperative shock, unspecified"
4632,4633,Caucasian,Male,[10-20),?,Emergency,Discharged to home,Emergency Room,3,?,...,No,No,No,No,Ch,Yes,False,"Diabetes with ketoacidosis, type I [juvenile t...",Hyperosmolality and/or hypernatremia,Hyperosmolality and/or hypernatremia
1583,1584,Caucasian,Female,[70-80),?,Emergency,Discharged to home,Transfer from another health care facility,2,UN,...,No,No,No,No,No,Yes,False,"Diabetes with other specified manifestations, ...","Pressure ulcer, unspecified site",Paroxysmal supraventricular tachycardia


In [5]:
# Name of the label column in the input data.
target = 'readmitted'

# model_file.yaml parses to a list of single-key mappings (see the printed
# output): one holding 'feature_list', one holding 'model_file'.
# NOTE(review): yaml.full_load can construct more than plain data types; if
# the file only ever contains lists and strings, yaml.safe_load would be the
# safer loader — confirm before switching.
with open(r'model_file.yaml') as file:
    documents = yaml.full_load(file)
    print(documents)

# Merge the mappings and look entries up by key, so the result does not depend
# on the order of the list items or on each mapping having exactly one key.
model_config = {key: value for doc in documents for key, value in doc.items()}
feature_list = model_config['feature_list']
model_file = model_config['model_file']

[{'feature_list': ['number_inpatient', 'number_diagnoses', 'num_lab_procedures', 'num_medications', 'time_in_hospital', 'num_procedures', 'number_outpatient', 'number_emergency', 'medical_specialty', 'admission_source_id', 'diag_3_desc', 'diag_1_desc', 'admission_type_id', 'diag_2_desc', 'age', 'discharge_disposition_id']}, {'model_file': 'pipeline_xgb.pkl'}]


In [6]:
# Display the model file path that was read from the YAML config.
model_file

'pipeline_xgb.pkl'

In [7]:
# Feature matrix: keep only the columns listed in the YAML feature list.
X = input_data.loc[:, feature_list]
# Label vector: the target column cast to integers.
y = input_data.loc[:, target].astype(int)

In [8]:
# Hold out 20% of the rows (test_size=0.2) with a fixed seed so the split is repeatable.
# NOTE(review): only the test portion is used below; presumably this mirrors the
# split used when the model was trained — confirm, otherwise the score is not out-of-sample.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

### Run predictions

In [9]:
# Load the serialized pipeline from the path named in the YAML config.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code —
# only load files from a trusted source. Presumably the automl classes imported
# earlier must be importable for the unpickling to succeed; confirm.
pipe_full = joblib.load(model_file)

In [10]:
# Per-class probabilities for the held-out rows.
test_pred2 = pipe_full.predict_proba(X_test)
# ROC AUC computed from the probability column at index 1.
roc_auc_score(y_true=y_test, y_score=test_pred2[:, 1])

0.7022059912572838