### SVM Modelling on hyperglycemic data

In [21]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

SEED=90089

In [3]:
# Read Data
patient_df = pd.read_csv('../data/hyperglycemic_patients_w_categories.csv')

# print detail summary
print("Dataframe shape:   ", patient_df.shape)
print("Dataframe Features:", patient_df.columns)
patient_df.head()

Dataframe shape:    (14077, 18)
Dataframe Features: Index(['Unnamed: 0', 'subject_id', 'stay_id', 'glucose_max', 'anchor_age',
       'dod', 'gender', 'dbp_mean', 'sbp_mean', 'glucose_mean',
       'heart_rate_mean', 'spo2_mean', 'resp_rate_mean', 'temperature_mean',
       'apsiii', 'glucose_score', 'avg_bmi_value', 'label'],
      dtype='object')


Unnamed: 0.1,Unnamed: 0,subject_id,stay_id,glucose_max,anchor_age,dod,gender,dbp_mean,sbp_mean,glucose_mean,heart_rate_mean,spo2_mean,resp_rate_mean,temperature_mean,apsiii,glucose_score,avg_bmi_value,label
0,0,12041046,31249096,277.0,36,0,0,64.32,112.88,221.272727,101.8,92.0,17.346154,37.18625,38,3,26.75,0
1,1,17421995,38100564,2340.0,35,0,0,77.769231,122.961538,468.592593,105.361111,96.093023,19.813953,36.727586,87,5,30.558477,0
2,2,10352416,35043893,531.0,23,1,0,64.169118,119.661765,275.352941,154.028986,83.698413,25.584615,39.725,158,5,30.558477,1
3,3,16924291,34261137,259.0,19,1,1,49.3,74.2,259.0,89.75,70.0,26.333333,31.9,88,3,30.558477,0
4,4,15705944,37056020,406.0,24,0,0,57.333333,92.848485,292.0,86.631579,96.162162,15.289474,33.59,150,5,30.558477,1


In [19]:
numerical_features = ['glucose_max', 'anchor_age', 'dbp_mean', 'anchor_age',
                    'sbp_mean', 'glucose_mean', 'heart_rate_mean', 'spo2_mean', 
                    'resp_rate_mean', 'temperature_mean', 'apsiii', 'glucose_score', 'avg_bmi_value']

# Standardized variable for consistent measurement across numerical values
scaler = StandardScaler()
X_std = scaler.fit_transform(patient_df[numerical_features])

# Interpolate categorical variables
X_cat = patient_df[['dod', 'anchor_age']]


X = np.concatenate([X_std, X_cat.to_numpy()], axis=1)
y = patient_df["label"]
print("X Standardized data Shape: ", X_std.shape)
print("X Categorical data shape:  ", X_cat.shape)
print("X, y shape:                ", X.shape, y.shape)

X Standardized data Shape:  (14077, 13)
X Categorical data shape:   (14077, 2)
X, y shape:                 (14077, 15) (14077,)


In [20]:
# Split data based on training 
X_train, y_train, X_test, y_test = train_test_split(X, y, 
                                                    train_size=0.8, 
                                                    random_state=SEED)

In [None]:
# Instantiate SVC classifier
svc_clf = SVC(gamma='auto', decision_function_shape='ovr', random_state=SEED)