# SVM:

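Using supervised learning (support vector machines) to classify whether diabetic patients are readmitted to hospital and, if they are, whether the readmission happens before or after 30 days. In this notebook the task is simplified to a binary one: readmissions after 30 days (`>30`) are merged into the `NO` class, so the models predict readmission within 30 days (`<30`) versus everything else.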

Using the dataset from here: https://archive.ics.uci.edu/ml/datasets/Diabetes+130-US+hospitals+for+years+1999-2008

In [1]:
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn import metrics
from sklearn import svm
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
import numpy as np
import pickle

Using TensorFlow backend.


In [2]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    NOTE(security): ``pickle.load`` can execute arbitrary code while
    deserialising, so only use this on files produced by a trusted pipeline.
    """
    with open(path, 'rb') as picklefile:
        return pickle.load(picklefile)


# Target labels for the train and test partitions.
y_train = _load_pickle("y_train_liv.pkl")
y_test = _load_pickle("y_test_liv.pkl")

# Feature matrices; the file names suggest they were scaled upstream — TODO confirm.
x_train = _load_pickle("x_train_scaled_liv.pkl")
x_test = _load_pickle("x_test_scaled_liv.pkl")

## Binarizing the classes

In [3]:
# Fold the '>30' label into 'NO' so only the '<30' and 'NO' classes remain.
# regex=False forces a literal substring replacement on every pandas version
# (the default for `regex` changed from True to False in pandas 2.0).
y_test = y_test.str.replace('>30', 'NO', regex=False)
y_train = y_train.str.replace('>30', 'NO', regex=False)

## Linear SVM with train/test split and balanced class weights

In [4]:
# Linear SVM with class_weight="balanced", which weights each class inversely
# to its frequency in y_train to counter the class imbalance.
# random_state pins the solver's internal data shuffling so that re-running
# the cell reproduces the same fitted model.
linearSVMmodel = svm.LinearSVC(class_weight="balanced", random_state=42)
linearSVMmodel.fit(x_train, y_train)

# Score the fitted model on the test split.
y_pred = linearSVMmodel.predict(x_test)
print(metrics.classification_report(y_test, y_pred))



              precision    recall  f1-score   support

         <30       0.19      0.56      0.28      2839
          NO       0.92      0.69      0.79     22190

    accuracy                           0.68     25029
   macro avg       0.56      0.62      0.54     25029
weighted avg       0.84      0.68      0.73     25029



## Kernel RBF SVM with balanced class weights

In [5]:
# svm.SVC uses the RBF kernel by default; class weights are balanced to
# compensate for the skewed label distribution. fit() returns the estimator
# itself, so construction and training are chained.
RBFSVMmodel = svm.SVC(class_weight="balanced").fit(x_train, y_train)

# Predict on the test split and print the per-class metrics.
y_pred = RBFSVMmodel.predict(x_test)
print(metrics.classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

         <30       0.19      0.56      0.28      2839
          NO       0.93      0.70      0.79     22190

    accuracy                           0.68     25029
   macro avg       0.56      0.63      0.54     25029
weighted avg       0.84      0.68      0.74     25029



## SMOTE:

In [6]:
# Oversample the training data with SMOTE (synthetic minority-class samples);
# the test data is not resampled here.
# `fit_resample` is the supported API: `fit_sample` was deprecated and later
# removed from imbalanced-learn.
sm = SMOTE(random_state=42)
x_train_smote, y_train_smote = sm.fit_resample(x_train, y_train)



## Linear SVM with SMOTE:

In [7]:
# Linear SVM trained on the SMOTE-oversampled training data, so no class
# weighting is passed here.
# random_state pins the solver's internal data shuffling so that re-running
# the cell reproduces the same fitted model.
linearSVMmodel = svm.LinearSVC(random_state=42)
linearSVMmodel.fit(x_train_smote, y_train_smote)

# Evaluation uses the original (non-resampled) test split.
y_pred = linearSVMmodel.predict(x_test)

print(metrics.classification_report(y_test, y_pred))



              precision    recall  f1-score   support

         <30       0.17      0.57      0.27      2839
          NO       0.92      0.65      0.76     22190

    accuracy                           0.64     25029
   macro avg       0.55      0.61      0.52     25029
weighted avg       0.84      0.64      0.71     25029



## Kernel RBF SVM with SMOTE:

In [8]:
# Default (RBF-kernel) SVC trained on the SMOTE-oversampled training data.
# fit() returns the estimator, so it is chained onto the constructor.
RBFSVMmodel = svm.SVC().fit(x_train_smote, y_train_smote)

# Evaluation uses the original (non-resampled) test split.
y_pred = RBFSVMmodel.predict(x_test)
print(metrics.classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

         <30       0.32      0.10      0.15      2839
          NO       0.89      0.97      0.93     22190

    accuracy                           0.87     25029
   macro avg       0.61      0.54      0.54     25029
weighted avg       0.83      0.87      0.84     25029



## Random undersampling:

In [9]:
# Randomly undersample the training data; the test data is not resampled here.
# `fit_resample` is the supported API: `fit_sample` was deprecated and later
# removed from imbalanced-learn.
rus = RandomUnderSampler(random_state=0)
x_train_undersampled, y_train_undersampled = rus.fit_resample(x_train, y_train)



## Linear SVM with random undersampling:

In [10]:
# Linear SVM trained on the randomly undersampled training data, so no class
# weighting is passed here.
# random_state pins the solver's internal data shuffling so that re-running
# the cell reproduces the same fitted model.
linearSVMmodel = svm.LinearSVC(random_state=42)
linearSVMmodel.fit(x_train_undersampled, y_train_undersampled)

# Evaluation uses the original (non-resampled) test split.
y_pred = linearSVMmodel.predict(x_test)

print(metrics.classification_report(y_test, y_pred))



              precision    recall  f1-score   support

         <30       0.18      0.57      0.28      2839
          NO       0.93      0.67      0.78     22190

    accuracy                           0.66     25029
   macro avg       0.55      0.62      0.53     25029
weighted avg       0.84      0.66      0.72     25029



## Kernel RBF SVM with random undersampling:

In [11]:
# Default (RBF-kernel) SVC trained on the randomly undersampled training data.
# fit() returns the estimator, so it is chained onto the constructor.
RBFSVMmodel = svm.SVC().fit(x_train_undersampled, y_train_undersampled)

# Evaluation uses the original (non-resampled) test split.
y_pred = RBFSVMmodel.predict(x_test)
print(metrics.classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

         <30       0.19      0.60      0.28      2839
          NO       0.93      0.66      0.77     22190

    accuracy                           0.66     25029
   macro avg       0.56      0.63      0.53     25029
weighted avg       0.84      0.66      0.72     25029

