In [None]:
import pickle

import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

In [None]:
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load artifacts that were produced by a trusted preprocessing step.
def _load_pickle(path):
    """Return the object unpickled from ``path``, closing the file afterwards."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


X_train = _load_pickle("../data/X_train.p")
X_test = _load_pickle("../data/X_test.p")
y_train = _load_pickle("../data/y_train.p")
y_test = _load_pickle("../data/y_test.p")

### No resampling, no hyperparameter tuning

In [None]:
# Baseline: an SVC with default hyperparameters, fitted on the training split
# as loaded (no resampling) and evaluated on the test split.
svc = SVC()

# ravel() hands scikit-learn a 1-D target. It is a no-op when the target is
# already 1-D; assumes y_train may be a single-column frame -- TODO confirm.
svc.fit(X_train, y_train.values.ravel())
y_pred = svc.predict(X_test)

# Compute the confusion matrix once and reuse it for the printout and the plot.
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)
print("Classification Report:")
print(classification_report(y_test, y_pred))

fig, ax = plt.subplots(figsize=(5, 4), dpi=100)
cmp = ConfusionMatrixDisplay(cm, display_labels=['not readmitted', 'readmitted'])
cmp.plot(ax=ax)
plt.show()

### Undersampling with no hyperparameter tuning

In [None]:
# Randomly under-sample the training split so the classes are balanced.
# A fixed random_state makes the selected rows -- and therefore every metric
# computed from them further down -- identical on each run.
rus = RandomUnderSampler(random_state=42)
X_train_undersampled, y_train_undersampled = rus.fit_resample(X_train, y_train)

# Class counts before and after resampling, as a sanity check.
print("Before undersampling:")
print(y_train.value_counts())
print("\nAfter undersampling:")
print(y_train_undersampled.value_counts())

In [None]:
# Default SVC fitted on the undersampled training data, evaluated on the
# untouched test split.
svc = SVC()

# ravel() hands scikit-learn a 1-D target. It is a no-op when the target is
# already 1-D; assumes the resampled target may be a single-column frame -- TODO confirm.
svc.fit(X_train_undersampled, y_train_undersampled.values.ravel())
y_pred = svc.predict(X_test)

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))

In [None]:
# Plot the confusion matrix for the predictions currently held in y_pred.
fig, ax = plt.subplots(figsize=(5, 4), dpi=100)
cm = confusion_matrix(y_test, y_pred)
cmp = ConfusionMatrixDisplay(cm, display_labels=['not readmitted within 30', 'readmitted within 30'])
# The display only renders once plot() is called on it; plt.show() itself
# takes no axes argument.
cmp.plot(ax=ax)
plt.show()

### Oversampling with no hyperparameter tuning

In [ ]:
# Oversample the training split with SMOTE. A fixed random_state makes the
# synthetic samples identical on each run.
smote = SMOTE(random_state=42)
# The target is flattened to 1-D before resampling, so the resampled target
# comes back as a NumPy array rather than a pandas object.
X_train_oversampled, y_train_oversampled = smote.fit_resample(X_train, y_train.values.ravel())

print("Before SMOTE:")
print(y_train.value_counts())
print("\nAfter SMOTE:")
print(y_train_oversampled.shape)

In [ ]:
# Fit a default SVC on the oversampled training data (fit() returns the
# estimator itself), then score it on the test split.
svc = SVC().fit(X_train_oversampled, y_train_oversampled.ravel())
y_pred = svc.predict(X_test)

# Each heading is followed by its report on the next line.
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")

# Render the same confusion matrix as a labelled heatmap.
fig, ax = plt.subplots(dpi=100, figsize=(5, 4))
cm = confusion_matrix(y_test, y_pred)
cmp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=['not readmitted', 'readmitted'],
)
cmp.plot(ax=ax)

In [None]:
# Grid-search the SVC's class_weight setting on the undersampled training
# data, ranking candidates by F1.
parameters = {'class_weight': [{0: 0.1, 1: 0.1}, {0: 0.1, 1: 0.3}, {0: 0.1, 1: 0.5}]}
# A fresh SVC() keeps this cell independent of whichever earlier cell last
# assigned `svc`; GridSearchCV clones the estimator, so only its
# hyperparameters (the defaults) matter.
clf = GridSearchCV(SVC(), parameters, n_jobs=-1, scoring='f1')
# ravel() hands scikit-learn a 1-D target; it is a no-op when already 1-D.
clf.fit(X_train_undersampled, y_train_undersampled.values.ravel())

In [None]:
print("Best Parameters: ", clf.best_params_)
# best_score_ is the mean cross-validated score of the best candidate under
# the search's scoring metric (F1 here, not accuracy). cv_results_ is the full
# per-candidate results dict and is not a single score.
print("Best CV score (f1): ", clf.best_score_)