In [None]:
import pandas as pd
import numpy as np
import joblib
import seaborn as sns
import matplotlib.pyplot as plt

# custom classes for cross validation and scoring
import cross_validation
import performance_metrics

from sklearn import svm
from sklearn.model_selection import KFold
from pickle import load

In [None]:
# Read the processed dataset from disk; the first CSV column becomes the DataFrame index.
data = pd.read_csv(
    r"../data/processed_data/complete_data.csv",
    index_col=0,
)


In [None]:
# Split the frame into the feature matrix (everything except TARGET) and the target column.
x = data.drop(["TARGET"], axis=1)
y = data.filter(["TARGET"], axis=1)

# Load the pickled scaler (presumably fitted elsewhere, since only `transform` is called here).
# The context manager closes the file handle deterministically instead of leaving it
# open until garbage collection.
# NOTE(security): unpickling can execute arbitrary code; only load scaler.pkl from a trusted source.
with open(r"scaler.pkl", "rb") as scaler_file:
    scaler = load(scaler_file)

x_scaled = scaler.transform(x)

# Flatten the single-column target frame into a 1-D array with one entry per row.
y = y.values.reshape(y.shape[0])

# Display both shapes as a quick sanity check that features and labels line up.
x_scaled.shape, y.shape


In [None]:
# Estimator under evaluation: a support vector classifier with default settings.
model = svm.SVC()

# Shuffled 5-fold splitter; the fixed random_state keeps the fold assignment repeatable.
kfold = KFold(
    n_splits=5,
    shuffle=True,
    random_state=663,
)

# Hand the scaled features and labels to the project's cross-validation helper,
# then collect the score table and the confusion matrix it returns.
cv = cross_validation.cross_val(x_scaled, y)
results, confusion_matrix = cv.run_validation(
    model=model,
    kfold=kfold,
)

In [None]:
# Display the cross-validation results rounded to four decimals and multiplied by 100
# (presumably turning fractional scores into percentages; confirm against cross_validation).
results.round(4) * 100

In [None]:
# Draw the confusion matrix as an annotated heatmap and keep the Axes seaborn drew on,
# so the labelling below targets that Axes explicitly.
ax = sns.heatmap(confusion_matrix, annot=True, fmt='d', cmap='Blues')

# Axis labels and title
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')

# NOTE(review): the label order assumes row/column 0 is the 'Default' class.
# Confirm this against the TARGET encoding used by cross_validation.
tick_labels = ['Default', 'non-default']
tick_positions = [0, 1]

# Offset by 0.5 so each tick sits in the middle of its heatmap cell.
tick_centers = [pos + 0.5 for pos in tick_positions]
ax.set_xticks(tick_centers)
ax.set_xticklabels(tick_labels)
ax.set_yticks(tick_centers)
ax.set_yticklabels(tick_labels)

# Render the figure
plt.show()

In [None]:
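# TODO: persist the trained model. The snippet below appears to be left over from a
# logistic-regression notebook: `clf` is not defined in this notebook (the estimator
# here is `model`, an svm.SVC), and the path names a LogisticRegression artifact.
# Update the variable name and output path before uncommenting.
# joblib_file = r"logistic_regression/LogisticRegression_v1.03.pkl"
# joblib.dump(clf, joblib_file)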
