In [1]:
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RepeatedStratifiedKFold, cross_validate
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

%matplotlib inline

In [2]:
# Loading Data
# Read the credit-card CSV (path is relative to the notebook's working
# directory) into the DataFrame used by all later cells.
DATA_PATH = 'UCI_Credit_Card_balanced.csv'
df = pd.read_csv(DATA_PATH)

In [4]:
# Scaling Data
# Standardize every column except the target 'Y'. fit_transform() fits the
# scaler and returns the transformed values in one pass, so a separate
# fit() call beforehand only repeats the same work.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(df.drop('Y', axis=1))

In [7]:
# Separate features and response variable
# The scaler output is used directly as the feature matrix: wrapping it in a
# DataFrame only to call .values again discards the column labels anyway, and
# labelling with df.columns[:-1] silently assumed 'Y' is the last column.
X = np.asarray(scaled_features)
# Target vector taken from the unscaled 'Y' column.
y = df['Y'].to_numpy()

In [10]:
# Define model
logmodel = LogisticRegression(C=0.1, penalty='l2')

# Evaluate the classifier inside a pipeline that carries its own
# StandardScaler, so that in every fold the scaling statistics are learned
# from the training split only. Standardizing the whole dataset before
# splitting lets test-fold means/variances leak into training.
# Re-standardizing columns that were already standardized is harmless:
# standardization cancels any earlier per-column shift and rescale.
# cross_validate clones the estimator, so `logmodel` itself stays unfitted.
cv_model = make_pipeline(StandardScaler(), logmodel)

# Fit Model
# Stratified 10-fold CV repeated 3 times (30 fits), seeded for reproducibility.
# t0/t1 bracket the run so the reporting cell can print the elapsed time.
t0 = datetime.now()
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=69)
scores = cross_validate(
    cv_model,
    X,
    y,
    scoring=['accuracy', 'roc_auc', 'precision', 'recall', 'f1'],
    cv=cv,
    n_jobs=-1,  # run folds in parallel on all available cores
)
t1 = datetime.now()

In [11]:
# Report the mean and standard deviation (both in percent) of each
# cross-validated metric. Each entry pairs the output template with the key
# under which cross_validate stored that metric's per-fold test scores.
report_rows = [
    ('Mean Accuracy: %.2f%% Std Dev: (+/- %.3f%%)', 'test_accuracy'),
    ('Mean ROC AUC: %.2f%% Std Dev: (+/- %.3f%%)', 'test_roc_auc'),
    ('Mean Precision: %.2f%% Std Dev: (+/- %.3f%%)', 'test_precision'),
    ('Mean Recall: %.2f%% Std Dev: (+/- %.3f%%)', 'test_recall'),
    ('Mean F1 Score: %.2f%% Std Dev: (+/- %.3f%%)', 'test_f1'),
]
for template, score_key in report_rows:
    fold_pct = scores[score_key] * 100  # per-fold scores as percentages
    print(template % (np.mean(fold_pct), np.std(fold_pct)))

# Wall-clock time between the t0 and t1 timestamps taken around the CV run.
elapsed = t1 - t0
print("Execution Time: ", elapsed)

Mean Accuracy: 72.08% Std Dev: (+/- 0.655%)
Mean ROC AUC: 78.57% Std Dev: (+/- 0.658%)
Mean Precision: 71.54% Std Dev: (+/- 0.797%)
Mean Recall: 68.20% Std Dev: (+/- 0.986%)
Mean F1 Score: 69.83% Std Dev: (+/- 0.741%)
Execution Time:  0:00:06.204483
