In [1]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

In [2]:
df = pd.read_parquet('clean.parquet')

### Splitting Output from Input Space

In [3]:
X = df.drop('Response', axis= 1) 
y = df['Response']

### Splitting Training and Test

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.2, random_state = 42)

### Modelling

In [5]:
#Standardization
sc = StandardScaler()
X_train = sc.fit_transform(X_train)

In [6]:
clf = LogisticRegression(class_weight='balanced').fit(X_train, y_train)
y_preds = clf.predict(sc.transform(X_test))

In [7]:
print(clf.coef_)
X_test.dtypes

[[ 0.2248093  -0.09545002 -0.8613376   0.06424629 -0.01790436  1.04406989
  -0.2730761  -0.17492636  0.68350952 -0.36027613  0.12669027 -0.39241776
   0.05107729]]


Education              int64
Income               float64
Recency                int64
NumDealsPurchases      int64
Complain               int64
MntSpent               int64
NumPurchases           int64
Age                    int64
DaysEnrolled           int64
Married                 bool
Single                  bool
Together                bool
Widow                   bool
dtype: object

### Evaluation

In [8]:
acc = accuracy_score(y_test, y_preds)
prec = precision_score(y_test, y_preds)
rec = recall_score(y_test, y_preds)
f1 = f1_score(y_test, y_preds)
auc = roc_auc_score(y_test, y_preds)
cm = confusion_matrix(y_preds, y_test, labels=[1,0])
print(cm)

[[ 40  92]
 [ 21 288]]


In [9]:
print("Accuracy: %.4f" % acc)
print("Precision: %.4f" % prec)
print("Recall: %.4f" % rec)
print("F1: %.4f" % f1)
print("AUC: %.4f" % auc)

Accuracy: 0.7438
Precision: 0.3030
Recall: 0.6557
F1: 0.4145
AUC: 0.7068
