# [UCI ML Repo - credit card defaults](http://archive.ics.uci.edu/ml/datasets/default+of+credit+card+clients#)
## Evaluation
### Platform: Python 3, colab.research.google.com

In [0]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from google.colab import drive
from joblib import dump, load
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, log_loss
from sklearn.preprocessing import StandardScaler
# from sklearn.model_selection import train_test_split, GridSearchCV
# from sklearn.pipeline import Pipeline

## Load data

In [12]:
# Mount Google Drive at /content/gdrive so the data and model files below are
# reachable; force_remount=False leaves an already-existing mount in place.
drive.mount('/content/gdrive', force_remount=False)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [13]:
# Folder on the mounted Drive that holds the test CSV files.
loc_data = "/content/gdrive/My Drive/Colab Notebooks/uci-credit-card-defaults/data"
features_csv = "{}/defaults_clean_test_X.csv".format(loc_data)

# header=0: the first row of the file supplies the column names.
X_test = pd.read_csv(features_csv, header=0)
print(X_test.shape)
X_test.head(5)

(9000, 23)


Unnamed: 0,limit_bal,sex,education,marriage,age,pay_2,pay_3,pay_4,pay_5,pay_6,...,bill_amt4,bill_amt5,bill_amt6,pay_amt1,pay_amt2,pay_amt3,pay_amt4,pay_amt5,pay_amt6,pay_1
0,30000.0,1,2,2,25,0,0,0,0,0,...,12580.0,13716.0,14828.0,1500.0,2000.0,1500.0,1500.0,1500.0,2000.0,0
1,150000.0,2,1,2,26,0,0,0,0,0,...,101581.0,77741.0,77264.0,4486.0,4235.0,3161.0,2647.0,2669.0,2669.0,0
2,70000.0,2,3,1,32,0,0,0,0,0,...,69753.0,70111.0,70212.0,2431.0,3112.0,3000.0,2438.0,2500.0,2554.0,0
3,130000.0,1,3,2,49,0,0,0,0,0,...,16898.0,11236.0,6944.0,1610.0,1808.0,7014.0,27.0,7011.0,4408.0,0
4,50000.0,2,2,2,36,0,0,0,0,2,...,19574.0,20295.0,19439.0,2000.0,1500.0,1000.0,1800.0,0.0,1000.0,0


In [14]:
# Same data folder as the feature file; re-declared so this cell runs on its own.
loc_data = "/content/gdrive/My Drive/Colab Notebooks/uci-credit-card-defaults/data"
labels_csv = "{}/defaults_clean_test_y.csv".format(loc_data)

# header=None: every row is read as data, so the single column is labelled 0.
y_test = pd.read_csv(labels_csv, header=None)
print(y_test.shape)
y_test.head(5)

(9000, 1)


Unnamed: 0,0
0,0
1,0
2,0
3,0
4,1


## Helper functions and classes

In [0]:
def view_metrics(y_test, y_pred):
    """
    Print accuracy, precision, recall and F1 for a set of predictions.

    y_test: true labels.
    y_pred: predicted labels, in the same order as y_test.

    Nothing is returned; each score is written to stdout on its own line.
    """
    # (output template, scorer) pairs, in the order they are printed.
    report = (
        ("Accuracy: {}", accuracy_score),
        ("Precision: {}", precision_score),
        ("Recall: {}", recall_score),
        ("F1: {}", f1_score),
    )
    for template, scorer in report:
        print(template.format(scorer(y_test, y_pred)))

In [0]:
class LogCol():
    """
    Pipeline step that replaces column 'col_name' with its natural logarithm.

    The step is stateless: fit learns nothing, and transform returns a
    modified deep copy so the frame passed in is left as it was.
    """
    def __init__(self, col_name):
        # Label of the column that transform will log-transform.
        self.col_name = col_name

    def fit(self, X, y=None):
        # Nothing to learn; return self so the step can be chained.
        return self

    def transform(self, X):
        logged = X.copy(deep=True)
        logged.loc[:, self.col_name] = np.log(logged.loc[:, self.col_name])
        return logged

In [0]:
class StdCol():
    """
    Standardizes column 'col_name' in a pipeline.

    fit learns the scaling of that one column with a StandardScaler;
    transform applies the learned scaling and returns a new DataFrame,
    leaving the frame that was passed in unchanged.

    col_name is expected to be a single column label (the column is
    reshaped to one feature of shape (n_rows, 1) before scaling).
    """
    def __init__(self, col_name):
        self.col_name = col_name

    def fit(self, X, y=None):
        """
        Fit a StandardScaler on X[col_name]. y is accepted for pipeline
        compatibility and ignored. Returns self.
        """
        # The attribute stays named 'ss': instances persisted earlier
        # presumably carry their fitted scaler under this name.
        self.ss = StandardScaler()
        self.ss.fit(X.loc[:, self.col_name].values.reshape(-1, 1))
        return self

    def transform(self, X):
        """
        Return a copy of X in which col_name has been replaced by its
        scaled values. Requires fit to have been called (self.ss set).
        """
        # Copy first: writing into the caller's frame would rescale the same
        # data again every time the pipeline is re-run on it.
        X = X.copy(deep=True)
        scaled = self.ss.transform(X.loc[:, self.col_name].values.reshape(-1, 1))
        # The scaler returns shape (n_rows, 1); flatten it so the column
        # receives a plain 1-D array. Assigning the whole column (rather than
        # writing through .loc) also lets an integer column become float.
        X[self.col_name] = np.asarray(scaled).ravel()
        return X

In [0]:
class CategoricalColInt():
    """
    Transforms column 'col_name' into n-1 indicator (dummy) columns.

    fit records the distinct levels of the column. transform then always
    emits the same indicator columns, named '<col_name>_<level>', in the
    same order, even when the frame being transformed is missing some
    levels. The first level is dropped as the baseline, and a value that
    was not seen in fit gets all-zero indicators.

    An instance without recorded levels (for example one that was persisted
    before levels were recorded) derives them from the frame passed to
    transform instead.
    """
    def __init__(self, col_name):
        self.col_name = col_name

    def fit(self, X, y=None):
        """
        Record the levels of X[col_name]. y is accepted for pipeline
        compatibility and ignored. Returns self.
        """
        # pd.Categorical infers the levels the same way get_dummies does, so
        # fit followed by transform on the same frame yields the same columns
        # as encoding that frame directly.
        self.categories_ = pd.Categorical(X.loc[:, self.col_name]).categories
        return self

    def transform(self, X):
        """
        Return a new DataFrame with col_name removed and its indicator
        columns appended on the right. X itself is not modified.
        """
        global transformed_features

        values = X.loc[:, self.col_name]
        categories = getattr(self, "categories_", None)
        if categories is not None:
            # Pin the level set so the output columns do not depend on which
            # levels happen to be present in this particular frame.
            values = values.astype(pd.CategoricalDtype(categories=categories))

        dummies = pd.get_dummies(values, prefix=self.col_name)
        # Drop the FIRST level: it is the baseline, and keeping all n
        # indicators would make them perfectly collinear.
        dummies = dummies.iloc[:, 1:]

        # Both frames share X's index, so a column-wise concat lines them up
        # row for row (an index merge would multiply rows if labels repeat).
        X = pd.concat([X.drop(columns=self.col_name), dummies], axis=1)

        # NOTE(review): writes the result to a module-level name; kept because
        # other cells may read it (e.g. to inspect the output column names).
        transformed_features = X
        return X

## Evaluation

In [0]:
# Project folder on the mounted Drive where the persisted model is stored.
loc_model = "/content/gdrive/My Drive/Colab Notebooks/uci-credit-card-defaults"
model_path = "{}/model.joblib".format(loc_model)

# joblib.load unpickles the file, so only load model files you trust.
# NOTE(review): presumably the model references the helper classes defined
# above, in which case they must exist before this cell runs - confirm.
model = load(model_path)

In [20]:
# Predict with the loaded model on the test features, then print accuracy,
# precision, recall and F1 of those predictions against the true labels.
y_pred = model.predict(X_test)
view_metrics(y_test, y_pred)

Accuracy: 0.8166666666666667
Precision: 0.6659528907922913
Recall: 0.3173469387755102
F1: 0.4298548721492743
