In [5]:
from AdvancedAnalytics import ReplaceImputeEncode, NeuralNetwork
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np


df = pd.read_excel("CreditHistory_Clean.xlsx")

# Data map
attribute_map = { 
    'age':['I',(19,120)], 
    'amount': ['I',(0,20000)], 
    'checking': ['N',(1,2,3,4)], 
    'coapp': ['N',(1,2,3)], 
    'depends': ['B',(1,2)], 
    'duration': ['I',(1,72)], 
    'employed': ['N',(1,2,3,4,5)], 
    'existcr': ['N',(1,2,3,4)], 
    'foreign': ['B',(1,2)], 
    'good_bad': ['B',('bad','good')], 
    'history': ['N',(0,1,2,3,4)], 
    'housing':['N',(1,2,3)], 
    'installp': ['N',(1,2,3,4)], 
    'job': ['N',(1,2,3,4)], 
    'marital': ['N',(1,2,3,4)], 
    'other': ['N',(1,2,3)], 
    'property': ['N',(1,2,3,4)], 
    'purpose': ['N',('0', '1','2','3','4','5','6', '8','9','X')],
    'resident': ['N',(1,2,3,4)], 
    'savings': ['N',(1,2,3,4,5)], 
    'telephon': ['B',(1,2)] 
    }

# Data preprocessing
rie = ReplaceImputeEncode(data_map=attribute_map, nominal_encoding='one-hot', interval_scale=None, drop=False, display=True)
encoded_df = rie.fit_transform(df)

# Feature and Target
y = encoded_df['good_bad']
X = encoded_df.drop('good_bad',axis=1)
np_y = np.ravel(y)

# Cross-Validation
best_network = 0
best_alpha = 0
best_F1 = 0
network_list = [(3), (11), (5,4), (6,5), (7,6), (8,7)]
alpha_list = [0, 0.1, 0.15, 0.2, 0.25, 0.3]
score_list = ['accuracy', 'recall', 'precision', 'f1']
for nn in network_list:
    print("\nNetwork: ", nn)
    for a in alpha_list:
        print("\nAlpha: ", a)
        fnn = MLPClassifier(hidden_layer_sizes=nn, activation='tanh', alpha=a, tol=1e-32, solver='lbfgs', max_iter=5000, random_state=12345)
        scores = cross_validate(fnn, X, np_y, scoring=score_list, return_train_score=False, cv=10)
        print("{:.<13s}{:>6s}{:>13s}".format("Metric", "Mean", "Std. Dev."))
        for s in score_list:
            var = "test_"+s
            mean = scores[var].mean()
            std = scores[var].std()
            print("{:.<13s}{:>7.4f}{:>10.4f}".format(s, mean, std))
            if s == "f1" and best_F1 < mean:
                best_F1 = mean
                best_network = nn
                best_alpha = a


********** Data Preprocessing ***********
Features Dictionary Contains:
3 Interval, 
4 Binary, 
14 Nominal, and 
0 Excluded Attribute(s).

Data contains 1000 observations & 21 columns.


Attribute Counts
............... Missing  Outliers
age.......         0         0
amount....         0         0
checking..         0         0
coapp.....         0         0
depends...         0         0
duration..         0         0
employed..         0         0
existcr...         0         0
foreign...         0         0
good_bad..         0         0
history...         0         0
housing...         0         0
installp..         0         0
job.......         0         0
marital...         0         0
other.....         0         0
property..         0         0
purpose...         0         0
resident..         0         0
savings...         0         0
telephon..         0         0

Network:  3

Alpha:  0
Metric.......  Mean    Std. Dev.
accuracy..... 0.7000    0.0000
recall....... 1.0000  

In [7]:
print("The Best Network with F1=", best_F1)
print(" Perceptrons: ", best_network)
print(" Alpha:", best_alpha)

The Best Network with F1= 0.8407678776981113
 Perceptrons:  (8, 7)
 Alpha: 0.15


In [8]:
# Evaluate the model
X_train, X_validate, y_train, y_validate = train_test_split(X, np_y,test_size = 0.3, random_state=7)
fnn = MLPClassifier(hidden_layer_sizes=best_network, activation='tanh', alpha=best_alpha, tol=1e-32, solver='lbfgs', max_iter=5000, random_state=12345)
fnn = fnn.fit(X_train,y_train)
NeuralNetwork.display_binary_split_metrics(fnn, X_train, y_train, X_validate, y_validate)



Model Metrics..........       Training     Validation
Observations...........            700            300
Features...............             68             68
Number of Layers.......              2              2
Number of Outputs......              1              1
Number of Neurons......             15             15
Number of Weights......            623            623
Number of Iterations...             65             65
Activation Function....       logistic       logistic
Mean Absolute Error....         0.4279         0.4153
Avg Squared Error......         0.2139         0.2013
Accuracy...............         0.6900         0.7233
Precision..............         0.6900         0.7233
Recall (Sensitivity)...         1.0000         1.0000
F1-score...............         0.8166         0.8395
MISC (Misclassification)...      31.0%          27.7%
     class 0...............     100.0%         100.0%
     class 1...............       0.0%           0.0%


Training
Confusion Matri