# Playground for Credit Crunch Application

In [2]:
# Loading data in from raw csv
import pandas as pd
raw_df = pd.read_csv('../datasets/Credit_Data_Raw.csv')
raw_df

Unnamed: 0,CHK_ACCT,DURATION,HISTORY,NEW_CAR,USED_CAR,FURNITURE,RADIO_TV,EDUCATION,RETRAINING,AMOUNT,...,AGE,OTHER_INSTALL,RENT,OWN_RES,NUM_CREDITS,JOB,NUM_DEPENDENTS,TELEPHONE,FOREIGN,DEFAULT
0,0,6,4,0,0,0,1,0,0,1169,...,67,0,0,1,2,2,1,1,0,0
1,1,48,2,0,0,0,1,0,0,5951,...,22,0,0,1,1,2,1,0,0,1
2,3,12,4,0,0,0,0,1,0,2096,...,49,0,0,1,1,1,2,0,0,0
3,0,42,2,0,0,1,0,0,0,7882,...,45,0,0,0,1,2,2,0,0,0
4,0,24,3,1,0,0,0,0,0,4870,...,53,0,0,0,2,2,2,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,3,12,2,0,0,1,0,0,0,1736,...,31,0,0,1,1,1,1,0,0,0
996,0,30,2,0,1,0,0,0,0,3857,...,40,0,0,1,1,3,1,1,0,0
997,3,12,2,0,0,0,1,0,0,804,...,38,0,0,1,1,2,1,0,0,0
998,0,45,2,0,0,0,1,0,0,1845,...,23,0,0,0,1,2,1,1,0,1


# Model-Prediction Function

In [3]:
def credit_crunch(converted_data,  return_evaluation=False, model_type=False):
# Credit Crunch is a TensorFlow Neural Network that predicts an applicant's probablity to default on a loan. 
# If default is predicted, a loan denial is returned; otherwise, approved.
# The NN model is dynamically created everytime to match in input data that is imported as a key:value dictionary.
# return_evaluation should be a boolean value (true/false) on whether or not to return model evaluation metrics with function
# Generic NN model parameters can be set in the DEV TOOLS.


    ### DEV TOOLS ###
    return_model_evaluation = return_evaluation
    numpy_seed = 42
    number_inputs = len(converted_data)
    number_classes = 2
    number_hidden_layers = 1
    number_hidden_nodes = number_inputs * (50/30)
    number_epochs = 10
    layer_activation = 'relu'
    classifier_activation = 'softmax'
    learn_metrics = ['accuracy']
    loss_type = 'categorical_crossentropy'
    optimizer_type = 'adam'

    # import dependencies
    import numpy as np
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import LabelEncoder, MinMaxScaler
    from tensorflow.keras.utils import to_categorical
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense



    # setting numpy seed for reproducible results
    np.random.seed(numpy_seed)


    # import train data
    raw_data = pd.read_csv('datasets/Credit_Data_Raw.csv')

    raw_data.dropna()
    
    # defining labels, input fields, and input form data
    X = raw_data.drop('DEFAULT', axis=1)[[item for item in converted_data]]
    y = np.array(raw_data['DEFAULT']).reshape(-1, 1)
    data_bundle = np.array(list([converted_data[item] for item in converted_data])).reshape(1, -1)
    
    # spliting data to test and training sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
    
    
    # creating scaler data 
    X_scaler = MinMaxScaler().fit(X_train)

    
    
    # loading pre-defined model
    if model_type:
        
        from tensorflow.keras.models import load_model

        model = load_model("models/DEFAULT_model_trained_top_10.h5")
        
        model_loss = 0.5177
        model_accuracy = 0.7680
    
    
    
    # creating dynamic model based on passed data bundle
    else:
        
        # scaling data 
        X_train_scaled = X_scaler.transform(X_train)
        X_test_scaled = X_scaler.transform(X_test)
        
        
        # one-hot-encoding labels
        label_encoder = LabelEncoder()
        label_encoder.fit(y_train)
        encoded_y_train = label_encoder.transform(y_train)
        encoded_y_test = label_encoder.transform(y_test)
        y_train_categorical = to_categorical(encoded_y_train)
        y_test_categorical = to_categorical(encoded_y_test)
        
        
        # instantiating Neural Net Model
        model = Sequential()

        # adding input layer
        model.add(Dense(units=number_hidden_nodes, activation=layer_activation, input_dim=number_inputs))

        # adding hidden layers
        for layer in np.arange(0, number_hidden_layers):
            model.add(Dense(units=number_hidden_nodes, activation=layer_activation))

        # adding classifier layer
        model.add(Dense(units=number_classes, activation=classifier_activation))

        # compiling model
        model.compile(optimizer=optimizer_type, loss=loss_type, metrics=learn_metrics)

        # fitting model to training data
        model.fit(
            X_train_scaled,
            y_train_categorical,
            epochs=number_epochs,
            shuffle=True,
            verbose=0
        )
    
        # evaluating dynamic model
        model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test_categorical, verbose=0)

    # scaling data bundle
    data_bundle_scaled = X_scaler.transform(data_bundle)
    
    # predicting approval for user (1st # is Approval Probability or 2nd # is Default Probability)
    crunchies = model.predict(data_bundle_scaled)

    
    # returning model evaluation if turned on
    if return_model_evaluation:
        
        return crunchies, model_loss, model_accuracy
    
    else:
    
        return crunchies
    

# Approval Check Function

In [4]:
def approval_check(crunchies, model_accuracy):
# approval_check determines the approval status of an applicant based on the approval probability and determining model accuracy
    
    # hard coding approval status variables
    sufficient_accuracy = 0.75
    approved_probability = 0.5

    # extracting approval probability from crunchies (class prediction probabilities); 
    # 1st value, [0], is approval probabilty
    # 2nd value, [1], is default probability
    approval_probability = crunchies[0][0]

        
    if model_accuracy > sufficient_accuracy:
        if approval_probability >= approved_probability:
            return("Approval")
        else:
            return('Declined')

    else:
        if approval_probability >= approved_probability:
            return('Declined')
        else:
            return('Approval')
    


# Creating Synthetic Data Bundles from provided dataset

In [6]:
# This cell grabs the first # rows (dp_count) and packages them so they can be input into the function
import numpy as np

dp_count = 100
dp_list = []
for item in np.arange(0, dp_count):
    t1 = raw_df.drop('DEFAULT', axis=1).iloc[item]
    dp = {}
    
    for index, item in enumerate(raw_df.columns.drop(['DEFAULT'])):
        dp[item] = t1[index]
    dp_list.append(dp)
dp_list

[{'CHK_ACCT': 0,
  'DURATION': 6,
  'HISTORY': 4,
  'NEW_CAR': 0,
  'USED_CAR': 0,
  'FURNITURE': 0,
  'RADIO_TV': 1,
  'EDUCATION': 0,
  'RETRAINING': 0,
  'AMOUNT': 1169,
  'SAV_ACCT': 4,
  'EMPLOYMENT': 4,
  'INSTALL_RATE': 4,
  'MALE_DIV': 0,
  'MALE_SINGLE': 1,
  'MALE_MAR_or_WID': 0,
  'CO_APPLICANT': 0,
  'GUARANTOR': 0,
  'PRESENT_RESIDENT': 4,
  'REAL_ESTATE': 1,
  'PROP_UNKN_NONE': 0,
  'AGE': 67,
  'OTHER_INSTALL': 0,
  'RENT': 0,
  'OWN_RES': 1,
  'NUM_CREDITS': 2,
  'JOB': 2,
  'NUM_DEPENDENTS': 1,
  'TELEPHONE': 1,
  'FOREIGN': 0},
 {'CHK_ACCT': 1,
  'DURATION': 48,
  'HISTORY': 2,
  'NEW_CAR': 0,
  'USED_CAR': 0,
  'FURNITURE': 0,
  'RADIO_TV': 1,
  'EDUCATION': 0,
  'RETRAINING': 0,
  'AMOUNT': 5951,
  'SAV_ACCT': 0,
  'EMPLOYMENT': 2,
  'INSTALL_RATE': 2,
  'MALE_DIV': 0,
  'MALE_SINGLE': 0,
  'MALE_MAR_or_WID': 0,
  'CO_APPLICANT': 0,
  'GUARANTOR': 0,
  'PRESENT_RESIDENT': 2,
  'REAL_ESTATE': 1,
  'PROP_UNKN_NONE': 0,
  'AGE': 22,
  'OTHER_INSTALL': 0,
  'RENT': 0,
 

# Looping through Data Bundles for Evaluation

In [267]:
# This cell takes the dp_list from previous cell and loops through them, predicting 1 at a time (like our app will)
tests_df = pd.DataFrame(columns={'Actual', 'Result', 'MA', 'ML', 'Approval_Prob'})

for index, item in enumerate(dp_list):
    print(f'Number of Inputs:{len(item)}')
    test, ml, ma= credit_crunch(item, True, False)
    actual = df['DEFAULT'][index]
    results = approval_check(test, ma)
    print(actual, test, results, ma, ml)
    
    current_test= pd.DataFrame({'Actual':[actual], 'Result':[results], 'MA':[ma], 'ML':[ml], 'Approval_Prob':[test[0][0]]})
    tests_df = tests_df.append(current_test)
#     tests_df.to_csv('datasets/approval.csv')

Number of Inputs:30
0 [[0.98193604 0.018064  ]] Approval 0.764 0.4769752697944641
Number of Inputs:30
1 [[0.61136353 0.38863647]] Approval 0.772 0.4779868814945221
Number of Inputs:30
0 [[0.96448886 0.0355112 ]] Approval 0.764 0.46832211685180664
Number of Inputs:30
0 [[0.49976784 0.50023216]] Declined 0.784 0.4799963989257813
Number of Inputs:30
1 [[0.19343334 0.80656666]] Approval 0.744 0.49181829047203063
Number of Inputs:30
0 [[0.77025425 0.2297458 ]] Approval 0.764 0.47376262593269347
Number of Inputs:30
0 [[0.8938669  0.10613316]] Declined 0.748 0.5041798486709594
Number of Inputs:30
0 [[0.6300898  0.36991012]] Approval 0.772 0.4734583866596222
Number of Inputs:30
0 [[0.97868836 0.02131168]] Approval 0.78 0.47456085205078125
Number of Inputs:30
1 [[0.20986351 0.7901365 ]] Declined 0.764 0.4734839415550232
Number of Inputs:30
1 [[0.2957965 0.7042034]] Declined 0.784 0.450181901216507
Number of Inputs:30
1 [[0.15413694 0.84586304]] Declined 0.78 0.4700547568798065
Number of Inputs:

# Model Evaluations

In [268]:
# Confusion Matrix Averages Equivilent
tests_df.groupby(['Actual', 'Result']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Approval_Prob,MA,ML
Actual,Result,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Approval,0.826081,0.7716,0.474351
0,Declined,0.703013,0.751467,0.48552
1,Approval,0.619114,0.767556,0.472745
1,Declined,0.293893,0.76675,0.475123


In [269]:
# General Data Evaluation
tests_df.describe()

Unnamed: 0,Approval_Prob,MA,ML
count,100.0,100.0,100.0
mean,0.703843,0.76744,0.476006
std,0.245941,0.014815,0.010231
min,0.111201,0.72,0.450182
25%,0.546432,0.76,0.470844
50%,0.793123,0.768,0.475009
75%,0.904616,0.776,0.481511
max,0.989195,0.8,0.50418


In [270]:
# Confusion Matrix Equivilent
tests_df.groupby(['Actual', 'Result']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Approval_Prob,MA,ML
Actual,Result,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Approval,60,60,60
0,Declined,15,15,15
1,Approval,9,9,9
1,Declined,16,16,16


# Model Variable Evaluation
Testing number of nodes, layers, and epochs to get best model

In [47]:
'''
Credit Crunch
Author: Andrew McKinney
Creation Date: 2020-04-28
'''

import numpy as np
import warnings
warnings.filterwarnings("ignore")

print(f'''layers, nodes, epochs: accuracy''')

max_layers = 5
max_nodes = 90
max_epochs = 100

# results_df = pd.DataFrame(columns={'Accuracy', 'Loss', 'Layers', 'Nodes', 'Epochs'})

for num_lays in np.arange(1, max_layers + 1):

    for num_nodes in np.arange(10, max_nodes + 1, 20):
        
        for num_epochs in np.arange(10, max_epochs + 1, 20):
            
            ### DEV TOOLS ###
            numpy_seed = 42
            number_inputs = 30
            number_classes = 2
            number_hidden_layers = num_lays
            number_hidden_nodes = num_nodes
            number_epochs = num_epochs
            layer_activation = 'relu'
            classifier_activation = 'softmax'
            learn_metrics = ['accuracy']
            loss_type = 'categorical_crossentropy'
            optimizer_type = 'adam'

            # import dependencies
            import numpy as np
            import pandas as pd
            from sklearn.model_selection import train_test_split
            from sklearn.preprocessing import LabelEncoder, MinMaxScaler
            from tensorflow.keras.utils import to_categorical
            from tensorflow.keras.models import Sequential
            from tensorflow.keras.layers import Dense



            # setting numpy seed for reproducible results
            np.random.seed(numpy_seed)


            # import train data
            raw_data = pd.read_csv('datasets/Credit_Data_Raw.csv')

            raw_data.dropna()

            # defining labels, input fields, and input form data
            X = raw_data.drop('DEFAULT', axis=1)
            y = np.array(raw_data['DEFAULT']).reshape(-1, 1)

            # spliting data to test and training sets
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)


            # scaling data 
            X_scaler = MinMaxScaler().fit(X_train)
            X_train_scaled = X_scaler.transform(X_train)
            X_test_scaled = X_scaler.transform(X_test)


            # one-hot-encoding output labels
            label_encoder = LabelEncoder()
            label_encoder.fit(y_train)
            encoded_y_train = label_encoder.transform(y_train)
            encoded_y_test = label_encoder.transform(y_test)
            y_train_categorical = to_categorical(encoded_y_train)
            y_test_categorical = to_categorical(encoded_y_test)

            # instantiating Neural Net Model
            model = Sequential()

            # adding input layer
            model.add(Dense(units=number_hidden_nodes, activation=layer_activation, input_dim=number_inputs))

            # adding hidden layers
            for layer in np.arange(0, number_hidden_layers):
                model.add(Dense(units=number_hidden_nodes, activation=layer_activation))

            # adding classifier layer
            model.add(Dense(units=number_classes, activation=classifier_activation))

            # compiling model
            model.compile(optimizer=optimizer_type, loss=loss_type, metrics=learn_metrics)

            # fitting model to training data
            model.fit(
                X_train_scaled,
                y_train_categorical,
                epochs=number_epochs,
                shuffle=True,
                verbose=0
            )


            model_eval = model.evaluate(X_test_scaled, y_test_categorical, verbose=0)
            model_accuracy = model_eval[1]
            model_loss = model_eval[0]
            
            print(f'''{num_lays}, {num_nodes}, {num_epochs}: {model_accuracy}''')
        
            current_result = pd.DataFrame({'Accuracy':[model_accuracy], 'Loss':[model_loss], 'Layers':[num_lays], 'Nodes':[num_nodes], 'Epochs':[num_epochs]})
            results_df = results_df.append(current_result)
            
            results_df.to_csv('datasets/NN_Evaluation.csv')


layers, nodes, epochs: accuracy
1, 10, 10: 0.7160000205039978
1, 10, 30: 0.7680000066757202
1, 10, 50: 0.7799999713897705
1, 10, 70: 0.7480000257492065
1, 10, 90: 0.7239999771118164
1, 30, 10: 0.7519999742507935
1, 30, 30: 0.7599999904632568
1, 30, 50: 0.7239999771118164
1, 30, 70: 0.7599999904632568
1, 30, 90: 0.7440000176429749
1, 50, 10: 0.7599999904632568
1, 50, 30: 0.7760000228881836
1, 50, 50: 0.7239999771118164
1, 50, 70: 0.7480000257492065
1, 50, 90: 0.7239999771118164
1, 70, 10: 0.7720000147819519
1, 70, 30: 0.7680000066757202
1, 70, 50: 0.7559999823570251
1, 70, 70: 0.7239999771118164
1, 70, 90: 0.7039999961853027
1, 90, 10: 0.7400000095367432
1, 90, 30: 0.7480000257492065
1, 90, 50: 0.6840000152587891
1, 90, 70: 0.7080000042915344
1, 90, 90: 0.671999990940094
2, 10, 10: 0.7239999771118164
2, 10, 30: 0.7519999742507935
2, 10, 50: 0.7599999904632568
2, 10, 70: 0.7799999713897705
2, 10, 90: 0.7400000095367432
2, 30, 10: 0.7639999985694885
2, 30, 30: 0.7519999742507935
2, 30, 50

In [180]:
# Sorting through Accuracy and Loss for Optimal Inputs
results_df.sort_values('Loss', ascending=False)

Unnamed: 0,Accuracy,Epochs,Layers,Loss,Nodes
0,0.676,90,4,3.633299,30
0,0.724,70,5,3.270233,90
0,0.720,90,5,3.106154,50
0,0.712,90,4,3.041902,70
0,0.700,90,4,2.917255,70
...,...,...,...,...,...
0,0.756,10,3,0.477906,30
0,0.756,30,1,0.473475,10
0,0.772,10,1,0.472219,70
0,0.752,10,3,0.470935,30


In [177]:
# Grouping by Inputs (Nodes, Layers, Epochs) for Accuracy and Loss
results_df.groupby(['Nodes']).mean()

Unnamed: 0_level_0,Accuracy,Loss
Nodes,Unnamed: 1_level_1,Unnamed: 2_level_1
10,0.7448,0.567095
30,0.73424,1.212214
50,0.72888,1.514524
70,0.7272,1.605138
90,0.72736,1.71906
