In [62]:
import os
import pandas as pd

# Read the raw loan records. The path is relative, so it is resolved against
# the directory the notebook kernel is running in.
filepath = "loan_dataset.csv"
loan_data = pd.read_csv(filepath_or_buffer=filepath)

# Display the column labels so the names used in later cells can be checked.
loan_data.columns

Index(['Customer_ID', 'Status_Checking_Acc', 'Duration_in_Months',
       'Credit_History', 'Purposre_Credit_Taken', 'Credit_Amount',
       'Savings_Acc', 'Years_At_Present_Employment', 'Inst_Rt_Income',
       'Marital_Status_Gender', 'Other_Debtors_Guarantors',
       'Current_Address_Yrs', 'Property', 'Age', 'Other_Inst_Plans ',
       'Housing', 'Num_CC', 'Job', 'Dependents', 'Telephone', 'Foreign_Worker',
       'Default_On_Payment', 'Count'],
      dtype='object')

In [63]:
# Remove the columns that we don't have enough data on, then preview the
# first rows of the trimmed frame.
loan_data = loan_data.drop(
    columns=["Inst_Rt_Income", "Property", "Current_Address_Yrs", "Count"]
)
loan_data.head()


Unnamed: 0,Customer_ID,Status_Checking_Acc,Duration_in_Months,Credit_History,Purposre_Credit_Taken,Credit_Amount,Savings_Acc,Years_At_Present_Employment,Marital_Status_Gender,Other_Debtors_Guarantors,Age,Other_Inst_Plans,Housing,Num_CC,Job,Dependents,Telephone,Foreign_Worker,Default_On_Payment
0,100001,A11,6,A34,A43,1169,A65,A75,A93,A101,67,A143,A152,2,A173,1,A192,A201,0
1,100002,A12,48,A32,A43,5951,A61,A73,A92,A101,22,A143,A152,1,A173,1,A191,A201,0
2,100003,A14,12,A34,A46,2096,A61,A74,A93,A101,49,A143,A152,1,A172,2,A191,A201,0
3,100004,A11,42,A32,A42,7882,A61,A74,A93,A103,45,A143,A153,1,A173,2,A191,A201,0
4,100005,A11,24,A33,A40,4870,A61,A73,A93,A101,53,A143,A153,2,A173,2,A191,A201,1


In [64]:
# Bin 'Duration_in_Months' into six labelled ranges and store the result in a
# new 'Duration_binned' column.
# pd.cut builds right-closed intervals from the edges below:
# (0, 11] -> B10, (11, 23] -> B11, (23, 35] -> B12,
# (35, 47] -> B13, (47, 59] -> B14, (59, 72] -> B15.
duration_bins = [0, 11, 23, 35, 47, 59, 72]
duration_labels = ['B10', 'B11', 'B12', 'B13', 'B14', 'B15']

loan_data["Duration_binned"] = pd.cut(
    loan_data["Duration_in_Months"],
    bins=duration_bins,
    labels=duration_labels,
    right=True,
)

# pd.cut silently returns NaN for any value outside the outermost edges
# (<= 0 or > 72 here). Fail loudly rather than letting blank categories
# slip into the data that is exported later.
duration_unbinned = (
    loan_data["Duration_binned"].isna() & loan_data["Duration_in_Months"].notna()
)
assert not duration_unbinned.any(), (
    f"{int(duration_unbinned.sum())} 'Duration_in_Months' value(s) fall outside "
    f"the bin edges {duration_bins}"
)

loan_data.head()


Unnamed: 0,Customer_ID,Status_Checking_Acc,Duration_in_Months,Credit_History,Purposre_Credit_Taken,Credit_Amount,Savings_Acc,Years_At_Present_Employment,Marital_Status_Gender,Other_Debtors_Guarantors,Age,Other_Inst_Plans,Housing,Num_CC,Job,Dependents,Telephone,Foreign_Worker,Default_On_Payment,Duration_binned
0,100001,A11,6,A34,A43,1169,A65,A75,A93,A101,67,A143,A152,2,A173,1,A192,A201,0,B10
1,100002,A12,48,A32,A43,5951,A61,A73,A92,A101,22,A143,A152,1,A173,1,A191,A201,0,B14
2,100003,A14,12,A34,A46,2096,A61,A74,A93,A101,49,A143,A152,1,A172,2,A191,A201,0,B11
3,100004,A11,42,A32,A42,7882,A61,A74,A93,A103,45,A143,A153,1,A173,2,A191,A201,0,B13
4,100005,A11,24,A33,A40,4870,A61,A73,A93,A101,53,A143,A153,2,A173,2,A191,A201,1,B12


In [65]:
# Bin 'Credit_Amount' into eight labelled ranges and append the result to the
# frame as 'Credit_amount_binned'.
# pd.cut builds right-closed intervals from the edges below:
# (0, 2499] -> B20, (2499, 4999] -> B21, ..., (17499, 19999] -> B27.
credit_amount_bins = [0, 2499, 4999, 7499, 9999, 12499, 14999, 17499, 19999]
credit_amount_labels = ['B20', 'B21', 'B22', 'B23', 'B24', 'B25', 'B26', 'B27']

loan_data["Credit_amount_binned"] = pd.cut(
    loan_data['Credit_Amount'],
    bins=credit_amount_bins,
    labels=credit_amount_labels,
    right=True,
)

# pd.cut silently returns NaN for any value outside the outermost edges
# (<= 0 or > 19999 here). Fail loudly rather than letting blank categories
# slip into the data that is exported later.
credit_amount_unbinned = (
    loan_data["Credit_amount_binned"].isna() & loan_data['Credit_Amount'].notna()
)
assert not credit_amount_unbinned.any(), (
    f"{int(credit_amount_unbinned.sum())} 'Credit_Amount' value(s) fall outside "
    f"the bin edges {credit_amount_bins}"
)

loan_data.head()


Unnamed: 0,Customer_ID,Status_Checking_Acc,Duration_in_Months,Credit_History,Purposre_Credit_Taken,Credit_Amount,Savings_Acc,Years_At_Present_Employment,Marital_Status_Gender,Other_Debtors_Guarantors,...,Other_Inst_Plans,Housing,Num_CC,Job,Dependents,Telephone,Foreign_Worker,Default_On_Payment,Duration_binned,Credit_amount_binned
0,100001,A11,6,A34,A43,1169,A65,A75,A93,A101,...,A143,A152,2,A173,1,A192,A201,0,B10,B20
1,100002,A12,48,A32,A43,5951,A61,A73,A92,A101,...,A143,A152,1,A173,1,A191,A201,0,B14,B22
2,100003,A14,12,A34,A46,2096,A61,A74,A93,A101,...,A143,A152,1,A172,2,A191,A201,0,B11,B20
3,100004,A11,42,A32,A42,7882,A61,A74,A93,A103,...,A143,A153,1,A173,2,A191,A201,0,B13,B23
4,100005,A11,24,A33,A40,4870,A61,A73,A93,A101,...,A143,A153,2,A173,2,A191,A201,1,B12,B21


In [66]:
# Bin 'Age' into six labelled ranges and append the result to the frame as
# 'Age_binned'.
# pd.cut builds right-closed intervals from the edges below:
# (0, 24] -> B30, (24, 34] -> B31, (34, 44] -> B32,
# (44, 54] -> B33, (54, 64] -> B34, (64, 75] -> B35.
age_bins = [0, 24, 34, 44, 54, 64, 75]
age_labels = ['B30', 'B31', 'B32', 'B33', 'B34', 'B35']

loan_data["Age_binned"] = pd.cut(
    loan_data["Age"],
    bins=age_bins,
    labels=age_labels,
    right=True,
)

# pd.cut silently returns NaN for any value outside the outermost edges
# (<= 0 or > 75 here). Fail loudly rather than letting blank categories
# slip into the data that is exported later.
age_unbinned = loan_data["Age_binned"].isna() & loan_data["Age"].notna()
assert not age_unbinned.any(), (
    f"{int(age_unbinned.sum())} 'Age' value(s) fall outside "
    f"the bin edges {age_bins}"
)

loan_data.head()

Unnamed: 0,Customer_ID,Status_Checking_Acc,Duration_in_Months,Credit_History,Purposre_Credit_Taken,Credit_Amount,Savings_Acc,Years_At_Present_Employment,Marital_Status_Gender,Other_Debtors_Guarantors,...,Housing,Num_CC,Job,Dependents,Telephone,Foreign_Worker,Default_On_Payment,Duration_binned,Credit_amount_binned,Age_binned
0,100001,A11,6,A34,A43,1169,A65,A75,A93,A101,...,A152,2,A173,1,A192,A201,0,B10,B20,B35
1,100002,A12,48,A32,A43,5951,A61,A73,A92,A101,...,A152,1,A173,1,A191,A201,0,B14,B22,B30
2,100003,A14,12,A34,A46,2096,A61,A74,A93,A101,...,A152,1,A172,2,A191,A201,0,B11,B20,B33
3,100004,A11,42,A32,A42,7882,A61,A74,A93,A103,...,A153,1,A173,2,A191,A201,0,B13,B23,B33
4,100005,A11,24,A33,A40,4870,A61,A73,A93,A101,...,A153,2,A173,2,A191,A201,1,B12,B21,B33


In [67]:
# Remove the raw 'Duration_in_Months', 'Credit_Amount', and 'Age' columns,
# then list the column labels that remain.
loan_data = loan_data.drop(
    columns=["Duration_in_Months", "Credit_Amount", "Age"]
)
loan_data.columns


Index(['Customer_ID', 'Status_Checking_Acc', 'Credit_History',
       'Purposre_Credit_Taken', 'Savings_Acc', 'Years_At_Present_Employment',
       'Marital_Status_Gender', 'Other_Debtors_Guarantors',
       'Other_Inst_Plans ', 'Housing', 'Num_CC', 'Job', 'Dependents',
       'Telephone', 'Foreign_Worker', 'Default_On_Payment', 'Duration_binned',
       'Credit_amount_binned', 'Age_binned'],
      dtype='object')

In [68]:
# Reorder the columns: 'Customer_ID', 'Num_CC' and 'Dependents' (integer data)
# go to the front and 'Default_On_Payment' moves to the last position.
# NOTE: 'Other_Inst_Plans ' ends with a space in the loaded column labels, so
# the trailing space in the name below is required.
nn_column_order = [
    'Customer_ID', 'Num_CC', 'Dependents',
    'Status_Checking_Acc', 'Credit_History', 'Purposre_Credit_Taken',
    'Savings_Acc', 'Years_At_Present_Employment', 'Marital_Status_Gender',
    'Other_Debtors_Guarantors', 'Other_Inst_Plans ', 'Housing', 'Job',
    'Telephone', 'Foreign_Worker',
    'Duration_binned', 'Credit_amount_binned', 'Age_binned',
    'Default_On_Payment',
]
NN_loan_data = loan_data[nn_column_order]

NN_loan_data.head()

Unnamed: 0,Customer_ID,Num_CC,Dependents,Status_Checking_Acc,Credit_History,Purposre_Credit_Taken,Savings_Acc,Years_At_Present_Employment,Marital_Status_Gender,Other_Debtors_Guarantors,Other_Inst_Plans,Housing,Job,Telephone,Foreign_Worker,Duration_binned,Credit_amount_binned,Age_binned,Default_On_Payment
0,100001,2,1,A11,A34,A43,A65,A75,A93,A101,A143,A152,A173,A192,A201,B10,B20,B35,0
1,100002,1,1,A12,A32,A43,A61,A73,A92,A101,A143,A152,A173,A191,A201,B14,B22,B30,0
2,100003,1,2,A14,A34,A46,A61,A74,A93,A101,A143,A152,A172,A191,A201,B11,B20,B33,0
3,100004,1,2,A11,A32,A42,A61,A74,A93,A103,A143,A153,A173,A191,A201,B13,B23,B33,0
4,100005,2,2,A11,A33,A40,A61,A73,A93,A101,A143,A153,A173,A191,A201,B12,B21,B33,1


In [69]:
# Inspect the dtype of every column in the reordered frame.
NN_loan_data.dtypes

Customer_ID                       int64
Num_CC                            int64
Dependents                        int64
Status_Checking_Acc              object
Credit_History                   object
Purposre_Credit_Taken            object
Savings_Acc                      object
Years_At_Present_Employment      object
Marital_Status_Gender            object
Other_Debtors_Guarantors         object
Other_Inst_Plans                 object
Housing                          object
Job                              object
Telephone                        object
Foreign_Worker                   object
Duration_binned                category
Credit_amount_binned           category
Age_binned                     category
Default_On_Payment                int64
dtype: object

In [72]:
# Write the reordered frame to 'NN_dataset.csv' with a header row and without
# the row index, then preview the first rows of the frame that was written.
NN_loan_data.to_csv(path_or_buf="NN_dataset.csv", header=True, index=False)
NN_loan_data.head()


Unnamed: 0,Customer_ID,Num_CC,Dependents,Status_Checking_Acc,Credit_History,Purposre_Credit_Taken,Savings_Acc,Years_At_Present_Employment,Marital_Status_Gender,Other_Debtors_Guarantors,Other_Inst_Plans,Housing,Job,Telephone,Foreign_Worker,Duration_binned,Credit_amount_binned,Age_binned,Default_On_Payment
0,100001,2,1,A11,A34,A43,A65,A75,A93,A101,A143,A152,A173,A192,A201,B10,B20,B35,0
1,100002,1,1,A12,A32,A43,A61,A73,A92,A101,A143,A152,A173,A191,A201,B14,B22,B30,0
2,100003,1,2,A14,A34,A46,A61,A74,A93,A101,A143,A152,A172,A191,A201,B11,B20,B33,0
3,100004,1,2,A11,A32,A42,A61,A74,A93,A103,A143,A153,A173,A191,A201,B13,B23,B33,0
4,100005,2,2,A11,A33,A40,A61,A73,A93,A101,A143,A153,A173,A191,A201,B12,B21,B33,1


In [71]:
# Check the data type of each column in NN_loan_data.
NN_loan_data.dtypes

Customer_ID                       int64
Num_CC                            int64
Dependents                        int64
Status_Checking_Acc              object
Credit_History                   object
Purposre_Credit_Taken            object
Savings_Acc                      object
Years_At_Present_Employment      object
Marital_Status_Gender            object
Other_Debtors_Guarantors         object
Other_Inst_Plans                 object
Housing                          object
Job                              object
Telephone                        object
Foreign_Worker                   object
Duration_binned                category
Credit_amount_binned           category
Age_binned                     category
Default_On_Payment                int64
dtype: object

In [73]:
# Use the NN_dataset.csv file written above as the input for all NN_ML models.