### Load and Preprocess Data

In [2]:
import pandas as pd
import pickle
from sklearn.preprocessing import LabelEncoder,OneHotEncoder,StandardScaler
from sklearn.model_selection import train_test_split

In [3]:
data = pd.read_csv('loan_data.csv')
data.head()

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,22.0,female,Master,71948.0,0,RENT,35000.0,PERSONAL,16.02,0.49,3.0,561,No,1
1,21.0,female,High School,12282.0,0,OWN,1000.0,EDUCATION,11.14,0.08,2.0,504,Yes,0
2,25.0,female,High School,12438.0,3,MORTGAGE,5500.0,MEDICAL,12.87,0.44,3.0,635,No,1
3,23.0,female,Bachelor,79753.0,0,RENT,35000.0,MEDICAL,15.23,0.44,2.0,675,No,1
4,24.0,male,Master,66135.0,1,RENT,35000.0,MEDICAL,14.27,0.53,4.0,586,No,1


In [4]:
# Enocde data using label encoder and one hot enocder and create pickle file
label_encoder_gender = LabelEncoder()
label_encoder_prev_loan = LabelEncoder()
data['person_gender'] = label_encoder_gender.fit_transform(data['person_gender'])
data['previous_loan_defaults_on_file'] = label_encoder_prev_loan.fit_transform(data['previous_loan_defaults_on_file'])

one_hot_encoder = OneHotEncoder()

one_hot_encoded_values = one_hot_encoder.fit_transform(data[['person_education','person_home_ownership','loan_intent']]).toarray()

with open('pickle/person_gender.pkl','wb') as file:
    pickle.dump(label_encoder_gender,file)

with open('pickle/prev_loan.pkl','wb') as file:
    pickle.dump(label_encoder_prev_loan,file)

with open('pickle/one_hot_encoded.pkl','wb') as file:
    pickle.dump(one_hot_encoder,file)

one_hot_encoded_values


array([[0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [5]:
# create final table

encoded_values_df = pd.DataFrame(one_hot_encoded_values, columns=one_hot_encoder.get_feature_names_out())

encoded_values_df.head()

data = pd.concat([data.drop(['person_education','person_home_ownership','loan_intent'],axis = 1),encoded_values_df],axis=1)


In [29]:
data.columns

Index(['person_age', 'person_gender', 'person_income', 'person_emp_exp',
       'loan_amnt', 'loan_int_rate', 'loan_percent_income',
       'cb_person_cred_hist_length', 'credit_score',
       'previous_loan_defaults_on_file', 'loan_status',
       'person_education_Associate', 'person_education_Bachelor',
       'person_education_Doctorate', 'person_education_High School',
       'person_education_Master', 'person_home_ownership_MORTGAGE',
       'person_home_ownership_OTHER', 'person_home_ownership_OWN',
       'person_home_ownership_RENT', 'loan_intent_DEBTCONSOLIDATION',
       'loan_intent_EDUCATION', 'loan_intent_HOMEIMPROVEMENT',
       'loan_intent_MEDICAL', 'loan_intent_PERSONAL', 'loan_intent_VENTURE'],
      dtype='object')

In [7]:
X = data.drop('loan_status',axis=1)
Y = data['loan_status']

X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=42)

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

with open('pickle/scaler.pkl','wb') as file:
    pickle.dump(scaler,file)

In [8]:
data.head()

Unnamed: 0,person_age,person_gender,person_income,person_emp_exp,loan_amnt,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,...,person_home_ownership_MORTGAGE,person_home_ownership_OTHER,person_home_ownership_OWN,person_home_ownership_RENT,loan_intent_DEBTCONSOLIDATION,loan_intent_EDUCATION,loan_intent_HOMEIMPROVEMENT,loan_intent_MEDICAL,loan_intent_PERSONAL,loan_intent_VENTURE
0,22.0,0,71948.0,0,35000.0,16.02,0.49,3.0,561,0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
1,21.0,0,12282.0,0,1000.0,11.14,0.08,2.0,504,1,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,25.0,0,12438.0,3,5500.0,12.87,0.44,3.0,635,0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,23.0,0,79753.0,0,35000.0,15.23,0.44,2.0,675,0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
4,24.0,1,66135.0,1,35000.0,14.27,0.53,4.0,586,0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0


### ANN 

In [9]:
import tensorflow as tf
from tensorflow.keras.models import Sequential #for seq model
from tensorflow.keras.layers import Dense # for nodes (Hidden Layer)
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import keras
import datetime

In [21]:
model = Sequential([
    Dense(16,activation='relu',input_shape = (X_train.shape[1],)),
    Dense(16,activation='relu'),
    Dense(1,activation='sigmoid')
]
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [22]:
model.summary()

In [23]:
opt = keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer="adam",loss = "binary_crossentropy",metrics=['accuracy'])

In [24]:
early_stopping = EarlyStopping(patience=10,restore_best_weights=True,monitor='val_loss')
log_dir = "logs/fit"+ datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir,histogram_freq=1)


In [26]:
history = model.fit(X_train,Y_train,validation_data=(X_test,Y_test),epochs=50,
                    callbacks=[early_stopping,tensorboard_callback])

Epoch 1/50
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.8027 - loss: 0.3974 - val_accuracy: 0.8948 - val_loss: 0.2304
Epoch 2/50
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8998 - loss: 0.2185 - val_accuracy: 0.8980 - val_loss: 0.2214
Epoch 3/50
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9040 - loss: 0.2114 - val_accuracy: 0.9029 - val_loss: 0.2125
Epoch 4/50
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9086 - loss: 0.2020 - val_accuracy: 0.9074 - val_loss: 0.2082
Epoch 5/50
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9130 - loss: 0.1926 - val_accuracy: 0.9074 - val_loss: 0.2031
Epoch 6/50
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9128 - loss: 0.1911 - val_accuracy: 0.9096 - val_loss: 0.2008
Epoch 7/50
[1m1

In [31]:
model.save('model.h5')

