##**Mounting Drive**

In [None]:
import os

from google.colab import drive

# Where Google Drive is attached inside the Colab VM, and the project folder
# (inside that mount) that becomes the working directory for relative paths.
DRIVE_MOUNT_POINT = '/content/drive'
PROJECT_DIR = '/content/drive/MyDrive/Personal_Projects'

# force_remount=True re-attaches the drive even if it is already mounted.
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)
os.chdir(PROJECT_DIR)

Mounted at /content/drive


## **Artificial Neural Networks**

**Importing Libraries**

In [None]:
import graphviz
import keras
import numpy as np
import pandas as pd
import pydot
import seaborn as sns
import tensorflow as tf
from IPython.display import SVG
from keras.layers import Dense, Input
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.utils import plot_model
from keras.utils.vis_utils import model_to_dot
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, average_precision_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

**Pre-Processing**

In [None]:
# Load the credit-scoring table from the current working directory and
# preview its first rows.
data = pd.read_csv("Credit_Scoring.csv")
data.head()

Unnamed: 0,Status,Seniority,Home,Time,Age,Marital,Records,Job,Expenses,Income,Assets,Debt,Amount,Price,Finrat,Savings
0,good,9,rent,60,30,married,no_rec,freelance,73,129,0,0,800,846,94.562648,4.2
1,good,17,rent,60,58,widow,no_rec,fixed,48,131,0,0,1000,1658,60.313631,4.98
2,bad,10,owner,36,46,married,yes_rec,freelance,90,200,3000,0,2000,2985,67.001675,1.98
3,good,0,rent,60,24,single,no_rec,fixed,63,182,2500,0,900,1325,67.924528,7.933333
4,good,0,rent,36,26,single,no_rec,fixed,46,107,0,0,310,910,34.065934,7.083871


In [None]:
# (rows, columns) of the loaded frame.
data.shape

(4446, 16)

In [None]:
# Checking if any Missing Values are there in the Dataset
# (one boolean per column: True when at least one entry is null)
data.isnull().any()

Status       False
Seniority    False
Home         False
Time         False
Age          False
Marital      False
Records      False
Job          False
Expenses     False
Income       False
Assets       False
Debt         False
Amount       False
Price        False
Finrat       False
Savings      False
dtype: bool

In [None]:
# Keep only the non-numeric columns and report how many of them there are.
categorical_data = data.select_dtypes(exclude=[np.number])
n_categorical_columns = categorical_data.shape[1]
print ("There are {} categorical Columns in Dataset".format(n_categorical_columns))

There are 5 categorical Columns in Dataset


In [None]:
# Name of all the Categorical Features Present in the Dataset
# (the names are the index of the resulting Series; the boolean next to each
# name only says whether that column holds at least one truthy value)
categorical_data.any()

Status     True
Home       True
Marital    True
Records    True
Job        True
dtype: bool

In [None]:
# Replace every categorical column with integer codes, in place.
# The single encoder is refit for each column, so once the loop finishes it
# only remembers the labels of the last column ('Records').
encoder = LabelEncoder()
for column in ('Status', 'Home', 'Marital', 'Job', 'Records'):
  data[column] = encoder.fit_transform(data[column].astype('str'))

In [None]:
#Checking Data Types of the Features for Confirmation
# (after encoding, no object-typed column should remain)
data.dtypes

Status         int64
Seniority      int64
Home           int64
Time           int64
Age            int64
Marital        int64
Records        int64
Job            int64
Expenses       int64
Income         int64
Assets         int64
Debt           int64
Amount         int64
Price          int64
Finrat       float64
Savings      float64
dtype: object

In [None]:
# Value Counts of 'GOOD' Status and 'BAD' Status
# 'GOOD': 1 and 'BAD': 0
# (LabelEncoder numbers the labels in sorted order, hence bad -> 0, good -> 1)
data.Status.value_counts()

1    3197
0    1249
Name: Status, dtype: int64

In [None]:
# The first column (Status) is the target; every remaining column is a feature.
y = data.iloc[:, 0].values
X = data.iloc[:, 1:].values

# One-hot encoded copy of the labels, one column per class.
Y = pd.get_dummies(y).values

In [None]:
# Element counts (.size) followed by row counts (len) of the label vector,
# the feature matrix and the one-hot labels.
y.size, X.size, Y.size, len(y), len(X), len(Y)

(4446, 66690, 8892, 4446, 4446, 4446)

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the shuffled
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=0,
)

In [None]:
# Report element counts, then shapes, of every split next to the full frame.
print('Size')
print(f'X_train: {X_train.size}\nX_test: {X_test.size}\ny_train: {y_train.size}\ny_test: {y_test.size}\ndata: {data.size}\n')

print('Shape')
print(f'X_train: {X_train.shape}\nX_test: {X_test.shape}\ny_train: {y_train.shape}\ny_test: {y_test.shape}\ndata: {data.shape}\n')

Size
X_train: 53340
X_test: 13350
y_train: 3556
y_test: 890
data: 71136

Shape
X_train: (3556, 15)
X_test: (890, 15)
y_train: (3556,)
y_test: (890,)
data: (4446, 16)



In [None]:
# One-hot encode the integer labels into two columns, matching the 2-unit
# softmax output of the models defined below.
# NOTE(review): this overwrites y_train / y_test, so re-running this cell
# without re-running the split would encode the already-encoded tensors —
# confirm before re-executing out of order.
y_train = tf.one_hot(y_train, depth=2)
y_test = tf.one_hot(y_test, depth=2)

In [None]:
# Re-check the shapes after one-hot encoding the targets.
print('Shape')
print(f'X_train: {X_train.shape}\nX_test: {X_test.shape}\ny_train: {y_train.shape}\ny_test: {y_test.shape}\ndata: {data.shape}\n')

Shape
X_train: (3556, 15)
X_test: (890, 15)
y_train: (3556, 2)
y_test: (890, 2)
data: (4446, 16)



In [None]:
# Standardise the features. The scaler's mean and variance are learned from
# the training set only and then reused, unchanged, on the test set:
# refitting on the test set would scale the two sets differently and leak
# test-set statistics into the evaluation.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

**Building, Training and Testing The Model**

I built the models using old-fashioned trial and error.

Model1 seems to give the highest accuracy

In [None]:
def _build_dense_classifier(hidden_units, activation, weights_path=None):
  """Build a compiled fully connected classifier over the 15 input features.

  Args:
    hidden_units: Sizes of the hidden Dense layers, in order.
    activation: Activation used by every hidden layer.
    weights_path: Optional path to a weights file. When given (and
      non-empty), the weights are loaded into the freshly built network.

  Returns:
    A keras Model with a 2-unit softmax output, compiled with the Adam
    optimizer, binary cross-entropy loss and the accuracy metric.
  """
  inputs = Input((15,))
  hidden = inputs
  for units in hidden_units:
    hidden = Dense(units, activation=activation)(hidden)
  outputs = Dense(2, activation='softmax')(hidden)

  model = Model(inputs=inputs, outputs=outputs)
  # Compile unconditionally so that a model restored from weights can also be
  # evaluated or trained further, not just used for predict().
  model.compile(optimizer="Adam", loss='binary_crossentropy', metrics=['accuracy'])
  if weights_path:
    model.load_weights(weights_path)
  return model


# Acc: 78.94
# Weights file: two_layer_credit_score_model.h5
def build_model1(weights_path=None):
  """Two hidden ReLU layers of 30 units each; optionally load saved weights."""
  return _build_dense_classifier((30, 30), 'relu', weights_path)


# Acc: 78.50
# Weights file: credit_score_model.h5
def build_model2(weights_path=None):
  """Three hidden ReLU layers (30, 30, 15 units); optionally load saved weights."""
  return _build_dense_classifier((30, 30, 15), 'relu', weights_path)


def build_model3(weights_path=None):
  """Two hidden sigmoid layers of 30 units each; optionally load saved weights."""
  return _build_dense_classifier((30, 30), 'sigmoid', weights_path)


def build_model4(weights_path=None):
  """Three hidden sigmoid layers (30, 30, 15 units); optionally load saved weights."""
  return _build_dense_classifier((30, 30, 15), 'sigmoid', weights_path)


def test_model(model, X_test, y_test):
  """Print a classification report and a confusion matrix for `model`.

  Args:
    model: Object whose `predict(X_test)` returns one row of per-class
      scores per sample.
    X_test: Feature rows handed to `model.predict`.
    y_test: Targets with one column per class (e.g. one-hot), one row per
      sample.
  """
  y_pred = model.predict(X_test)
  # Collapse the per-class columns to a single class index per sample.
  y_test_class = np.argmax(y_test, axis=1)
  y_pred_class = np.argmax(y_pred, axis=1)

  print('Classification Report:')
  print(classification_report(y_test_class, y_pred_class))
  print('Confusion Matrix:')
  print(confusion_matrix(y_test_class, y_pred_class))

In [None]:
# Build the two-hidden-layer ReLU network; no weights_path is passed, so it
# starts from freshly initialised weights.
model = build_model1()

In [None]:
# Layer-by-layer overview of the network with parameter counts.
model.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 15)]              0         
_________________________________________________________________
dense_6 (Dense)              (None, 30)                480       
_________________________________________________________________
dense_7 (Dense)              (None, 30)                930       
_________________________________________________________________
dense_8 (Dense)              (None, 2)                 62        
Total params: 1,472
Trainable params: 1,472
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train for 100 epochs in mini-batches of 10 samples.
# NOTE(review): no random seed for the weight initialisation is visible in
# this part of the notebook, so the metrics may differ between runs — confirm.
model.fit(X_train, y_train, batch_size=10, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f7c6b6a9c90>

In [None]:
# Persist the trained weights under the file name listed above build_model1.
model.save_weights("two_layer_credit_score_model.h5")

In [None]:
# Per-class scores for the held-out rows; argmax over the class axis turns
# both the one-hot targets and the predicted scores into class indices.
y_pred = model.predict(X_test)
y_test_class = np.argmax(y_test,axis=1)
y_pred_class = np.argmax(y_pred,axis=1)

In [None]:
# Per-class precision, recall and F1 on the held-out set.
print(classification_report(y_test_class,y_pred_class))

              precision    recall  f1-score   support

           0       0.60      0.59      0.59       252
           1       0.84      0.85      0.84       638

    accuracy                           0.77       890
   macro avg       0.72      0.72      0.72       890
weighted avg       0.77      0.77      0.77       890



In [None]:
# Rows are the true classes, columns the predicted classes.
print(confusion_matrix(y_test_class,y_pred_class))

[[148 104]
 [ 98 540]]


In [None]:
# Average precision summarises the whole precision-recall curve, so it needs
# the predicted score of the positive class (column 1 of the softmax output),
# not the already-thresholded class labels.
round(average_precision_score(y_test_class, y_pred[:, 1]), 2)

0.82

In [None]:
# Fraction of held-out rows whose predicted class equals the true class.
round(accuracy_score(y_test_class,y_pred_class), 2)

0.77