<a href="https://colab.research.google.com/github/Ryanmathew64/LoanPredictionModel/blob/main/Loan_Prediction_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Utilizing a Linear Regression Machine Learning Model to Predict Loan Eligibility

Setup Tools

In [3]:
!pip install -q sklearn

In [4]:
%tensorflow_version 2.x

Import Packages

In [29]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import io
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib

import tensorflow.compat.v2.feature_column as fc

import tensorflow as tf

Load and Parse Datasets

In [6]:
from google.colab import files
uploaded = files.upload()

Saving test_Y3wMUE5_7gLdaTN.csv to test_Y3wMUE5_7gLdaTN.csv
Saving train_u6lujuX_CVtuZ9i.csv to train_u6lujuX_CVtuZ9i.csv


In [70]:
from pickle import STRING
dftrain = pd.read_csv(io.BytesIO(uploaded['train_u6lujuX_CVtuZ9i.csv']))
dfeval = pd.read_csv(io.BytesIO(uploaded['test_Y3wMUE5_7gLdaTN.csv']))

# parse dftrain
dftrain['Self_Employed'].fillna('', inplace=True)
dftrain['Gender'].fillna('', inplace=True)
dftrain['Married'].fillna('', inplace=True)
dftrain['Education'].fillna('', inplace=True)
dftrain['Property_Area'].fillna('', inplace=True)

dftrain['LoanAmount'].fillna(0, inplace=True)
dftrain['Loan_Amount_Term'].fillna(0, inplace=True)
dftrain['Credit_History'].fillna(2, inplace=True)
dftrain.Dependents.replace({'3+': 3}, regex=True, inplace=True)
dftrain.Dependents.fillna(4, inplace=True)

dftrain.Dependents = dftrain.Dependents.astype(float)

# parse dfeval
dfeval['Self_Employed'].fillna('', inplace=True)
dfeval['Gender'].fillna('', inplace=True)
dfeval['Married'].fillna('', inplace=True)
dfeval['Education'].fillna('', inplace=True)
dfeval['Property_Area'].fillna('', inplace=True)

dfeval['LoanAmount'].fillna(0, inplace=True)
dfeval['Loan_Amount_Term'].fillna(0, inplace=True)
dfeval['Credit_History'].fillna(2, inplace=True)
dfeval.Dependents.replace({'3+': 3}, regex=True, inplace=True)
dfeval.Dependents.fillna(4, inplace=True)

dfeval.Dependents = dfeval.Dependents.astype(float) 

y_train = dftrain.pop('Loan_Status')

y_train.replace({'Y': 1}, regex=True, inplace=True)
y_train.replace({'N': 0}, regex=True, inplace=True)
y_train = y_train.astype(int)

print(dftrain.dtypes)

Loan_ID               object
Gender                object
Married               object
Dependents           float64
Education             object
Self_Employed         object
ApplicantIncome        int64
CoapplicantIncome    float64
LoanAmount           float64
Loan_Amount_Term     float64
Credit_History       float64
Property_Area         object
dtype: object


Parse Feature Columns

In [73]:
CATEGORICAL_COLUMNS = ['Loan_ID', 'Gender', 'Married', 'Education',
                        'Self_Employed', 'Property_Area']
NUMERIC_COLUMNS = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term',
                   'Credit_History', 'Dependents']

features = ['Loan_ID', 'Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term', 'Credit_History','Property_Area']

feature_columns = []
for feature_name in CATEGORICAL_COLUMNS:
  vocabulary = dftrain[feature_name].unique() 
  feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary))

for feature_name in NUMERIC_COLUMNS:
  feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.float32))

print(feature_columns)

[VocabularyListCategoricalColumn(key='Loan_ID', vocabulary_list=('LP001002', 'LP001003', 'LP001005', 'LP001006', 'LP001008', 'LP001011', 'LP001013', 'LP001014', 'LP001018', 'LP001020', 'LP001024', 'LP001027', 'LP001028', 'LP001029', 'LP001030', 'LP001032', 'LP001034', 'LP001036', 'LP001038', 'LP001041', 'LP001043', 'LP001046', 'LP001047', 'LP001050', 'LP001052', 'LP001066', 'LP001068', 'LP001073', 'LP001086', 'LP001087', 'LP001091', 'LP001095', 'LP001097', 'LP001098', 'LP001100', 'LP001106', 'LP001109', 'LP001112', 'LP001114', 'LP001116', 'LP001119', 'LP001120', 'LP001123', 'LP001131', 'LP001136', 'LP001137', 'LP001138', 'LP001144', 'LP001146', 'LP001151', 'LP001155', 'LP001157', 'LP001164', 'LP001179', 'LP001186', 'LP001194', 'LP001195', 'LP001197', 'LP001198', 'LP001199', 'LP001205', 'LP001206', 'LP001207', 'LP001213', 'LP001222', 'LP001225', 'LP001228', 'LP001233', 'LP001238', 'LP001241', 'LP001243', 'LP001245', 'LP001248', 'LP001250', 'LP001253', 'LP001255', 'LP001256', 'LP001259',

Define Input Functions for the Datasets

In [76]:
def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
  def input_function(): 
    ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df)) 
    if shuffle:
      ds = ds.shuffle(1000)  
    ds = ds.batch(batch_size).repeat(num_epochs)  
    return ds  
  return input_function

def input_fn(features, batch_size=256):
    # Convert the inputs to a Dataset without labels.
    return tf.data.Dataset.from_tensor_slices(dict(features)).batch(batch_size)  

train_input_fn = make_input_fn(dftrain, y_train)

Create and Train the Model

In [None]:
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)
linear_est.train(train_input_fn)

Predicting and Testing with the Model 

In [85]:
res = []
for index, row in dfeval.iterrows():
  predict = {}
  for feature in features:
    valid = True
    isString = dftrain[feature].dtype != float
    while valid: 
      if isString:
        val2 = row[index]
        valid = False
      else:
        val = row[index]
        if not val.isdigit(): valid = False

    print(dftrain[feature].dtype)
    if isString:
      predict[feature] = [val2]
    else:
      predict[feature] = [val]

  predictions = linear_est.predict(input_fn=lambda: input_fn(predict))
  for pred_dict in predictions:
      class_id = pred_dict['class_ids'][0]
      probability = pred_dict['probabilities'][class_id]

      print('Prediction is "{}" ({:.1f}%)'.format(
          class_id, 100 * probability))

object
object
object
float64
object
object
int64
float64
float64
float64
float64
object
INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.
  getter=tf.compat.v1.get_variable)


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Restoring parameters from /tmp/tmpfhuhebou/model.ckpt-200


INFO:tensorflow:Restoring parameters from /tmp/tmpfhuhebou/model.ckpt-200


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


UnimplementedError: ignored