## Loan Prediction Analysis

In [71]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
% matplotlib inline

In [81]:
import pickle
import requests, json

In [39]:
train = pd.read_csv("https://www.dropbox.com/s/fa0y3k8a5yyun1q/train.csv?dl=1")
test = pd.read_csv("https://www.dropbox.com/s/vibx2crbishj2qe/test.csv?dl=1")

## Building the Predictive Model

In [40]:
train.loc[train['Loan_Status'] == 'Y', 'Loan_Status'] = 1
train.loc[train['Loan_Status'] == 'N', 'Loan_Status'] = 0

### Fill Missing Data

In [41]:
train['Gender'].fillna(train['Gender'].mode()[0], inplace=True)
train['Married'].fillna(train['Married'].mode()[0], inplace=True)
train['Dependents'].fillna(train['Dependents'].mode()[0], inplace=True)
train['Loan_Amount_Term'].fillna(train['Loan_Amount_Term'].mode()[0], inplace=True)
train['Credit_History'].fillna(train['Credit_History'].mode()[0], inplace=True)

In [42]:
train['Self_Employed'].fillna(train['Self_Employed'].mode()[0], inplace=True)
train['LoanAmount'].fillna(train['LoanAmount'].mean(), inplace=True)

In [43]:
test['Gender'].fillna(test['Gender'].mode()[0], inplace=True)
test['Married'].fillna(test['Married'].mode()[0], inplace=True)
test['Dependents'].fillna(test['Dependents'].mode()[0], inplace=True)
test['Loan_Amount_Term'].fillna(test['Loan_Amount_Term'].mode()[0], inplace=True)
test['Credit_History'].fillna(test['Credit_History'].mode()[0], inplace=True)

In [44]:
test['Self_Employed'].fillna(test['Self_Employed'].mode()[0], inplace=True)
test['LoanAmount'].fillna(test['LoanAmount'].mean(), inplace=True)

### Label Encoder - Convert categorical variables to numeric

In [54]:
from sklearn.preprocessing import LabelEncoder
var_mod = ['Gender','Married','Dependents','Education','Self_Employed','Property_Area','Loan_Status']
le = LabelEncoder()
for i in var_mod:
    train[i] = le.fit_transform(train[i])

In [55]:
from sklearn.preprocessing import LabelEncoder
var_mod = ['Gender','Married','Dependents','Education','Self_Employed','Property_Area']
le = LabelEncoder()
for i in var_mod:
    test[i] = le.fit_transform(test[i])
# categorical variables are now encoded

In [77]:
train.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,1,0,0,0,0,5849,0.0,146.412162,360.0,1.0,2,1
1,LP001003,1,1,1,0,0,4583,1508.0,128.0,360.0,1.0,0,0
2,LP001005,1,1,0,0,1,3000,0.0,66.0,360.0,1.0,2,1
3,LP001006,1,1,0,1,0,2583,2358.0,120.0,360.0,1.0,2,1
4,LP001008,1,0,0,0,0,6000,0.0,141.0,360.0,1.0,2,1


### Build the Model

In [49]:
# Create the target (y) and features (X) numpy arrays: 
X = train[["Gender", "Married", "Dependents", "Education", "Self_Employed", "ApplicantIncome", "CoapplicantIncome", "LoanAmount", "Loan_Amount_Term", "Credit_History", "Property_Area"]].values
y = train['Loan_Status'].values

# build and fit the random forest classifer (rfc)
rfc = RandomForestClassifier(max_depth = 10, min_samples_split = 2, n_estimators = 100, random_state = 1)
forest = rfc.fit(X, y)

# print the score of the fitted random forest classifier
print(forest.score(X, y))

0.9250814332247557


In [50]:
# making the prediction - first extract test features
X_test = test[["Gender", "Married", "Dependents", "Education", "Self_Employed", "ApplicantIncome", "CoapplicantIncome", "LoanAmount", "Loan_Amount_Term", "Credit_History", "Property_Area"]].values

# make prediction using the test dataset
prediction_rfc = forest.predict(X_test)

# Create a data frame with two columns: PassengerId & Survived. Survived contains your predictions
forest_solution = pd.DataFrame(prediction_rfc, columns = ["Loan_Status"])
forest_solution.head()

Unnamed: 0,Loan_Status
0,1
1,1
2,1
3,1
4,1


### Model serialization / pickling

In [76]:
pickle.dump(rfc, open("loan_rfc.pkl", "wb"))

In [63]:
my_random_forest = pickle.load(open("loan_rfc.pkl", "rb"))

In [64]:
my_random_forest

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=10, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
            oob_score=False, random_state=1, verbose=0, warm_start=False)

Start the flask service, from the file 'flask_demo.py' Once its started, use the below to call it.

In [89]:
url = "http://127.0.0.1:9000/api"
data = json.dumps({'Gender': 1, 'Married': 1, 'Dependents': 2, 'Education': 3, 'Self_Employed': 1, 'ApplicantIncome': 5849, 'CoapplicantIncome': 0, 'LoanAmount': 141, 'Loan_Amount_Term': 360, 'Credit_History': 0, 'Property_Area': 0})
r = requests.post(url, data)

print(r.json())

JSONDecodeError: Expecting value: line 1 column 1 (char 0)