## LOAN PREDICTION PROBLEM

#### Load packages 

In [34]:
import pandas as pd
import numpy as np                     
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LogisticRegression
import m2cgen as m2c 
import warnings                        # To ignore any warnings
warnings.filterwarnings("ignore")

#### Reading the Data 

In [35]:
# Read the loan records from the project-relative CSV file into a DataFrame.
data = pd.read_csv("data/loans_data.csv")

#### Understanding the Data 

In [36]:
# List every column label of the loaded dataset, one label per line.
for feature_name in data.columns.tolist():
    print(feature_name)

Loan_ID
Gender
Married
Dependents
Education
Self_Employed
ApplicantIncome
CoapplicantIncome
LoanAmount
Loan_Amount_Term
Credit_History
Property_Area
Loan_Status


In [37]:
# Preview the first 5 rows of the dataset (rendered as the cell output).
data.head(n=5)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [38]:
# Count the missing entries in each column.
data.isna().sum()

Loan_ID               0
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

In [39]:
#create function to  preprocess the dataset

def preprocessing(data, scaler=None):
    """Turn the raw loan table into a scaled feature matrix and a 0/1 target.

    All cleaning happens on a copy, so the caller's DataFrame is left
    untouched and the function can be re-run on the same input.

    Steps:
      1. Encode ``Dependents`` ('3+' -> 3) as numbers and ``Loan_Status``
         ('N' -> 0, 'Y' -> 1).
      2. Fill missing values: mode for Gender, Married, Dependents,
         Self_Employed, Credit_History and Loan_Amount_Term; median for
         LoanAmount.
      3. Drop ``Loan_ID`` and separate the ``Loan_Status`` target.
      4. One-hot encode Gender, Married, Education, Self_Employed and
         Property_Area, then scale every feature column.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw loan records containing the columns named above.
    scaler : object, optional
        Any object exposing ``fit_transform(X)``. When omitted, a fresh
        ``StandardScaler()`` is used. Pass your own instance if the fitted
        scaler must be reused later (e.g. to transform new rows before
        feeding them to an exported model).

    Returns
    -------
    X
        The result of ``scaler.fit_transform`` on the encoded features.
    y : numpy.ndarray
        ``Loan_Status`` values with 'N'/'Y' mapped to 0/1.

    Notes
    -----
    The scaler is fitted on every row of ``data``; fit it on the training
    rows only if held-out rows must not influence the scaling statistics.
    """
    # Never edit the caller's frame: chained ``inplace=True`` edits are
    # deprecated in pandas 2.x and stop working under copy-on-write.
    frame = data.copy()

    # Make Dependents uniformly numeric instead of a mix of '0'/'1'/'2'
    # strings and the integer 3.
    frame['Dependents'] = pd.to_numeric(frame['Dependents'].replace('3+', '3'))

    # Map the two known labels; any other value is passed through unchanged.
    status_codes = {'N': 0, 'Y': 1}
    target = frame['Loan_Status'].map(lambda label: status_codes.get(label, label))

    # Impute missing values: most frequent value for categorical-like
    # columns, median for the loan amount.
    for column in ('Gender', 'Married', 'Dependents', 'Self_Employed',
                   'Credit_History', 'Loan_Amount_Term'):
        frame[column] = frame[column].fillna(frame[column].mode()[0])
    frame['LoanAmount'] = frame['LoanAmount'].fillna(frame['LoanAmount'].median())

    # Separate features from the identifier and the target.
    features = frame.drop(columns=['Loan_ID', 'Loan_Status'])
    y = target.values

    # One-hot encode the categorical columns, then force a homogeneous float
    # table so the scaler receives the same numbers on every pandas version.
    features = pd.get_dummies(
        features,
        columns=["Gender", "Married", "Education", "Self_Employed", "Property_Area"],
    ).astype(float)

    if scaler is None:
        scaler = StandardScaler()
    X = scaler.fit_transform(features)

    return X, y

In [40]:
# Preprocess the dataset: X receives the scaled feature matrix, y the 0/1 loan-status target.
X,y = preprocessing(data) 

In [41]:
# split into train and test set (10% of the rows are held out for testing)
# A fixed random_state makes the shuffle, and therefore the fitted coefficients
# and the code exported from them, reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [42]:
# Build a logistic-regression model with default hyperparameters,
# then fit it on the training split.
classifier = LogisticRegression()

classifier.fit(X=X_train, y=y_train)

LogisticRegression()

In [43]:
# convert model to pure python code
# The exporter's result is kept in memory as a string of source code (shown in the next cell's output).
model_to_python = m2c.export_to_python(classifier)  

In [44]:
# Display the generated Python source (rendered as an escaped string literal).
model_to_python 

'def score(input):\n    return (((((((((((((((((0.824684362781952) + ((input[0]) * (0.06705188608830237))) + ((input[1]) * (0.030980248874473752))) + ((input[2]) * (-0.15156641870891968))) + ((input[3]) * (-0.1557993018671343))) + ((input[4]) * (-0.11555585667999257))) + ((input[5]) * (1.2714119355949056))) + ((input[6]) * (-0.029162094298996747))) + ((input[7]) * (0.029162094298996657))) + ((input[8]) * (-0.14545593952238894))) + ((input[9]) * (0.14545593952238892))) + ((input[10]) * (0.08574302769029615))) + ((input[11]) * (-0.08574302769029615))) + ((input[12]) * (0.013236588480741619))) + ((input[13]) * (-0.013236588480741728))) + ((input[14]) * (-0.14608079426164294))) + ((input[15]) * (0.2145218271115469))) + ((input[16]) * (-0.08025881770412328))\n'

In [45]:
#pure python code 

def score(input):
    """Evaluate the linear part of the exported logistic-regression model.

    Parameters
    ----------
    input : sequence of numbers
        Indexable container holding at least 17 feature values; positions
        0 through 16 are read.

    Returns
    -------
    The intercept plus the weighted sum of the 17 features. No sigmoid or
    thresholding is applied here.
    """
    intercept = 0.7929123964945446
    weights = (
        0.07801862594632314,
        -0.014853900985478468,
        -0.15783041201914427,
        -0.05222073553791883,
        -0.0787403404504791,
        1.3714807410150505,
        0.015077765348160292,
        -0.015077765348160353,
        -0.12161041350915254,
        0.12161041350915253,
        0.09387440269562626,
        -0.09387440269562626,
        -0.0047109053878701835,
        0.004710905387870008,
        -0.14569247529698154,
        0.19858601990225683,
        -0.06417592734444703,
    )

    # Accumulate strictly left to right (no sum()) so the floating-point
    # rounding is the same as in the nested expression this was derived from.
    total = intercept
    for position, weight in enumerate(weights):
        total = total + input[position] * weight
    return total

In [46]:
# select single test data to use from the test set
# (the row at index 6 of X_test; the bare name on the last line displays it as the cell output)
test_data = X_test[6]
test_data

array([-0.73780632, -0.26891249,  0.01530652, -0.12794631,  0.2732313 ,
        0.41173269,  2.11710719, -2.11710719,  1.37208932, -1.37208932,
        0.52836225, -0.52836225,  0.39260074, -0.39260074,  1.55889948,
       -0.7820157 , -0.70020801])

In [47]:
# perform prediction by using the trained model
# The single sample is reshaped into one row with as many columns as it has values before being passed to predict().
pred = classifier.predict(np.reshape(test_data, (1, -1)))
print("prediction result: {}".format(pred))

prediction result: [1]


In [48]:
# test prediction in pure python code
# Hand-written, already-scaled feature row: 17 values, one per index read by score().
# Named sample_features rather than `input` so the builtin input() is not shadowed
# for the rest of the kernel session.
sample_features = [ 1.24474546,  1.9817189 , -0.55448733,  3.02536229,  0.2732313 ,
        0.41173269, -0.47234264,  0.47234264, -0.72881553,  0.72881553,
        0.52836225, -0.52836225, -2.54711697,  2.54711697,  1.55889948,
       -0.7820157 , -0.70020801]

pred = score(sample_features)
# score() returns the raw linear decision value, not a class label. The predicted
# class is 1 when that value is positive and 0 otherwise, so threshold at zero;
# truncating with int() would turn 0.4 into 0 and -1.7 into -1.
print("prediction result: {}".format(int(pred > 0)))

prediction result: 1


In [49]:
# convert model to pure PHP code
# The exporter's result is kept in memory as a string of PHP source (shown in the next cell's output).
model_to_php = m2c.export_to_php(classifier)  

In [50]:
# Display the generated PHP source (rendered as an escaped string literal).
model_to_php 

'<?php\nfunction score(array $input) {\n    return (((((((((((((((((0.824684362781952) + (($input[0]) * (0.06705188608830237))) + (($input[1]) * (0.030980248874473752))) + (($input[2]) * (-0.15156641870891968))) + (($input[3]) * (-0.1557993018671343))) + (($input[4]) * (-0.11555585667999257))) + (($input[5]) * (1.2714119355949056))) + (($input[6]) * (-0.029162094298996747))) + (($input[7]) * (0.029162094298996657))) + (($input[8]) * (-0.14545593952238894))) + (($input[9]) * (0.14545593952238892))) + (($input[10]) * (0.08574302769029615))) + (($input[11]) * (-0.08574302769029615))) + (($input[12]) * (0.013236588480741619))) + (($input[13]) * (-0.013236588480741728))) + (($input[14]) * (-0.14608079426164294))) + (($input[15]) * (0.2145218271115469))) + (($input[16]) * (-0.08025881770412328));\n}\n'

In [51]:
# convert model to pure Javascript code
# The exporter's result is kept in memory as a string of JavaScript source (shown in the next cell's output).
model_to_javascript = m2c.export_to_javascript(classifier)  

In [52]:
# Display the generated JavaScript source (rendered as an escaped string literal).
model_to_javascript 

'function score(input) {\n    return (((((((((((((((((0.824684362781952) + ((input[0]) * (0.06705188608830237))) + ((input[1]) * (0.030980248874473752))) + ((input[2]) * (-0.15156641870891968))) + ((input[3]) * (-0.1557993018671343))) + ((input[4]) * (-0.11555585667999257))) + ((input[5]) * (1.2714119355949056))) + ((input[6]) * (-0.029162094298996747))) + ((input[7]) * (0.029162094298996657))) + ((input[8]) * (-0.14545593952238894))) + ((input[9]) * (0.14545593952238892))) + ((input[10]) * (0.08574302769029615))) + ((input[11]) * (-0.08574302769029615))) + ((input[12]) * (0.013236588480741619))) + ((input[13]) * (-0.013236588480741728))) + ((input[14]) * (-0.14608079426164294))) + ((input[15]) * (0.2145218271115469))) + ((input[16]) * (-0.08025881770412328));\n}\n'