In [155]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.linear_model import LinearRegression,LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeRegressor,DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR,SVC
from sklearn.preprocessing import LabelEncoder,StandardScaler,MaxAbsScaler,MinMaxScaler,OneHotEncoder
from sklearn.metrics import classification_report,confusion_matrix
import seaborn as sns



In [156]:
# Read the raw loan records from the CSV next to the notebook and preview the first rows.
df = pd.read_csv("loan_data.csv")
df.head(5)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
1,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
2,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
3,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
4,LP001013,Male,Yes,0,Not Graduate,No,2333,1516.0,95.0,360.0,1.0,Urban,Y


In [157]:
# Number of missing values in each column of the raw frame.
df.isna().sum()

Loan_ID               0
Gender                5
Married               0
Dependents            8
Education             0
Self_Employed        21
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount            0
Loan_Amount_Term     11
Credit_History       30
Property_Area         0
Loan_Status           0
dtype: int64

In [158]:
# (rows, columns) of the raw frame.
df.shape

(381, 13)

In [159]:
# Column labels of the raw frame, used to decide which columns to drop next.
df.columns

Index(['Loan_ID', 'Gender', 'Married', 'Dependents', 'Education',
       'Self_Employed', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
       'Loan_Amount_Term', 'Credit_History', 'Property_Area', 'Loan_Status'],
      dtype='object')

In [160]:
# Working copy without the identifier and the three demographic columns;
# `df` itself is left untouched.
df1 = df.drop(columns=['Loan_ID', 'Gender', 'Married', 'Dependents'])

In [161]:
# Missing values per remaining column.
df1.isna().sum()

Education             0
Self_Employed        21
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount            0
Loan_Amount_Term     11
Credit_History       30
Property_Area         0
Loan_Status           0
dtype: int64

In [162]:
# Preview the reduced frame.
df1.head(5)

Unnamed: 0,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
1,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
2,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
3,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
4,Not Graduate,No,2333,1516.0,95.0,360.0,1.0,Urban,Y


In [163]:
# Distinct values in the Education column.
df1['Education'].unique()

array(['Graduate', 'Not Graduate'], dtype=object)

In [164]:
# Distinct values in the Self_Employed column (NaN shows up here if any are missing).
df1['Self_Employed'].unique()

array(['No', 'Yes', nan], dtype=object)

In [165]:
# Distinct values in the Credit_History column (NaN shows up here if any are missing).
df1['Credit_History'].unique()

array([ 1., nan,  0.])

In [166]:
# Drop every row that still has a missing value. Rebinding the name (instead of
# `inplace=True`) avoids mutating an object other cells have already displayed
# and keeps the step chainable; the resulting frame is the same.
df1 = df1.dropna()

In [167]:
# Re-check missing values per column after dropping incomplete rows.
df1.isna().sum()

Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64

In [168]:
# (rows, columns) of the working frame after dropping rows with missing values.
df1.shape

(320, 9)

In [169]:
# Distinct labels of the target column.
df1['Loan_Status'].unique()

array(['N', 'Y'], dtype=object)

In [170]:
# Distinct values in the Property_Area column.
df1['Property_Area'].unique()


array(['Rural', 'Urban', 'Semiurban'], dtype=object)

In [171]:
# One-hot encode the three categorical feature columns as integer indicator
# columns; all other columns (including the target) pass through unchanged.
df2 = pd.get_dummies(
    df1,
    columns=['Property_Area', 'Education', 'Self_Employed'],
    dtype=int,
)
df2.head(5)

Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Loan_Status,Property_Area_Rural,Property_Area_Semiurban,Property_Area_Urban,Education_Graduate,Education_Not Graduate,Self_Employed_No,Self_Employed_Yes
0,4583,1508.0,128.0,360.0,1.0,N,1,0,0,1,0,1,0
1,3000,0.0,66.0,360.0,1.0,Y,0,0,1,1,0,0,1
2,2583,2358.0,120.0,360.0,1.0,Y,0,0,1,0,1,1,0
3,6000,0.0,141.0,360.0,1.0,Y,0,0,1,1,0,1,0
4,2333,1516.0,95.0,360.0,1.0,Y,0,0,1,0,1,1,0


In [172]:
# Encode the target as integers: 'N' -> 0, 'Y' -> 1.
# `Series.map` replaces the earlier `replace` call, which relied on pandas
# silently downcasting the object column to int64 (deprecated in newer pandas;
# an object-dtype target is not accepted by the classifier).
# The labels are read from df1, which shares df2's index, so re-running this
# cell yields the same result instead of re-encoding already-encoded values.
# `astype` raises if a label other than 'N'/'Y' is present (it would map to NaN).
df2['Loan_Status'] = df1['Loan_Status'].map({'N': 0, 'Y': 1}).astype('int64')
df2.head()


Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Loan_Status,Property_Area_Rural,Property_Area_Semiurban,Property_Area_Urban,Education_Graduate,Education_Not Graduate,Self_Employed_No,Self_Employed_Yes
0,4583,1508.0,128.0,360.0,1.0,0,1,0,0,1,0,1,0
1,3000,0.0,66.0,360.0,1.0,1,0,0,1,1,0,0,1
2,2583,2358.0,120.0,360.0,1.0,1,0,0,1,0,1,1,0
3,6000,0.0,141.0,360.0,1.0,1,0,0,1,1,0,1,0
4,2333,1516.0,95.0,360.0,1.0,1,0,0,1,0,1,1,0


In [173]:
df2.dtypes

ApplicantIncome              int64
CoapplicantIncome          float64
LoanAmount                 float64
Loan_Amount_Term           float64
Credit_History             float64
Loan_Status                  int64
Property_Area_Rural          int32
Property_Area_Semiurban      int32
Property_Area_Urban          int32
Education_Graduate           int32
Education_Not Graduate       int32
Self_Employed_No             int32
Self_Employed_Yes            int32
dtype: object

In [174]:
# Split the encoded frame into the feature matrix and the target vector.
y = df2['Loan_Status']
X = df2.drop(columns='Loan_Status')


In [175]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [176]:
# Baseline classifier: logistic regression with scikit-learn's default settings,
# fitted on the training split. (DecisionTreeClassifier() is a drop-in
# alternative that can be tried here.)
model = LogisticRegression().fit(X_train, y_train)

# Mean accuracy on the held-out split; displayed as the cell's output.
model.score(X_test, y_test)

0.8125