In [36]:
import pandas as pd
from sklearn.preprocessing import Imputer, StandardScaler

In [37]:
train = pd.read_csv(r"F:\Desktop\DataSets\Loan_Prediction\train.csv")
test = pd.read_csv(r"F:\Desktop\DataSets\Loan_Prediction\test.csv")

In [38]:
print(train.isnull().sum(), train.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 614 entries, 0 to 613
Data columns (total 13 columns):
Loan_ID              614 non-null object
Gender               601 non-null object
Married              611 non-null object
Dependents           599 non-null object
Education            614 non-null object
Self_Employed        582 non-null object
ApplicantIncome      614 non-null int64
CoapplicantIncome    614 non-null float64
LoanAmount           592 non-null float64
Loan_Amount_Term     600 non-null float64
Credit_History       564 non-null float64
Property_Area        614 non-null object
Loan_Status          614 non-null object
dtypes: float64(4), int64(1), object(8)
memory usage: 62.4+ KB
Loan_ID               0
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Sta

In [39]:
train = train.set_index('Loan_ID')
test = test.set_index('Loan_ID')

In [40]:
train['Loan_Status'] = train['Loan_Status'].astype('category')
cat_cols = ['Gender', 'Married', 'Dependents','Education', 'Self_Employed', 'Property_Area']
for cols in cat_cols:
    train[cols] = train[cols].astype('category')
    test[cols] = test[cols].astype('category')

In [41]:
train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 614 entries, LP001002 to LP002990
Data columns (total 12 columns):
Gender               601 non-null category
Married              611 non-null category
Dependents           599 non-null category
Education            614 non-null category
Self_Employed        582 non-null category
ApplicantIncome      614 non-null int64
CoapplicantIncome    614 non-null float64
LoanAmount           592 non-null float64
Loan_Amount_Term     600 non-null float64
Credit_History       564 non-null float64
Property_Area        614 non-null category
Loan_Status          614 non-null category
dtypes: category(7), float64(4), int64(1)
memory usage: 33.7+ KB


In [42]:
for col in cat_cols:
    mode  = train[col].value_counts().index[0]
    train[col] = train[col].fillna(mode)
    test[col] = test[col].fillna(mode)

In [43]:
train.isnull().sum()

Gender                0
Married               0
Dependents            0
Education             0
Self_Employed         0
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

In [44]:
test.isnull().sum()

Gender                0
Married               0
Dependents            0
Education             0
Self_Employed         0
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount            5
Loan_Amount_Term      6
Credit_History       29
Property_Area         0
dtype: int64

In [45]:
miss_cols = ['LoanAmount', 'Loan_Amount_Term','Credit_History']

imp = Imputer(missing_values= 'NaN', strategy = 'median', copy = False)

for col in miss_cols:
    train[col] = imp.fit_transform(train[col].values.reshape(-1, 1))
    test[col] = imp.fit_transform(test[col].values.reshape(-1, 1))

In [46]:
train.isnull().sum()

Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64

In [47]:
test.isnull().sum()

Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
dtype: int64

In [48]:
train.head()

Unnamed: 0_level_0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
Loan_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
LP001002,Male,No,0,Graduate,No,5849,0.0,128.0,360.0,1.0,Urban,Y
LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [49]:
num_columns = ['ApplicantIncome'] + miss_cols

In [50]:
num_columns

['ApplicantIncome', 'LoanAmount', 'Loan_Amount_Term', 'Credit_History']

In [51]:
scaler = StandardScaler()
for cols in num_columns:
    train[cols] = scaler.fit_transform(train[col].values.reshape(-1, 1))
    test[cols] = scaler.fit_transform(test[col].values.reshape(-1, 1))

In [52]:
train.head()

Unnamed: 0_level_0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
Loan_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
LP001002,Male,No,0,Graduate,No,0.411733,0.0,0.411733,0.411733,0.411733,Urban,Y
LP001003,Male,Yes,1,Graduate,No,0.411733,1508.0,0.411733,0.411733,0.411733,Rural,N
LP001005,Male,Yes,0,Graduate,Yes,0.411733,0.0,0.411733,0.411733,0.411733,Urban,Y
LP001006,Male,Yes,0,Not Graduate,No,0.411733,2358.0,0.411733,0.411733,0.411733,Urban,Y
LP001008,Male,No,0,Graduate,No,0.411733,0.0,0.411733,0.411733,0.411733,Urban,Y


In [53]:
test.head()

Unnamed: 0_level_0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area
Loan_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
LP001015,Male,Yes,0,Graduate,No,0.437674,0,0.437674,0.437674,0.437674,Urban
LP001022,Male,Yes,1,Graduate,No,0.437674,1500,0.437674,0.437674,0.437674,Urban
LP001031,Male,Yes,2,Graduate,No,0.437674,1800,0.437674,0.437674,0.437674,Urban
LP001035,Male,Yes,2,Graduate,No,0.437674,2546,0.437674,0.437674,0.437674,Urban
LP001051,Male,No,0,Not Graduate,No,0.437674,0,0.437674,0.437674,0.437674,Urban


In [107]:
train1 = pd.get_dummies(train, columns = cat_cols, drop_first = True)
test1 = pd.get_dummies(test, columns = cat_cols, drop_first = True)

In [108]:
x = train1.drop(['Loan_Status'], axis = 1)
y = train1['Loan_Status']

In [109]:
from sklearn.neural_network import MLPClassifier

model = MLPClassifier(hidden_layer_sizes = (20 ), activation = 'logistic', solver = 'lbfgs', learning_rate_init = 0.1, batch_size = 10)

In [110]:
model.fit(x, y)

MLPClassifier(activation='logistic', alpha=0.0001, batch_size=10, beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=20, learning_rate='constant',
       learning_rate_init=0.1, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='lbfgs', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [111]:
train_preds = model.predict(x)
test_preds = model.predict(test1)

In [112]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y, train_preds)
clr = classification_report(y, train_preds)

In [113]:
print(cm, clr)

[[ 85 107]
 [  8 414]]              precision    recall  f1-score   support

          N       0.91      0.44      0.60       192
          Y       0.79      0.98      0.88       422

avg / total       0.83      0.81      0.79       614

