In [1]:
import os
import sys

# Project root used to locate the local `src` package. It can be overridden with the
# LOAN_PROJECT_ROOT environment variable so the notebook runs on other machines;
# the fallback is the path this notebook was originally written against.
PROJECT_ROOT = os.environ.get(
    'LOAN_PROJECT_ROOT', '/Users/gscerberus/Desktop/Loan_Prediction_Analysis'
)

# Only append when missing so re-running this cell does not pile up duplicate entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [2]:
import os

# Switch the working directory to the project root so that relative paths
# (e.g. 'data/...') resolve. The location can be overridden with the
# LOAN_PROJECT_ROOT environment variable; the default is the original path.
# os.chdir raises FileNotFoundError if the directory does not exist.
os.chdir(os.environ.get(
    'LOAN_PROJECT_ROOT', '/Users/gscerberus/Desktop/Loan_Prediction_Analysis'
))

In [3]:
# Sanity check: show the current working directory after the chdir above.
os.getcwd()

'/Users/gscerberus/Desktop/Loan_Prediction_Analysis'

In [4]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# hyperparameter values
from src.hyperparameters import (
    SVC_PARAM_GRID,
    LOGISTIC_REGRESSION_PARAM_GRID,
    DECISION_TREES_PARAM_GRID,
    RANDOM_FOREST_PARAM_GRID,
    XGB_PARAM_GRID)

from src.tuning import Tuning

# different models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier


In [5]:
# Read the loan data set; the path is relative to the current working directory.
df = pd.read_csv(filepath_or_buffer='data/final_loan_data_set.csv')

In [6]:
# Display the loaded frame as the cell output.
# NOTE(review): consider df.head() to keep the saved notebook output small.
df

Unnamed: 0,Gender,Married,Education,Self_Employed,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status,TotalIncome
0,1.0,0.0,0.0,0.0,0.37,1.00,1.0,2.0,1.0,0.36
1,1.0,1.0,0.0,0.0,0.32,1.00,1.0,0.0,0.0,0.38
2,1.0,1.0,0.0,1.0,0.16,1.00,1.0,2.0,1.0,0.13
3,1.0,1.0,1.0,0.0,0.30,1.00,1.0,2.0,1.0,0.28
4,1.0,0.0,0.0,0.0,0.36,1.00,1.0,2.0,1.0,0.37
...,...,...,...,...,...,...,...,...,...,...
529,0.0,0.0,0.0,0.0,0.17,1.00,1.0,0.0,1.0,0.12
530,1.0,1.0,0.0,0.0,0.08,0.48,1.0,0.0,1.0,0.22
531,1.0,1.0,0.0,0.0,0.66,1.00,1.0,2.0,1.0,0.56
532,1.0,1.0,0.0,0.0,0.49,1.00,1.0,2.0,1.0,0.50


In [7]:
# Before choosing the right model, let's test some models and their hyperparameters
# Creating val, train and test set

# Will create 60-20-20 split.
# Shuffle with a fixed seed so the split (and everything tuned on it) is reproducible
# across kernel restarts, then cut the shuffled rows at the 60% and 80% marks.
# Positional slicing is used instead of np.split, which for a DataFrame goes through
# DataFrame.swapaxes (deprecated in recent pandas - TODO confirm for the pinned version).
shuffled = df.sample(frac=1, random_state=42)
train_end = int(.6 * len(df))
validate_end = int(.8 * len(df))
train = shuffled.iloc[:train_end]
validate = shuffled.iloc[train_end:validate_end]
test = shuffled.iloc[validate_end:]


In [8]:
# Features and target for hyperparameter tuning are taken from the validation split.
# 'Unnamed: 0' looks like a row index that was saved into the CSV and read back as a
# regular column; it is an identifier rather than a predictor, so it is removed from
# the feature matrix when present.
non_feature_cols = ['Loan_Status'] + [c for c in ['Unnamed: 0'] if c in validate.columns]
X = validate.drop(columns=non_feature_cols)
y = validate['Loan_Status']

In [9]:
# Logistic regression: hand a default estimator and its parameter grid to the
# project's Tuning helper and keep whatever tune() returns for X, y.
lr = LogisticRegression()
tuner = Tuning(
    param_grid=LOGISTIC_REGRESSION_PARAM_GRID,
    model=lr,
)
best_params = tuner.tune(X, y)

# Last expression -> shown as the cell output
best_params

{'C': 1, 'penalty': 'l2'}

In [10]:
# Support vector classifier: hand a default estimator and its parameter grid to the
# project's Tuning helper and keep whatever tune() returns for X, y.
svc = SVC()
tuner = Tuning(
    param_grid=SVC_PARAM_GRID,
    model=svc,
)
best_params = tuner.tune(X, y)

# Last expression -> shown as the cell output
best_params

{'C': 0.1, 'gamma': 0.1, 'kernel': 'linear'}

In [11]:
# Decision tree: tune with the project's Tuning helper.
# The estimator is seeded because tree construction is randomized (feature order is
# permuted at each split), so an unseeded run can select different parameters each time.
# NOTE(review): assumes Tuning keeps the estimator's constructor params - confirm in src.tuning.
dt = DecisionTreeClassifier(random_state=42)
tuner = Tuning(model=dt, param_grid=DECISION_TREES_PARAM_GRID)
best_params = tuner.tune(X, y)

# Last expression -> shown as the cell output
best_params

{'max_depth': 30, 'min_samples_leaf': 1, 'min_samples_split': 5}

In [12]:
# Random forest: tune with the project's Tuning helper.
# The estimator is seeded because bootstrap sampling and per-split feature sampling
# are random; without a seed the selected parameters are not reproducible.
# NOTE(review): assumes Tuning keeps the estimator's constructor params - confirm in src.tuning.
rf = RandomForestClassifier(random_state=42)
tuner = Tuning(model=rf, param_grid=RANDOM_FOREST_PARAM_GRID)
best_params = tuner.tune(X, y)

# Last expression -> shown as the cell output
best_params

{'max_depth': 10,
 'min_samples_leaf': 2,
 'min_samples_split': 10,
 'n_estimators': 200}

In [13]:
# XGBoost: tune with the project's Tuning helper.
# The estimator is seeded so that row subsampling (when the grid sets subsample < 1)
# gives the same result on every run.
# NOTE(review): assumes Tuning keeps the estimator's constructor params - confirm in src.tuning.
xgb = XGBClassifier(random_state=42)
tuner = Tuning(model=xgb, param_grid=XGB_PARAM_GRID)
best_params = tuner.tune(X, y)

# Last expression -> shown as the cell output
best_params

{'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}