<a href="https://colab.research.google.com/github/Rakxxd/LoanPrediction/blob/main/LoanHackathon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### This notebook contains information about how we can handle imbalanced dataset, also contains hyperparameter tuning for 2 of the most famous kaggle winnng algorithms XGBoost and LightGBM. 
### This notebook also covers how stacking can be used with these 2 models. This can be a good learning curve for anyone who wants to get high accuracy on datasets.
### The data used for this notebook is Loan Prediciton dataset picked from Analytics Vidhya Hackathon.

# Importing Required Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pandas_profiling import ProfileReport
from sklearn.model_selection import train_test_split, RepeatedKFold, cross_val_score, KFold
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import StackingClassifier

# EDA and Dta Cleaning

In [5]:
df_train = pd.read_csv('/content/train_ctrUa4K.csv')
df_test = pd.read_csv('/content/test_lAUu6dG.csv')

In [6]:
df_train

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
...,...,...,...,...,...,...,...,...,...,...,...,...,...
609,LP002978,Female,No,0,Graduate,No,2900,0.0,71.0,360.0,1.0,Rural,Y
610,LP002979,Male,Yes,3+,Graduate,No,4106,0.0,40.0,180.0,1.0,Rural,Y
611,LP002983,Male,Yes,1,Graduate,No,8072,240.0,253.0,360.0,1.0,Urban,Y
612,LP002984,Male,Yes,2,Graduate,No,7583,0.0,187.0,360.0,1.0,Urban,Y


In [7]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 614 entries, 0 to 613
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Loan_ID            614 non-null    object 
 1   Gender             601 non-null    object 
 2   Married            611 non-null    object 
 3   Dependents         599 non-null    object 
 4   Education          614 non-null    object 
 5   Self_Employed      582 non-null    object 
 6   ApplicantIncome    614 non-null    int64  
 7   CoapplicantIncome  614 non-null    float64
 8   LoanAmount         592 non-null    float64
 9   Loan_Amount_Term   600 non-null    float64
 10  Credit_History     564 non-null    float64
 11  Property_Area      614 non-null    object 
 12  Loan_Status        614 non-null    object 
dtypes: float64(4), int64(1), object(8)
memory usage: 62.5+ KB


In [8]:
df_train.isnull().sum()

Loan_ID               0
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

In [9]:
df_train['Gender'].value_counts()

Male      489
Female    112
Name: Gender, dtype: int64

In [10]:
df_test.isnull().sum()

Loan_ID               0
Gender               11
Married               0
Dependents           10
Education             0
Self_Employed        23
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount            5
Loan_Amount_Term      6
Credit_History       29
Property_Area         0
dtype: int64

In [11]:
sns.countplot(x=df_train['Credit_History'],hue = df_train['Loan_Status'])
plt.show()

In [12]:
df_train['Married'].value_counts()

Yes    398
No     213
Name: Married, dtype: int64

In [13]:
sns.countplot(x=df_train['Married'],hue = df_train['Loan_Status'])
plt.show()

In [14]:
df_train['Gender'].value_counts()

Male      489
Female    112
Name: Gender, dtype: int64

In [15]:
sns.countplot(x=df_train['Self_Employed'],hue = df_train['Loan_Status'])

<matplotlib.axes._subplots.AxesSubplot at 0x7f7dd8071f50>

In [16]:
df_train['Dependents'].value_counts()

0     345
1     102
2     101
3+     51
Name: Dependents, dtype: int64

In [17]:
#Interesting
df_train.groupby(['Dependents','Married'])['Married'].count()

Dependents  Married
0           No         171
            Yes        174
1           No          23
            Yes         79
2           No           8
            Yes         93
3+          No           7
            Yes         44
Name: Married, dtype: int64

In [18]:
sns.countplot(x=df_train['Loan_Amount_Term'],hue = df_train['Loan_Status'])

<matplotlib.axes._subplots.AxesSubplot at 0x7f7dd8071f50>

In [19]:
sns.boxplot(y=df_train['LoanAmount'])

<matplotlib.axes._subplots.AxesSubplot at 0x7f7dd8071f50>

# Feature Engineering

In [20]:
df_train['Credit_History'].fillna(-1,inplace=True)
df_test['Credit_History'].fillna(-1,inplace=True)

df_train['Married'].fillna(df_train['Married'].mode()[0],inplace=True)
df_test['Married'].fillna(df_test['Married'].mode()[0],inplace=True)

df_train['Gender'].fillna(df_train['Gender'].mode()[0],inplace=True)
df_test['Gender'].fillna(df_test['Gender'].mode()[0],inplace=True)

df_train['Self_Employed'].fillna('NA',inplace=True)
df_test['Self_Employed'].fillna('NA',inplace=True)

df_train['Dependents'].fillna(df_train['Dependents'].mode()[0],inplace=True)
df_test['Dependents'].fillna(df_test['Dependents'].mode()[0],inplace=True)


df_train['Loan_Amount_Term'].fillna(df_train['Loan_Amount_Term'].mode()[0],inplace=True)
df_test['Loan_Amount_Term'].fillna(df_test['Loan_Amount_Term'].mode()[0],inplace=True)

df_train['LoanAmount'].fillna(df_train['LoanAmount'].median(),inplace=True)
df_test['LoanAmount'].fillna(df_test['LoanAmount'].median(),inplace=True)

In [21]:
#Some terms are present in test for loan amount term which are not in train, we will round them to nearest
arr = np.array(df_train['Loan_Amount_Term'].value_counts().sort_values().index)
df_test['Loan_Amount_Term'] = df_test['Loan_Amount_Term'].apply(lambda x: x if x in arr else arr[arr>x].min())

In [22]:
arr

array([ 12.,  36.,  60., 120.,  84., 240., 300., 480., 180., 360.])

In [23]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 614 entries, 0 to 613
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Loan_ID            614 non-null    object 
 1   Gender             614 non-null    object 
 2   Married            614 non-null    object 
 3   Dependents         614 non-null    object 
 4   Education          614 non-null    object 
 5   Self_Employed      614 non-null    object 
 6   ApplicantIncome    614 non-null    int64  
 7   CoapplicantIncome  614 non-null    float64
 8   LoanAmount         614 non-null    float64
 9   Loan_Amount_Term   614 non-null    float64
 10  Credit_History     614 non-null    float64
 11  Property_Area      614 non-null    object 
 12  Loan_Status        614 non-null    object 
dtypes: float64(4), int64(1), object(8)
memory usage: 62.5+ KB


In [24]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
vars = ['Gender','Married','Dependents','Education','Self_Employed','Loan_Amount_Term','Credit_History','Property_Area']
df_train['Dependents'].map({'3+':'3','0':'0','1':'1','2':'2'})
df_test['Dependents'].map({'3+':'3','0':'0','1':'1','2':'2'})

0      0
1      1
2      2
3      2
4      0
      ..
362    3
363    0
364    0
365    0
366    0
Name: Dependents, Length: 367, dtype: object

In [25]:
for name in vars:
  df_train[name] = le.fit_transform(df_train[name])
  df_test[name] = le.transform(df_test[name])

In [26]:
df_train.drop('Loan_ID',inplace=True,axis=1)

In [27]:
numericals = ['ApplicantIncome','CoapplicantIncome','LoanAmount']
for name in numericals:
  sns.histplot(x=df_train[name])
  plt.show()
  sns.boxplot(y=df_train[name])
  plt.show()

In [28]:
df_train['CoappIncIsnull'] = np.where(df_train['CoapplicantIncome'] == 0 , 1, 0)
df_test['CoappIncIsnull'] = np.where(df_test['CoapplicantIncome'] == 0 , 1, 0)

In [29]:
le = LabelEncoder()
df_train['CoappIncIsnull'] = le.fit_transform(df_train['CoappIncIsnull'])
df_test['CoappIncIsnull'] = le.transform(df_test['CoappIncIsnull'])

In [30]:
df_train['ApplicantIncome'] = np.log1p(df_train['ApplicantIncome'])
df_test['ApplicantIncome'] = np.log1p(df_test['ApplicantIncome'])

df_train['CoapplicantIncome'] = np.log1p(df_train['CoapplicantIncome'])
df_test['CoapplicantIncome'] = np.log1p(df_test['CoapplicantIncome'])

df_train['LoanAmount'] = np.log1p(df_train['LoanAmount'])
df_test['LoanAmount'] = np.log1p(df_test['LoanAmount'])

In [31]:
X = df_train.drop('Loan_Status',axis=1)
y = df_train['Loan_Status']

# Handling Imbalance

In [32]:
#We have an imbalance in class we can generate synthetic samples from it.
y.value_counts()/len(y)

Y    0.687296
N    0.312704
Name: Loan_Status, dtype: float64

In [33]:
from imblearn.over_sampling import ADASYN

In [34]:
ada = ADASYN(sampling_strategy='minority',random_state=42,n_neighbors=5)
X_res,y_res = ada.fit_resample(X,y)



# Trying out Base Models

In [35]:
# Machine Learning
# Utils
from sklearn.model_selection import cross_val_score, RepeatedStratifiedKFold, StratifiedKFold
from sklearn import preprocessing
#Feature Selection
from sklearn.feature_selection import chi2, f_classif, f_regression, mutual_info_classif
from sklearn.feature_selection import mutual_info_regression, SelectKBest, SelectPercentile
from sklearn.feature_selection import VarianceThreshold
# Models
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from lightgbm import LGBMClassifier
from imblearn.ensemble import BalancedBaggingClassifier, BalancedRandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
# Unsupervised Models
from sklearn.cluster import KMeans
#Metrics
from sklearn.metrics import roc_auc_score
from xgboost import XGBClassifier

In [38]:
def get_models():
    models = dict()
    models['gauss'] = GaussianNB()
    models['QDA'] = QuadraticDiscriminantAnalysis()
    models['lr'] = LogisticRegression(solver='liblinear')
    models['Dtree'] = DecisionTreeClassifier(random_state=42)
    models['linearSVC'] = SVC(kernel="linear", C=0.025)
    models['KernelSVC'] = SVC(gamma=2, C=1)
    models['neigh'] = KNeighborsClassifier(n_neighbors=5)
    models['rf'] = RandomForestClassifier(random_state=42)
    models['lgbm'] = LGBMClassifier(metric='binary_logloss',
                                    objective='binary',
                                    reg_alpha=2.945525898790487,
                                    max_depth=13,
                                    num_leaves=34,
                                    seed=42,
                                    learning_rate=0.0037601596530868493,
                                    n_estimators=1913)
    models['xgboost'] = XGBClassifier()
    models['BalBag'] = BalancedBaggingClassifier()
    #models['BalRF'] = BalancedRandomForestClassifier()
    
    return models

def evaluate_model(model, X, y):
    cv = RepeatedStratifiedKFold(n_splits=3, n_repeats=3, random_state=42)
    scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv,
                             n_jobs=-1, error_score='raise')
    return scores

In [39]:
%%time

X = X_res
y = y_res

models = get_models()
results = []
names = []

for name, model in models.items():
    scores = evaluate_model(model, X, y)
    results.append(scores)
    names.append(name)
    print(f'{name} : {round(np.mean(scores),6)} ({round(np.std(scores),3)})')

gauss : 0.604895 (0.022)
QDA : 0.599456 (0.021)
lr : 0.63986 (0.021)
Dtree : 0.767677 (0.016)
linearSVC : 0.638306 (0.026)
KernelSVC : 0.80303 (0.024)
neigh : 0.701632 (0.031)
rf : 0.819736 (0.022)
lgbm : 0.787102 (0.022)
xgboost : 0.796037 (0.022)
BalBag : 0.789433 (0.017)
CPU times: user 336 ms, sys: 22.4 ms, total: 359 ms
Wall time: 7.74 s


# Install Optuna

In [182]:
!pip install optuna
import optuna



# LightGBM Tuning

In [183]:
def objective(trial):
  params = {
    'max_depth':trial.suggest_int("max_depth", 5, 20),
    'min_child_samples':trial.suggest_int("min_child_samples", 10, 30),
    'min_child_weight':trial.suggest_int('min_child_weight',1,10),
    'num_leaves': trial.suggest_int('num_leaves',20,40),
    "reg_lambda": trial.suggest_uniform("lr", 1e-5, 10),
    "reg_alpha": trial.suggest_uniform("lr", 1e-5, 10),
    'colsample_bytree': trial.suggest_float("colsample_bytree",0,1),
    'subsample': trial.suggest_float("subsample",0,1),
    "learning_rate": trial.suggest_uniform("learning_rate", 1e-5, 10)
    }
  clf = LGBMClassifier(**params)
  
  return cross_val_score(clf, X_res, y_res,scoring='accuracy',n_jobs=-1, cv=3).mean()
  


In [184]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)
trial = study.best_trial
best_param_light = study.best_params

[32m[I 2021-06-03 18:37:30,115][0m A new study created in memory with name: no-name-b87f3646-edc8-43a4-9d72-5c556a0a2c13[0m
[32m[I 2021-06-03 18:37:30,180][0m Trial 0 finished with value: 0.5314429484564124 and parameters: {'max_depth': 11, 'min_child_samples': 19, 'min_child_weight': 7, 'num_leaves': 34, 'lr': 7.717482604999142, 'colsample_bytree': 0.7725431995433466, 'subsample': 0.1972429069103282, 'learning_rate': 9.222631512402046}. Best is trial 0 with value: 0.5314429484564124.[0m
[32m[I 2021-06-03 18:37:30,237][0m Trial 1 finished with value: 0.6247926016591867 and parameters: {'max_depth': 6, 'min_child_samples': 29, 'min_child_weight': 10, 'num_leaves': 35, 'lr': 9.161655480045923, 'colsample_bytree': 0.3128584375500617, 'subsample': 0.9881231545699253, 'learning_rate': 6.027975220309659}. Best is trial 1 with value: 0.6247926016591867.[0m
[32m[I 2021-06-03 18:37:30,293][0m Trial 2 finished with value: 0.5198694410444717 and parameters: {'max_depth': 6, 'min_child_

In [185]:
print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(best_param_light))

Accuracy: 0.8194478444172446
Best hyperparameters: {'max_depth': 20, 'min_child_samples': 21, 'min_child_weight': 6, 'num_leaves': 23, 'lr': 0.9919250003754279, 'colsample_bytree': 0.7263293197291268, 'subsample': 0.8186269337425661, 'learning_rate': 0.5166296227376406}


In [186]:
lgbClf = LGBMClassifier(**best_param_light)
lgbClf.fit(X_res,y_res)

LGBMClassifier(boosting_type='gbdt', class_weight=None,
               colsample_bytree=0.7263293197291268, importance_type='split',
               learning_rate=0.5166296227376406, lr=0.9919250003754279,
               max_depth=20, min_child_samples=21, min_child_weight=6,
               min_split_gain=0.0, n_estimators=100, n_jobs=-1, num_leaves=23,
               objective=None, random_state=None, reg_alpha=0.0, reg_lambda=0.0,
               silent=True, subsample=0.8186269337425661,
               subsample_for_bin=200000, subsample_freq=0)

In [187]:
optuna.visualization.plot_optimization_history(study)

In [188]:
#Making Preds
preds = lgbClf.predict(df_test.drop('Loan_ID',axis=1).values)
df_submit = pd.DataFrame({
    'Loan_ID': df_test['Loan_ID'],
    'Loan_Status': preds
})
df_submit.to_csv('SubmissionLgbm.csv',index=None)

# XGBoost Tuning

In [189]:
def objective(trial):
  params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'learning_rate': trial.suggest_uniform('learning_rate', 1e-1, 10),
        'subsample': trial.suggest_uniform('subsample', 0.50, 1),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.50, 1),
        'gamma': trial.suggest_int('gamma', 0,10),
        "reg_lambda": trial.suggest_uniform("lr", 1e-2, 10),
        "reg_alpha": trial.suggest_uniform("lr", 1e-2, 10),
        'objective':'binary:logistic'       
       }
  clf = XGBClassifier(**params)

  return cross_val_score(clf,X_res,y_res,scoring='accuracy',cv=3,n_jobs=-1).mean()

In [190]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)
trial = study.best_trial
best_param_xgb = study.best_params

[32m[I 2021-06-03 18:37:54,899][0m A new study created in memory with name: no-name-572ffe97-fa57-4c98-8f8d-1a3ad40def99[0m
[32m[I 2021-06-03 18:37:55,090][0m Trial 0 finished with value: 0.5058207534339726 and parameters: {'n_estimators': 173, 'max_depth': 10, 'learning_rate': 6.127306259349873, 'subsample': 0.9222409185167793, 'colsample_bytree': 0.5269689334605967, 'gamma': 8, 'lr': 1.3001351940338732}. Best is trial 0 with value: 0.5058207534339726.[0m
[32m[I 2021-06-03 18:37:55,459][0m Trial 1 finished with value: 0.5046783625730994 and parameters: {'n_estimators': 375, 'max_depth': 3, 'learning_rate': 7.839035822211026, 'subsample': 0.9530186299339959, 'colsample_bytree': 0.9710351244855333, 'gamma': 4, 'lr': 0.7852034552896222}. Best is trial 0 with value: 0.5058207534339726.[0m
[32m[I 2021-06-03 18:37:55,735][0m Trial 2 finished with value: 0.5023255813953489 and parameters: {'n_estimators': 295, 'max_depth': 13, 'learning_rate': 7.722853619157043, 'subsample': 0.991

In [191]:
print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(best_param_xgb))

Accuracy: 0.8276417788657691
Best hyperparameters: {'n_estimators': 229, 'max_depth': 7, 'learning_rate': 0.28561673147275884, 'subsample': 0.7876754550089806, 'colsample_bytree': 0.6142697184951356, 'gamma': 0, 'lr': 0.8459628987622669}


In [192]:
xgbClf = XGBClassifier(**best_param_xgb)
xgbClf.fit(X_res,y_res)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=0.6142697184951356, gamma=0,
              learning_rate=0.28561673147275884, lr=0.8459628987622669,
              max_delta_step=0, max_depth=7, min_child_weight=1, missing=None,
              n_estimators=229, n_jobs=1, nthread=None,
              objective='binary:logistic', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=1, seed=None, silent=None,
              subsample=0.7876754550089806, verbosity=1)

In [193]:
#Making Preds
preds = xgbClf.predict(df_test.drop('Loan_ID',axis=1).values)
df_submit = pd.DataFrame({
    'Loan_ID': df_test['Loan_ID'],
    'Loan_Status': preds
})
df_submit.to_csv('SubmissionXgb.csv',index=None)

# Model Stack

In [202]:
def objective(trial):
  estimators = [
  ('lgbm', LGBMClassifier(**best_param_light)),
  ('xgb', XGBClassifier(**best_param_xgb))
  ]

  params = {
      'C':trial.suggest_uniform('C',1e-5,15)
  }

  lr = LogisticRegression(**params)
  sclf = StackingClassifier(estimators=estimators, final_estimator=lr)
  sclf.fit(X_res,y_res)
  cv_score = cross_val_score(sclf, X_res,y_res,scoring="accuracy",cv =5).mean()
  return cv_score

In [203]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)
trial = study.best_trial
best_param_stack = study.best_params

[32m[I 2021-06-03 18:52:47,816][0m A new study created in memory with name: no-name-aef59686-0fab-4f14-99b9-4ebf473411bc[0m
[32m[I 2021-06-03 18:52:53,619][0m Trial 0 finished with value: 0.808969128246974 and parameters: {'C': 13.861946852533716}. Best is trial 0 with value: 0.808969128246974.[0m
[32m[I 2021-06-03 18:52:59,337][0m Trial 1 finished with value: 0.8078063375492996 and parameters: {'C': 3.676830836842457}. Best is trial 0 with value: 0.808969128246974.[0m
[32m[I 2021-06-03 18:53:05,114][0m Trial 2 finished with value: 0.8078063375492996 and parameters: {'C': 2.431746356421679}. Best is trial 0 with value: 0.808969128246974.[0m
[32m[I 2021-06-03 18:53:10,853][0m Trial 3 finished with value: 0.808969128246974 and parameters: {'C': 9.934011135592435}. Best is trial 0 with value: 0.808969128246974.[0m
[32m[I 2021-06-03 18:53:16,574][0m Trial 4 finished with value: 0.808969128246974 and parameters: {'C': 1.437967793241615}. Best is trial 0 with value: 0.808969

In [204]:
print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(best_param_stack))

Accuracy: 0.8101319189446485
Best hyperparameters: {'C': 0.14688020640905108}


In [205]:
estimators = [
('lgbm', LGBMClassifier(**best_param_light)),
('xgb', XGBClassifier(**best_param_xgb))
]
lr = LogisticRegression(C = 0.14 )
sclf = StackingClassifier(estimators=estimators, final_estimator=lr)
sclf.fit(X_res,y_res)

StackingClassifier(cv=None,
                   estimators=[('lgbm',
                                LGBMClassifier(boosting_type='gbdt',
                                               class_weight=None,
                                               colsample_bytree=0.7263293197291268,
                                               importance_type='split',
                                               learning_rate=0.5166296227376406,
                                               lr=0.9919250003754279,
                                               max_depth=20,
                                               min_child_samples=21,
                                               min_child_weight=6,
                                               min_split_gain=0.0,
                                               n_estimators=100, n_jobs=-1,
                                               num_leaves=23, objective=None,
                                               random...
               

In [206]:
#Making Preds
preds = sclf.predict(df_test.drop('Loan_ID',axis=1).values)
df_submit = pd.DataFrame({
    'Loan_ID': df_test['Loan_ID'],
    'Loan_Status': preds
})
df_submit.to_csv('SubmissionStack.csv',index=None)