# Analytics Vidhya Loan Prediction

In [1]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import make_column_selector, make_column_transformer
from sklearn.model_selection import StratifiedKFold, cross_val_score, GridSearchCV
import os
os.chdir(r"C:\Training\Academy\Statistics (Python)\Datasets")

In [2]:
loan = pd.read_csv("loan.csv", index_col=0)
loan.info()

<class 'pandas.core.frame.DataFrame'>
Index: 614 entries, LP001002 to LP002990
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Gender             601 non-null    object 
 1   Married            611 non-null    object 
 2   Dependents         599 non-null    object 
 3   Education          614 non-null    object 
 4   Self_Employed      582 non-null    object 
 5   ApplicantIncome    614 non-null    int64  
 6   CoapplicantIncome  614 non-null    float64
 7   LoanAmount         592 non-null    float64
 8   Loan_Amount_Term   600 non-null    float64
 9   Credit_History     564 non-null    float64
 10  Property_Area      614 non-null    object 
 11  Loan_Status        614 non-null    object 
dtypes: float64(4), int64(1), object(7)
memory usage: 62.4+ KB


In [3]:
X = loan.drop('Loan_Status', axis=1)
y = loan['Loan_Status']

In [4]:
imp_cat = SimpleImputer(strategy='constant', fill_value="unknown").set_output(transform='pandas')
imp_num = SimpleImputer(strategy='median').set_output(transform='pandas')
trans_imp = make_column_transformer((imp_cat,
       make_column_selector(dtype_include=object)),
       (imp_num, make_column_selector(dtype_exclude=object)),verbose_feature_names_out=False)
trans_imp = trans_imp.set_output(transform='pandas')
# X_imp = trans_imp.fit_transform(X)
# print(X_imp.isnull().sum().sum())
# print(X_imp.columns)

In [5]:
ohe = OneHotEncoder(handle_unknown='ignore', 
      sparse_output=False, drop='first').set_output(transform='pandas')
trans_ohe = make_column_transformer((ohe,
       make_column_selector(dtype_include=object)),
       ('passthrough', make_column_selector(dtype_exclude=object)),verbose_feature_names_out=False)
# trans_ohe = trans_ohe.set_output(transform='pandas')
# X_imp_ohe = trans_ohe.fit_transform(X_imp)
# X_imp_ohe.dtypes

In [6]:
lr = LogisticRegression(random_state=24)
pipe = Pipeline([('IMP',trans_imp),('OHE',trans_ohe),('LR',lr)])
kfold = StratifiedKFold(n_splits=5, random_state=24, shuffle=True)
params = {'LR__solver':['lbfgs','liblinear', 'newton-cg','newton-cholesky', 'sag','saga'],
          'LR__C':np.linspace(0.001, 10, 5)}
gcv = GridSearchCV(pipe, param_grid=params,  scoring='roc_auc',cv=kfold, verbose=3)
gcv.fit(X,y)

Fitting 5 folds for each of 30 candidates, totalling 150 fits
[CV 1/5] END .....LR__C=0.001, LR__solver=lbfgs;, score=0.431 total time=   0.0s
[CV 2/5] END .....LR__C=0.001, LR__solver=lbfgs;, score=0.495 total time=   0.0s
[CV 3/5] END .....LR__C=0.001, LR__solver=lbfgs;, score=0.440 total time=   0.0s
[CV 4/5] END .....LR__C=0.001, LR__solver=lbfgs;, score=0.604 total time=   0.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

[CV 5/5] END .....LR__C=0.001, LR__solver=lbfgs;, score=0.573 total time=   0.0s
[CV 1/5] END .LR__C=0.001, LR__solver=liblinear;, score=0.437 total time=   0.0s
[CV 2/5] END .LR__C=0.001, LR__solver=liblinear;, score=0.636 total time=   0.0s
[CV 3/5] END .LR__C=0.001, LR__solver=liblinear;, score=0.458 total time=   0.0s
[CV 4/5] END .LR__C=0.001, LR__solver=liblinear;, score=0.629 total time=   0.0s
[CV 5/5] END .LR__C=0.001, LR__solver=liblinear;, score=0.554 total time=   0.0s
[CV 1/5] END .LR__C=0.001, LR__solver=newton-cg;, score=0.485 total time=   0.0s
[CV 2/5] END .LR__C=0.001, LR__solver=newton-cg;, score=0.523 total time=   0.0s
[CV 3/5] END .LR__C=0.001, LR__solver=newton-cg;, score=0.541 total time=   0.0s
[CV 4/5] END .LR__C=0.001, LR__solver=newton-cg;, score=0.587 total time=   0.0s
[CV 5/5] END .LR__C=0.001, LR__solver=newton-cg;, score=0.647 total time=   0.0s
[CV 1/5] END LR__C=0.001, LR__solver=newton-cholesky;, score=0.485 total time=   0.0s
[CV 2/5] END LR__C=0.00



[CV 3/5] END .......LR__C=0.001, LR__solver=sag;, score=0.466 total time=   0.0s
[CV 4/5] END .......LR__C=0.001, LR__solver=sag;, score=0.530 total time=   0.0s
[CV 5/5] END .......LR__C=0.001, LR__solver=sag;, score=0.448 total time=   0.0s
[CV 1/5] END ......LR__C=0.001, LR__solver=saga;, score=0.436 total time=   0.0s
[CV 2/5] END ......LR__C=0.001, LR__solver=saga;, score=0.539 total time=   0.0s
[CV 3/5] END ......LR__C=0.001, LR__solver=saga;, score=0.485 total time=   0.0s




[CV 4/5] END ......LR__C=0.001, LR__solver=saga;, score=0.542 total time=   0.0s
[CV 5/5] END ......LR__C=0.001, LR__solver=saga;, score=0.466 total time=   0.0s
[CV 1/5] END ...LR__C=2.50075, LR__solver=lbfgs;, score=0.745 total time=   0.0s
[CV 2/5] END ...LR__C=2.50075, LR__solver=lbfgs;, score=0.730 total time=   0.0s
[CV 3/5] END ...LR__C=2.50075, LR__solver=lbfgs;, score=0.726 total time=   0.0s
[CV 4/5] END ...LR__C=2.50075, LR__solver=lbfgs;, score=0.813 total time=   0.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

[CV 5/5] END ...LR__C=2.50075, LR__solver=lbfgs;, score=0.782 total time=   0.0s
[CV 1/5] END LR__C=2.50075, LR__solver=liblinear;, score=0.746 total time=   0.0s
[CV 2/5] END LR__C=2.50075, LR__solver=liblinear;, score=0.721 total time=   0.0s
[CV 3/5] END LR__C=2.50075, LR__solver=liblinear;, score=0.735 total time=   0.0s
[CV 4/5] END LR__C=2.50075, LR__solver=liblinear;, score=0.813 total time=   0.0s
[CV 5/5] END LR__C=2.50075, LR__solver=liblinear;, score=0.769 total time=   0.0s
[CV 1/5] END LR__C=2.50075, LR__solver=newton-cg;, score=0.740 total time=   0.0s
[CV 2/5] END LR__C=2.50075, LR__solver=newton-cg;, score=0.720 total time=   0.0s
[CV 3/5] END LR__C=2.50075, LR__solver=newton-cg;, score=0.735 total time=   0.0s
[CV 4/5] END LR__C=2.50075, LR__solver=newton-cg;, score=0.810 total time=   0.0s
[CV 5/5] END LR__C=2.50075, LR__solver=newton-cg;, score=0.765 total time=   0.0s
[CV 1/5] END LR__C=2.50075, LR__solver=newton-cholesky;, score=0.740 total time=   0.0s
[CV 2/5] EN



[CV 3/5] END .....LR__C=2.50075, LR__solver=sag;, score=0.466 total time=   0.0s
[CV 4/5] END .....LR__C=2.50075, LR__solver=sag;, score=0.530 total time=   0.0s
[CV 5/5] END .....LR__C=2.50075, LR__solver=sag;, score=0.448 total time=   0.0s
[CV 1/5] END ....LR__C=2.50075, LR__solver=saga;, score=0.436 total time=   0.0s
[CV 2/5] END ....LR__C=2.50075, LR__solver=saga;, score=0.539 total time=   0.0s
[CV 3/5] END ....LR__C=2.50075, LR__solver=saga;, score=0.485 total time=   0.0s
[CV 4/5] END ....LR__C=2.50075, LR__solver=saga;, score=0.542 total time=   0.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END ....LR__C=2.50075, LR__solver=saga;, score=0.466 total time=   0.0s
[CV 1/5] END LR__C=5.000500000000001, LR__solver=lbfgs;, score=0.736 total time=   0.0s
[CV 2/5] END LR__C=5.000500000000001, LR__solver=lbfgs;, score=0.729 total time=   0.0s
[CV 3/5] END LR__C=5.000500000000001, LR__solver=lbfgs;, score=0.726 total time=   0.0s
[CV 4/5] END LR__C=5.000500000000001, LR__solver=lbfgs;, score=0.812 total time=   0.0s
[CV 5/5] END LR__C=5.000500000000001, LR__solver=lbfgs;, score=0.782 total time=   0.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

[CV 1/5] END LR__C=5.000500000000001, LR__solver=liblinear;, score=0.746 total time=   0.0s
[CV 2/5] END LR__C=5.000500000000001, LR__solver=liblinear;, score=0.721 total time=   0.0s
[CV 3/5] END LR__C=5.000500000000001, LR__solver=liblinear;, score=0.725 total time=   0.0s
[CV 4/5] END LR__C=5.000500000000001, LR__solver=liblinear;, score=0.814 total time=   0.0s
[CV 5/5] END LR__C=5.000500000000001, LR__solver=liblinear;, score=0.770 total time=   0.0s
[CV 1/5] END LR__C=5.000500000000001, LR__solver=newton-cg;, score=0.741 total time=   0.0s
[CV 2/5] END LR__C=5.000500000000001, LR__solver=newton-cg;, score=0.717 total time=   0.0s
[CV 3/5] END LR__C=5.000500000000001, LR__solver=newton-cg;, score=0.734 total time=   0.0s
[CV 4/5] END LR__C=5.000500000000001, LR__solver=newton-cg;, score=0.809 total time=   0.0s
[CV 5/5] END LR__C=5.000500000000001, LR__solver=newton-cg;, score=0.765 total time=   0.0s
[CV 1/5] END LR__C=5.000500000000001, LR__solver=newton-cholesky;, score=0.741 t



[CV 2/5] END LR__C=5.000500000000001, LR__solver=sag;, score=0.549 total time=   0.0s
[CV 3/5] END LR__C=5.000500000000001, LR__solver=sag;, score=0.466 total time=   0.0s
[CV 4/5] END LR__C=5.000500000000001, LR__solver=sag;, score=0.530 total time=   0.0s
[CV 5/5] END LR__C=5.000500000000001, LR__solver=sag;, score=0.448 total time=   0.0s
[CV 1/5] END LR__C=5.000500000000001, LR__solver=saga;, score=0.436 total time=   0.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

[CV 2/5] END LR__C=5.000500000000001, LR__solver=saga;, score=0.539 total time=   0.0s
[CV 3/5] END LR__C=5.000500000000001, LR__solver=saga;, score=0.485 total time=   0.0s
[CV 4/5] END LR__C=5.000500000000001, LR__solver=saga;, score=0.542 total time=   0.0s
[CV 5/5] END LR__C=5.000500000000001, LR__solver=saga;, score=0.466 total time=   0.0s
[CV 1/5] END ...LR__C=7.50025, LR__solver=lbfgs;, score=0.736 total time=   0.0s
[CV 2/5] END ...LR__C=7.50025, LR__solver=lbfgs;, score=0.729 total time=   0.0s
[CV 3/5] END ...LR__C=7.50025, LR__solver=lbfgs;, score=0.725 total time=   0.0s
[CV 4/5] END ...LR__C=7.50025, LR__solver=lbfgs;, score=0.813 total time=   0.0s
[CV 5/5] END ...LR__C=7.50025, LR__solver=lbfgs;, score=0.782 total time=   0.0s
[CV 1/5] END LR__C=7.50025, LR__solver=liblinear;, score=0.746 total time=   0.0s
[CV 2/5] END LR__C=7.50025, LR__solver=liblinear;, score=0.720 total time=   0.0s
[CV 3/5] END LR__C=7.50025, LR__solver=liblinear;, score=0.725 total time=   0.0s
[

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END LR__C=7.50025, LR__solver=newton-cg;, score=0.741 total time=   0.0s
[CV 2/5] END LR__C=7.50025, LR__solver=newton-cg;, score=0.716 total time=   0.0s
[CV 3/5] END LR__C=7.50025, LR__solver=newton-cg;, score=0.733 total time=   0.0s
[CV 4/5] END LR__C=7.50025, LR__solver=newton-cg;, score=0.808 total time=   0.0s
[CV 5/5] END LR__C=7.50025, LR__solver=newton-cg;, score=0.765 total time=   0.0s
[CV 1/5] END LR__C=7.50025, LR__solver=newton-cholesky;, score=0.741 total time=   0.0s
[CV 2/5] END LR__C=7.50025, LR__solver=newton-cholesky;, score=0.716 total time=   0.0s
[CV 3/5] END LR__C=7.50025, LR__solver=newton-cholesky;, score=0.733 total time=   0.0s
[CV 4/5] END LR__C=7.50025, LR__solver=newton-cholesky;, score=0.809 total time=   0.0s
[CV 5/5] END LR__C=7.50025, LR__solver=newton-cholesky;, score=0.765 total time=   0.0s




[CV 1/5] END .....LR__C=7.50025, LR__solver=sag;, score=0.424 total time=   0.0s
[CV 2/5] END .....LR__C=7.50025, LR__solver=sag;, score=0.549 total time=   0.0s
[CV 3/5] END .....LR__C=7.50025, LR__solver=sag;, score=0.466 total time=   0.0s
[CV 4/5] END .....LR__C=7.50025, LR__solver=sag;, score=0.530 total time=   0.0s
[CV 5/5] END .....LR__C=7.50025, LR__solver=sag;, score=0.448 total time=   0.0s
[CV 1/5] END ....LR__C=7.50025, LR__solver=saga;, score=0.436 total time=   0.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

[CV 2/5] END ....LR__C=7.50025, LR__solver=saga;, score=0.539 total time=   0.0s
[CV 3/5] END ....LR__C=7.50025, LR__solver=saga;, score=0.485 total time=   0.0s
[CV 4/5] END ....LR__C=7.50025, LR__solver=saga;, score=0.542 total time=   0.0s
[CV 5/5] END ....LR__C=7.50025, LR__solver=saga;, score=0.466 total time=   0.0s
[CV 1/5] END ......LR__C=10.0, LR__solver=lbfgs;, score=0.736 total time=   0.0s
[CV 2/5] END ......LR__C=10.0, LR__solver=lbfgs;, score=0.729 total time=   0.0s
[CV 3/5] END ......LR__C=10.0, LR__solver=lbfgs;, score=0.725 total time=   0.0s
[CV 4/5] END ......LR__C=10.0, LR__solver=lbfgs;, score=0.813 total time=   0.0s
[CV 5/5] END ......LR__C=10.0, LR__solver=lbfgs;, score=0.782 total time=   0.0s
[CV 1/5] END ..LR__C=10.0, LR__solver=liblinear;, score=0.735 total time=   0.0s
[CV 2/5] END ..LR__C=10.0, LR__solver=liblinear;, score=0.720 total time=   0.0s
[CV 3/5] END ..LR__C=10.0, LR__solver=liblinear;, score=0.725 total time=   0.0s
[CV 4/5] END ..LR__C=10.0, L

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END ..LR__C=10.0, LR__solver=newton-cg;, score=0.740 total time=   0.0s
[CV 2/5] END ..LR__C=10.0, LR__solver=newton-cg;, score=0.715 total time=   0.0s
[CV 3/5] END ..LR__C=10.0, LR__solver=newton-cg;, score=0.732 total time=   0.0s
[CV 4/5] END ..LR__C=10.0, LR__solver=newton-cg;, score=0.809 total time=   0.0s
[CV 5/5] END ..LR__C=10.0, LR__solver=newton-cg;, score=0.765 total time=   0.0s
[CV 1/5] END LR__C=10.0, LR__solver=newton-cholesky;, score=0.740 total time=   0.0s
[CV 2/5] END LR__C=10.0, LR__solver=newton-cholesky;, score=0.715 total time=   0.0s
[CV 3/5] END LR__C=10.0, LR__solver=newton-cholesky;, score=0.732 total time=   0.0s
[CV 4/5] END LR__C=10.0, LR__solver=newton-cholesky;, score=0.809 total time=   0.0s
[CV 5/5] END LR__C=10.0, LR__solver=newton-cholesky;, score=0.765 total time=   0.0s




[CV 1/5] END ........LR__C=10.0, LR__solver=sag;, score=0.424 total time=   0.0s
[CV 2/5] END ........LR__C=10.0, LR__solver=sag;, score=0.549 total time=   0.0s
[CV 3/5] END ........LR__C=10.0, LR__solver=sag;, score=0.466 total time=   0.0s
[CV 4/5] END ........LR__C=10.0, LR__solver=sag;, score=0.530 total time=   0.0s
[CV 5/5] END ........LR__C=10.0, LR__solver=sag;, score=0.448 total time=   0.0s
[CV 1/5] END .......LR__C=10.0, LR__solver=saga;, score=0.436 total time=   0.0s
[CV 2/5] END .......LR__C=10.0, LR__solver=saga;, score=0.539 total time=   0.0s
[CV 3/5] END .......LR__C=10.0, LR__solver=saga;, score=0.485 total time=   0.0s
[CV 4/5] END .......LR__C=10.0, LR__solver=saga;, score=0.542 total time=   0.0s
[CV 5/5] END .......LR__C=10.0, LR__solver=saga;, score=0.466 total time=   0.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [7]:
print(gcv.best_params_)
print(gcv.best_score_)

{'LR__C': 2.50075, 'LR__solver': 'lbfgs'}
0.7593120055039561


In [8]:
bm = gcv.best_estimator_
bm

### Inferencing

In [10]:
test = pd.read_csv(r"C:\Training\AV\Loan Prediction\test_lAUu6dG.csv", index_col=0)
test.head()

Unnamed: 0_level_0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area
Loan_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
LP001015,Male,Yes,0,Graduate,No,5720,0,110.0,360.0,1.0,Urban
LP001022,Male,Yes,1,Graduate,No,3076,1500,126.0,360.0,1.0,Urban
LP001031,Male,Yes,2,Graduate,No,5000,1800,208.0,360.0,1.0,Urban
LP001035,Male,Yes,2,Graduate,No,2340,2546,100.0,360.0,,Urban
LP001051,Male,No,0,Not Graduate,No,3276,0,78.0,360.0,1.0,Urban


In [11]:
submit = pd.read_csv(r"C:\Training\AV\Loan Prediction\sample_submission_49d68Cx.csv")
submit['Loan_Status'] = bm.predict(test)
submit.head()

Unnamed: 0,Loan_ID,Loan_Status
0,LP001015,Y
1,LP001022,Y
2,LP001031,Y
3,LP001035,Y
4,LP001051,Y


This `submit`'s data can be exported to .csv and uploaded in solutions checker.

## K-NN

In [32]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler
knn = KNeighborsClassifier()
scl_mm, scl_std = MinMaxScaler(), StandardScaler()
pipe = Pipeline([('IMP',trans_imp),('OHE',trans_ohe),('SCL',None),('KNN',knn)])
kfold = StratifiedKFold(n_splits=5, random_state=24, shuffle=True)
params = {'KNN__n_neighbors':np.arange(1,8),'SCL':[None, scl_mm, scl_std]}
gcv = GridSearchCV(pipe, param_grid=params,  scoring='roc_auc',cv=kfold, verbose=3)
gcv.fit(X,y)

Fitting 5 folds for each of 21 candidates, totalling 105 fits
[CV 1/5] END ......KNN__n_neighbors=1, SCL=None;, score=0.512 total time=   0.0s
[CV 2/5] END ......KNN__n_neighbors=1, SCL=None;, score=0.512 total time=   0.0s
[CV 3/5] END ......KNN__n_neighbors=1, SCL=None;, score=0.484 total time=   0.0s
[CV 4/5] END ......KNN__n_neighbors=1, SCL=None;, score=0.467 total time=   0.0s
[CV 5/5] END ......KNN__n_neighbors=1, SCL=None;, score=0.439 total time=   0.0s
[CV 1/5] END KNN__n_neighbors=1, SCL=MinMaxScaler();, score=0.668 total time=   0.0s
[CV 2/5] END KNN__n_neighbors=1, SCL=MinMaxScaler();, score=0.646 total time=   0.0s
[CV 3/5] END KNN__n_neighbors=1, SCL=MinMaxScaler();, score=0.641 total time=   0.0s
[CV 4/5] END KNN__n_neighbors=1, SCL=MinMaxScaler();, score=0.691 total time=   0.0s
[CV 5/5] END KNN__n_neighbors=1, SCL=MinMaxScaler();, score=0.692 total time=   0.0s
[CV 1/5] END KNN__n_neighbors=1, SCL=StandardScaler();, score=0.586 total time=   0.0s
[CV 2/5] END KNN__n_n

Fitting 5 folds for each of 7 candidates, totalling 35 fits
[CV 1/5] END ................KNN__n_neighbors=1;, score=0.512 total time=   0.1s
[CV 2/5] END ................KNN__n_neighbors=1;, score=0.512 total time=   0.0s
[CV 3/5] END ................KNN__n_neighbors=1;, score=0.484 total time=   0.0s
[CV 4/5] END ................KNN__n_neighbors=1;, score=0.467 total time=   0.0s
[CV 5/5] END ................KNN__n_neighbors=1;, score=0.439 total time=   0.0s
[CV 1/5] END ................KNN__n_neighbors=2;, score=0.529 total time=   0.0s
[CV 2/5] END ................KNN__n_neighbors=2;, score=0.490 total time=   0.0s
[CV 3/5] END ................KNN__n_neighbors=2;, score=0.562 total time=   0.0s
[CV 4/5] END ................KNN__n_neighbors=2;, score=0.487 total time=   0.0s
[CV 5/5] END ................KNN__n_neighbors=2;, score=0.410 total time=   0.0s
[CV 1/5] END ................KNN__n_neighbors=3;, score=0.544 total time=   0.0s
[CV 2/5] END ................KNN__n_neighbors=3;,