### **Import Libraries**

In [None]:
import numpy as np
import pandas as pd
import re
import time
from scipy import stats
from scipy import io

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
import pickle
import warnings
# Suppress every warning category from this point on. This keeps cell output
# tidy, but it also hides deprecation notices from the libraries imported above.
warnings.filterwarnings('ignore')

### **Load Data**

In [None]:
# Read the loans table and use the client_id column as the row index.
# The index is not unique: the preview shows one client owning several loans.
df_Loan = pd.read_csv(
    'loans-checkpoint.csv',
    index_col='client_id',
)
df_Loan.head()

Unnamed: 0_level_0,loan_type,loan_amount,repaid,loan_id,loan_start,loan_end,rate
client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
46109,home,13672,0,10243,2002-04-16,2003-12-20,2.15
46109,credit,9794,0,10984,2003-10-21,2005-07-17,1.25
46109,home,12734,1,10990,2006-02-01,2007-07-05,0.68
46109,cash,12518,1,10596,2010-12-08,2013-05-05,1.24
46109,credit,14049,1,11415,2010-07-07,2012-05-21,3.13


In [None]:
# Column dtypes and non-null counts. loan_start and loan_end are still plain
# strings (object dtype) at this stage.
df_Loan.info()

<class 'pandas.core.frame.DataFrame'>
Index: 443 entries, 46109 to 26945
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   loan_type    443 non-null    object 
 1   loan_amount  443 non-null    int64  
 2   repaid       443 non-null    int64  
 3   loan_id      443 non-null    int64  
 4   loan_start   443 non-null    object 
 5   loan_end     443 non-null    object 
 6   rate         443 non-null    float64
dtypes: float64(1), int64(3), object(3)
memory usage: 27.7+ KB


In [None]:
# Summary statistics for the numeric columns.
# NOTE(review): loan_id looks like an identifier, so its mean/std are
# presumably not meaningful — confirm.
df_Loan.describe()

Unnamed: 0,loan_amount,repaid,loan_id,rate
count,443.0,443.0,443.0,443.0
mean,7982.311512,0.534989,11017.10158,3.217156
std,4172.891992,0.499338,581.826222,2.397168
min,559.0,0.0,10009.0,0.01
25%,4232.5,0.0,10507.5,1.22
50%,8320.0,1.0,11033.0,2.78
75%,11739.0,1.0,11526.0,4.75
max,14971.0,1.0,11991.0,12.62


In [None]:
# Count missing values per column.
df_Loan.isna().sum()

loan_type      0
loan_amount    0
repaid         0
loan_id        0
loan_start     0
loan_end       0
rate           0
dtype: int64

In [None]:
# isnull() is an alias of isna(), so this repeats the missing-value count
# computed in the previous cell.
df_Loan.isnull().sum()

loan_type      0
loan_amount    0
repaid         0
loan_id        0
loan_start     0
loan_end       0
rate           0
dtype: int64

### **Preprocessing**

In [None]:
# Parse both ISO-formatted date columns (YYYY-MM-DD) into datetime64 values
# so that they can be subtracted from each other later on.
df_Loan = df_Loan.assign(
    loan_start=pd.to_datetime(df_Loan['loan_start'], format='%Y-%m-%d'),
    loan_end=pd.to_datetime(df_Loan['loan_end'], format='%Y-%m-%d'),
)

In [None]:
# Loan duration in whole days: end date minus start date.
df_Loan['difference_in_days'] = df_Loan['loan_end'].sub(df_Loan['loan_start']).dt.days

In [None]:
# Stop treating loan_id as a number and mark repaid as a categorical flag.
df_Loan = df_Loan.astype({'loan_id': 'object', 'repaid': 'category'})

In [None]:
# Modelling frame: discard the loan identifier and the two raw date columns.
df = df_Loan.drop(['loan_id', 'loan_start', 'loan_end'], axis='columns')

df.head()

Unnamed: 0_level_0,loan_type,loan_amount,repaid,rate,difference_in_days
client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
46109,home,13672,0,2.15,613
46109,credit,9794,0,1.25,635
46109,home,12734,1,0.68,519
46109,cash,12518,1,1.24,879
46109,credit,14049,1,3.13,684


In [None]:
# Split the modelling frame into predictors (x) and the target (y).
# The target is selected by column name rather than by position, so that
# reordering or adding columns upstream cannot silently pick the wrong column.
df1 = df.drop(columns='repaid')
# Mixed dtypes (string loan_type plus numeric columns) produce an object array.
x = df1.values
y = df['repaid'].values



In [None]:
# Replace the loan_type strings in column 0 with integer codes.
# The column is overwritten in place, so re-running this cell would refit the
# encoder on the integer codes instead of the original loan type names.
le = LabelEncoder().fit(x[:, 0])
x[:, 0] = le.transform(x[:, 0])
x

array([[2, 13672, 2.15, 613],
       [1, 9794, 1.25, 635],
       [2, 12734, 0.68, 519],
       ...,
       [3, 9329, 5.65, 948],
       [2, 4197, 4.5, 633],
       [2, 3643, 0.13, 638]], dtype=object)

In [None]:
# One-hot encode the loan type held in column 0; every other column is passed
# through unchanged and appended after the indicator columns.
col_trans = ColumnTransformer(
    transformers=[('loan_type', OneHotEncoder(), [0])],
    remainder='passthrough',
)
X = col_trans.fit_transform(x)
X

array([[0.0, 0.0, 1.0, ..., 13672, 2.15, 613],
       [0.0, 1.0, 0.0, ..., 9794, 1.25, 635],
       [0.0, 0.0, 1.0, ..., 12734, 0.68, 519],
       ...,
       [0.0, 0.0, 0.0, ..., 9329, 5.65, 948],
       [0.0, 0.0, 1.0, ..., 4197, 4.5, 633],
       [0.0, 0.0, 1.0, ..., 3643, 0.13, 638]], dtype=object)

In [None]:
# Standardise every column of X, the one-hot indicator columns included,
# to zero mean and unit variance.
# NOTE(review): the scaler is fitted on all rows. If a train/test split follows
# later in the notebook, test-set statistics would leak into training —
# confirm, and fit on the training portion only if so.
ss = StandardScaler().fit(X)
X = ss.transform(X)
X

array([[-0.56779227, -0.56431585,  1.63130531, ...,  1.36502962,
        -0.44567691, -0.99063984],
       [-0.56779227,  1.77205726, -0.61300603, ...,  0.43464742,
        -0.82154446, -0.83378424],
       [-0.56779227, -0.56431585,  1.63130531, ...,  1.13999132,
        -1.0595939 , -1.66084104],
       ...,
       [-0.56779227, -0.56431585, -0.61300603, ...,  0.32308793,
         1.01603022,  1.39784316],
       [-0.56779227, -0.56431585,  1.63130531, ..., -0.90814503,
         0.53575502, -0.84804384],
       [-0.56779227, -0.56431585,  1.63130531, ..., -1.04105677,
        -1.28929074, -0.81239484]])