In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import sklearn.metrics as metrics 
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
import xgboost as xgb

In [2]:
def classificationMetrics(y, yhat):
    prf1 = metrics.precision_recall_fscore_support(y,yhat)
    res = {'Accuracy': metrics.accuracy_score(y,yhat),
           'Precision':prf1[0][1],
           'Recall': prf1[1][1],
           'f1-score': prf1[2][1],
           'Log-loss': metrics.log_loss(y,yhat),
           'AUC': metrics.roc_auc_score(y,yhat)
          }
    return res

In [3]:
import pickle
# Load the DataFrame from the pickle file using pandas
df = pd.read_csv("C:\\Users\\diana\\Downloads\\Loans\\LoanApprovals\\loans_clean4.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,0,1,0,0.0,0,0,5849.0,0.0,128.0,360.0,1.0,2,1
1,1,1,1,1.0,0,0,4583.0,1508.0,128.0,360.0,1.0,0,0
2,2,1,1,0.0,0,1,3000.0,0.0,66.0,360.0,1.0,2,1
3,3,1,1,0.0,1,0,2583.0,2358.0,120.0,360.0,1.0,2,1
4,4,1,0,0.0,0,0,6000.0,0.0,141.0,360.0,1.0,2,1


In [4]:
from sklearn.model_selection import train_test_split

X = df.drop(columns=['Loan_Status'])
y = df['Loan_Status']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [6]:
models_list = pd.DataFrame()

In [7]:
mod1 = LogisticRegression(max_iter=2000, solver='newton-cg')
mod1.fit(X,y)



In [8]:
pred1 = mod1.predict(X)
model_dict = {'model': 'Logistic Regression'}

model_metrics = classificationMetrics(y, pred1)
result_dict = {**model_dict, **model_metrics}
models_list = pd.concat([models_list, pd.DataFrame([result_dict])], ignore_index=True)

In [9]:
pd.crosstab(y, pred1)

col_0,0,1
Loan_Status,Unnamed: 1_level_1,Unnamed: 2_level_1
0,82,110
1,8,414


In [10]:
import sklearn.metrics as skmet
cmat=skmet.confusion_matrix(y,pred1)

In [11]:
mod3 = RandomForestClassifier(max_depth=11, max_features=7)
mod3.fit(X,y)

In [12]:
pred3 = mod3.predict(X)
model_dict = {'model': "RandomForest"}

model_metrics = classificationMetrics(y, pred3)
result_dict = {**model_dict, **model_metrics}
models_list = pd.concat([models_list, pd.DataFrame([result_dict])], ignore_index=True)

In [13]:
pd.crosstab(y, pred3)

col_0,0,1
Loan_Status,Unnamed: 1_level_1,Unnamed: 2_level_1
0,180,12
1,0,422


In [14]:
mod6 = SVC(probability=False)
mod6.fit(X,y)

In [15]:
pred6 = mod6.predict(X)
model_dict = {'model': "SVC"}
model_metrics = classificationMetrics(y, pred6)

  _warn_prf(average, modifier, msg_start, len(result))


In [16]:
result_dict = {**model_dict, **model_metrics}
models_list = pd.concat([models_list, pd.DataFrame([result_dict])], ignore_index=True)

In [17]:
mod4 = AdaBoostClassifier(random_state=1)
mod4.fit(X,y)

In [18]:
pred4 = mod4.predict(X)
model_dict = {'model': "ADABoost"}
new_row = pd.DataFrame([{**model_dict, **classificationMetrics(y, pred4)}])
models_list = pd.concat([models_list, new_row], ignore_index=True)

In [19]:
mod5 = GradientBoostingClassifier(random_state=1)
mod5.fit(X,y)

In [20]:
pred5 = mod5.predict(X)
model_dict = {'model': "GBM"}
new_row = pd.DataFrame([{**model_dict, **classificationMetrics(y, pred5)}])
models_list = pd.concat([models_list, new_row], ignore_index=True)

In [21]:
mod7 = xgb.XGBClassifier()
mod7.fit(X,y)

In [22]:
pred7 = mod7.predict(X)
model_dict = {'model': "XGB"}
new_row = pd.DataFrame([{**model_dict, **classificationMetrics(y, pred7)}])
models_list = pd.concat([models_list, new_row], ignore_index=True)
models_list.sort_values('Accuracy', ascending=False)

Unnamed: 0,model,Accuracy,Precision,Recall,f1-score,Log-loss,AUC
5,XGB,1.0,1.0,1.0,1.0,2.220446e-16,1.0
1,RandomForest,0.980456,0.97235,1.0,0.985981,0.7044362,0.96875
4,GBM,0.882736,0.860082,0.990521,0.920705,4.226617,0.818177
3,ADABoost,0.832248,0.819639,0.969194,0.888165,6.046411,0.750222
0,Logistic Regression,0.807818,0.790076,0.981043,0.875264,6.926956,0.704063
2,SVC,0.687296,0.687296,1.0,0.814672,11.27098,0.5
