In [3]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
#Ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [4]:
df=pd.read_csv('WA_Fn-UseC_-HR-Employee-Attrition.csv')
#df=df.drop(['EmployeeNumber','EmployeeCount','StandardHours','Over18'],axis=1)
df.columns

Index(['Age', 'Attrition', 'BusinessTravel', 'DailyRate', 'Department',
       'DistanceFromHome', 'Education', 'EducationField', 'EmployeeCount',
       'EmployeeNumber', 'EnvironmentSatisfaction', 'Gender', 'HourlyRate',
       'JobInvolvement', 'JobLevel', 'JobRole', 'JobSatisfaction',
       'MaritalStatus', 'MonthlyIncome', 'MonthlyRate', 'NumCompaniesWorked',
       'Over18', 'OverTime', 'PercentSalaryHike', 'PerformanceRating',
       'RelationshipSatisfaction', 'StandardHours', 'StockOptionLevel',
       'TotalWorkingYears', 'TrainingTimesLastYear', 'WorkLifeBalance',
       'YearsAtCompany', 'YearsInCurrentRole', 'YearsSinceLastPromotion',
       'YearsWithCurrManager'],
      dtype='object')

In [5]:
#Binning
df['MonthlyIncome']=pd.cut(df['MonthlyIncome'],bins=4,labels=['Very low','Low','Moderate','High'])
#Binning
df['Age']=pd.cut(df['Age'],bins=3,labels=['Youth','Middle Aged','Elderly'])

In [6]:
df_num=df.select_dtypes(include=['int64','float'])
df_cat=df.select_dtypes(exclude=['int64','float'])
df_cat.columns

Index(['Age', 'Attrition', 'BusinessTravel', 'Department', 'EducationField',
       'Gender', 'JobRole', 'MaritalStatus', 'MonthlyIncome', 'Over18',
       'OverTime'],
      dtype='object')

In [7]:
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
df_cat['MonthlyIncome']=le.fit_transform(df['MonthlyIncome'])
df_cat['Age']=le.fit_transform(df['Age'])
df_cat['BusinessTravel']=le.fit_transform(df['BusinessTravel'])
df_cat['Department']=le.fit_transform(df['Department'])
df_cat['EducationField']=le.fit_transform(df['EducationField'])
df_cat['Gender']=le.fit_transform(df['Gender'])
df_cat['JobRole']=le.fit_transform(df['JobRole'])
df_cat['MaritalStatus']=le.fit_transform(df['MaritalStatus'])
df_cat['OverTime']=le.fit_transform(df['OverTime'])

In [8]:
df1=pd.concat([df_num,df_cat],axis=1)
df1=df1.drop(['EmployeeCount','StandardHours','Over18'],axis=1)


In [9]:
df1.head()

Unnamed: 0,DailyRate,DistanceFromHome,Education,EmployeeNumber,EnvironmentSatisfaction,HourlyRate,JobInvolvement,JobLevel,JobSatisfaction,MonthlyRate,...,Age,Attrition,BusinessTravel,Department,EducationField,Gender,JobRole,MaritalStatus,MonthlyIncome,OverTime
0,1102,1,2,1,2,94,3,2,4,19479,...,1,Yes,2,2,1,0,7,2,1,1
1,279,8,1,2,3,61,2,2,2,24907,...,0,No,1,1,1,1,6,1,3,0
2,1373,2,2,4,4,92,2,1,3,2396,...,1,Yes,2,1,4,1,2,2,3,1
3,1392,3,4,5,4,56,3,1,3,23159,...,1,No,1,1,1,0,6,1,3,1
4,591,2,1,7,1,40,3,1,2,16632,...,2,No,2,1,3,1,2,1,3,0


In [10]:
df1['Attrition']=df1['Attrition'].replace({'Yes':1,'No':0})

In [11]:

#Scale the input
from sklearn.preprocessing import StandardScaler
x=df1.drop('Attrition',axis=1)
y=df1['Attrition']


In [12]:
# type your code here
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
#RFE
from sklearn.feature_selection import RFE
from sklearn.metrics import r2_score
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.3,random_state=3)


In [13]:
for i in [2,3,4,5,6,7,8,9,10,11,12]:
    lr=LogisticRegression()
    rfe_mod=RFE(estimator=lr,n_features_to_select=i)
    rfe_feat_mod=rfe_mod.fit(xtrain,ytrain)
    rank=rfe_feat_mod.ranking_
    res=pd.DataFrame()
    res['feature']=xtrain.columns
    res['rank']=rank
    col=res[res['rank']==1]['feature']
    lr=LogisticRegression()
    lr.fit(xtrain[col],ytrain)
    ypred=lr.predict(xtest[col])
    print('feature subset : ',i)
    print('R2 : ',r2_score(ytest,ypred))

feature subset :  2
R2 :  -0.20491803278688536
feature subset :  3
R2 :  -0.10852459016393445
feature subset :  4
R2 :  -0.07639344262295089
feature subset :  5
R2 :  -0.04426229508196733
feature subset :  6
R2 :  -0.04426229508196733
feature subset :  7
R2 :  -0.012131147540983767
feature subset :  8
R2 :  0.05213114754098347
feature subset :  9
R2 :  -0.028196721311475548
feature subset :  10
R2 :  0.019999999999999907
feature subset :  11
R2 :  0.003934426229508126
feature subset :  12
R2 :  0.003934426229508126


In [14]:
rfe_mod=RFE(estimator=lr,n_features_to_select=25)
rfe_feat_mod1=rfe_mod.fit(xtrain,ytrain)
rank1=rfe_feat_mod1.ranking_
res1=pd.DataFrame()
res1['feature']=xtrain.columns
res1['rank1']=rank1
col=res1[res1['rank1']==1]['feature']
col.values

array(['DistanceFromHome', 'EnvironmentSatisfaction', 'JobInvolvement',
       'JobLevel', 'JobSatisfaction', 'NumCompaniesWorked',
       'PercentSalaryHike', 'PerformanceRating',
       'RelationshipSatisfaction', 'StockOptionLevel',
       'TotalWorkingYears', 'TrainingTimesLastYear', 'WorkLifeBalance',
       'YearsAtCompany', 'YearsInCurrentRole', 'YearsSinceLastPromotion',
       'YearsWithCurrManager', 'Age', 'BusinessTravel', 'Department',
       'EducationField', 'Gender', 'MaritalStatus', 'MonthlyIncome',
       'OverTime'], dtype=object)

In [18]:
x=df1[['DistanceFromHome', 'EnvironmentSatisfaction', 'JobInvolvement',
       'JobLevel', 'JobSatisfaction', 'NumCompaniesWorked',
       'PercentSalaryHike', 'PerformanceRating',
       'RelationshipSatisfaction', 'StockOptionLevel',
       'TotalWorkingYears', 'TrainingTimesLastYear', 'WorkLifeBalance',
       'YearsAtCompany', 'YearsInCurrentRole', 'YearsSinceLastPromotion',
       'YearsWithCurrManager', 'Age', 'BusinessTravel', 'Department',
       'EducationField', 'Gender', 'MaritalStatus', 'MonthlyIncome',
       'OverTime']]
x_scaler=StandardScaler()
x_std=x_scaler.fit_transform(x)

In [19]:
#A)

from sklearn.metrics import confusion_matrix,classification_report, roc_auc_score,roc_curve,accuracy_score
from sklearn.linear_model import LogisticRegression
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(x_std,y,test_size=0.3,random_state=3,stratify=y)

lr=LogisticRegression()
lr.fit(X_train,y_train)
y_pred = lr.predict(X_test)
ytest_prob = lr.predict_proba(X_test)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,lr.predict(X_train)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))



cm = pd.DataFrame(confusion_matrix(y_test,y_pred), columns=['Predicted:0','Predicted:1'], index=['Actual:0','Actual:1'])

# calculating TP,TN,FP,FN
TN, FP, FN, TP = cm.iloc[0,0], cm.iloc[0,1], cm.iloc[1,0], cm.iloc[1,1]

# print values
print("True positives:", TP)
print("True negatives:", TN)
print("False positives (Type I error):", FP)
print("False negatives (Type II error):", FN)


from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score, roc_curve

# itereation results
description = "LR Scaled"
misclassifications = FP + FN
type1 = FP
type2 = FN
precision = round(precision_score(y_test,y_pred),2)
recall = round(recall_score(y_test,y_pred),2)
accuracy_train = round(accuracy_score(y_train,lr.predict(X_train)),2)
accuracy_test = round(accuracy_score(y_test,y_pred),2)
f1 = round(f1_score(y_test,y_pred),2)
auc = round(roc_auc_score(y_test,ytest_prob),2)

LR_Scaled =pd.DataFrame(np.array([description,
                                     misclassifications,
                                     type1,
                                     type2,
                                     precision,
                                     recall,
                                     accuracy_train,
                                     accuracy_test,
                                     f1,
                                     auc]).reshape(1,-1), columns=['Description','Misclassifications','Type I errors','Type II errors','Precision','Recall','Accuracy_train','Accuracy_test','F1-score','ROC AUC'])

LR_Scaled

Confusion: [[360  10]
 [ 49  22]]

Accuracy Train:  0.880466472303207
Accuracy Test: 0.8662131519274376
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.97      0.92       370
           1       0.69      0.31      0.43        71

    accuracy                           0.87       441
   macro avg       0.78      0.64      0.68       441
weighted avg       0.85      0.87      0.84       441

AUC Test:  0.7955843167110773
True positives: 22
True negatives: 360
False positives (Type I error): 10
False negatives (Type II error): 49


Unnamed: 0,Description,Misclassifications,Type I errors,Type II errors,Precision,Recall,Accuracy_train,Accuracy_test,F1-score,ROC AUC
0,LR Scaled,59,10,49,0.69,0.31,0.88,0.87,0.43,0.8


In [17]:
#A)

from sklearn.metrics import confusion_matrix,classification_report, roc_auc_score,roc_curve,accuracy_score
from sklearn.linear_model import LogisticRegression
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(x_std,y,test_size=0.3,random_state=3,stratify=y)

lr=LogisticRegression()
lr.fit(X_train,y_train)
y_pred = lr.predict(X_test)
ytest_prob = lr.predict_proba(X_test)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,lr.predict(X_train)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))



cm = pd.DataFrame(confusion_matrix(y_test,y_pred), columns=['Predicted:0','Predicted:1'], index=['Actual:0','Actual:1'])

# calculating TP,TN,FP,FN
TN, FP, FN, TP = cm.iloc[0,0], cm.iloc[0,1], cm.iloc[1,0], cm.iloc[1,1]

# print values
print("True positives:", TP)
print("True negatives:", TN)
print("False positives (Type I error):", FP)
print("False negatives (Type II error):", FN)


from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score, roc_curve

# itereation results
description = "LR Scaled"
misclassifications = FP + FN
type1 = FP
type2 = FN
precision = round(precision_score(y_test,y_pred),2)
recall = round(recall_score(y_test,y_pred),2)
accuracy_train = round(accuracy_score(y_train,lr.predict(X_train)),2)
accuracy_test = round(accuracy_score(y_test,y_pred),2)
f1 = round(f1_score(y_test,y_pred),2)
auc = round(roc_auc_score(y_test,ytest_prob),2)

LR_Scaled =pd.DataFrame(np.array([description,
                                     misclassifications,
                                     type1,
                                     type2,
                                     precision,
                                     recall,
                                     accuracy_train,
                                     accuracy_test,
                                     f1,
                                     auc]).reshape(1,-1), columns=['Description','Misclassifications','Type I errors','Type II errors','Precision','Recall','Accuracy_train','Accuracy_test','F1-score','ROC AUC'])

LR_Scaled

Confusion: [[360  10]
 [ 49  22]]

Accuracy Train:  0.880466472303207
Accuracy Test: 0.8662131519274376
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.97      0.92       370
           1       0.69      0.31      0.43        71

    accuracy                           0.87       441
   macro avg       0.78      0.64      0.68       441
weighted avg       0.85      0.87      0.84       441

AUC Test:  0.7955843167110773
True positives: 22
True negatives: 360
False positives (Type I error): 10
False negatives (Type II error): 49


Unnamed: 0,Description,Misclassifications,Type I errors,Type II errors,Precision,Recall,Accuracy_train,Accuracy_test,F1-score,ROC AUC
0,LR Scaled,59,10,49,0.69,0.31,0.88,0.87,0.43,0.8


In [1]:
LR=LogisticRegression()
lr.fit(X_train,y_train)
kf =KFold(shuffle=True,n_splits=3,random_state=0)
cv_results = cross_val_score(lr, X_train, y_train,cv=kf, scoring='roc_auc')
print('roc_auc :',1-np.min(LR_Bag_be))
print('Bias error :',np.min(LR_Bag_be))
print('Variance error :',np.min(LR_Bag_var))

NameError: name 'LogisticRegression' is not defined

In [15]:
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import KFold,cross_val_score


LR_Bag_var=[]
LR_Bag_be=[]
for val in np.arange(1,100):
  LR=LogisticRegression()
  lr_Bag=BaggingClassifier(base_estimator=LR,n_estimators=val,random_state=0)
  kf =KFold(shuffle=True,n_splits=3,random_state=0)
  cv_results = cross_val_score(lr_Bag, x_std, y,cv=kf, scoring='roc_auc')
  LR_Bag_var.append(np.std(cv_results,ddof=1))
  LR_Bag_be.append(np.mean(1-cv_results))
  
np.argmin(LR_Bag_var),np.argmin(LR_Bag_be)

In [16]:
from sklearn.ensemble import RandomForestClassifier,BaggingClassifier,AdaBoostClassifier,GradientBoostingClassifier

### Gradient Boosting

## Data leak

### Logistic Regression

In [17]:
x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=42,test_size=0.3)
ss=StandardScaler()
xtrains=ss.fit_transform(x_train)
xtests=ss.fit_transform(x_test)

In [18]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix,classification_report,roc_auc_score,roc_curve,accuracy_score
lr1=LogisticRegression()
lr1.fit(xtrains,y_train)
y_pred = lr1.predict(xtests)
ytest_prob = lr1.predict_proba(xtests)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,lr1.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))

Confusion: [[364  16]
 [ 45  16]]

Accuracy Train:  0.8746355685131195
Accuracy Test: 0.8616780045351474
Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.96      0.92       380
           1       0.50      0.26      0.34        61

    accuracy                           0.86       441
   macro avg       0.69      0.61      0.63       441
weighted avg       0.84      0.86      0.84       441

AUC Test:  0.7602243313201035


### Bagged LR

In [19]:
lr=LogisticRegression()
Bag=BaggingClassifier(base_estimator=lr,n_estimators=9,random_state=0)
Bag=LogisticRegression()
Bag.fit(xtrains,y_train)
y_pred = Bag.predict(xtests)
ytest_prob = Bag.predict_proba(xtests)[:,1]
    
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,Bag.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))



cm = pd.DataFrame(confusion_matrix(y_test,y_pred), columns=['Predicted:0','Predicted:1'], index=['Actual:0','Actual:1'])

# calculating TP,TN,FP,FN
TN, FP, FN, TP = cm.iloc[0,0], cm.iloc[0,1], cm.iloc[1,0], cm.iloc[1,1]

# print values
print("True positives:", TP)
print("True negatives:", TN)
print("False positives (Type I error):", FP)
print("False negatives (Type II error):", FN)


from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score, roc_curve

# itereation results
description = "Bag_LR"
misclassifications = FP + FN
type1 = FP
type2 = FN
precision = round(precision_score(y_test,y_pred),2)
recall = round(recall_score(y_test,y_pred),2)
accuracy_train = round(accuracy_score(y_train,Bag.predict(X_train)),2)
accuracy_test = round(accuracy_score(y_test,y_pred),2)
f1 = round(f1_score(y_test,y_pred),2)
auc = round(roc_auc_score(y_test,ytest_prob),2)

bag_LR =pd.DataFrame(np.array([description,
                                     misclassifications,
                                     type1,
                                     type2,
                                     precision,
                                     recall,
                                     accuracy_train,
                                     accuracy_test,
                                     f1,
                                     auc]).reshape(1,-1), columns=['Description','Misclassifications','Type I errors','Type II errors','Precision','Recall','Accuracy_train','Accuracy_test','F1-score','ROC AUC'])

bag_LR

Confusion: [[364  16]
 [ 45  16]]

Accuracy Train:  0.8746355685131195
Accuracy Test: 0.8616780045351474
Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.96      0.92       380
           1       0.50      0.26      0.34        61

    accuracy                           0.86       441
   macro avg       0.69      0.61      0.63       441
weighted avg       0.84      0.86      0.84       441

AUC Test:  0.7602243313201035
True positives: 16
True negatives: 364
False positives (Type I error): 16
False negatives (Type II error): 45


Unnamed: 0,Description,Misclassifications,Type I errors,Type II errors,Precision,Recall,Accuracy_train,Accuracy_test,F1-score,ROC AUC
0,Bag_LR,61,16,45,0.5,0.26,0.77,0.86,0.34,0.76


### Ada LR

In [20]:
lr=LogisticRegression()
Ada=AdaBoostClassifier(base_estimator=lr,n_estimators=7,random_state=0)
Ada.fit(X_train,y_train)
Ada.fit(xtrains,y_train)
y_pred = Ada.predict(xtests)
ytest_prob = Ada.predict_proba(xtests)[:,1]
    
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,Ada.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))



cm = pd.DataFrame(confusion_matrix(y_test,y_pred), columns=['Predicted:0','Predicted:1'], index=['Actual:0','Actual:1'])

# calculating TP,TN,FP,FN
TN, FP, FN, TP = cm.iloc[0,0], cm.iloc[0,1], cm.iloc[1,0], cm.iloc[1,1]

# print values
print("True positives:", TP)
print("True negatives:", TN)
print("False positives (Type I error):", FP)
print("False negatives (Type II error):", FN)


from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score, roc_curve

# itereation results
description = "Ada_LR"
misclassifications = FP + FN
type1 = FP
type2 = FN
precision = round(precision_score(y_test,y_pred),2)
recall = round(recall_score(y_test,y_pred),2)
accuracy_train = round(accuracy_score(y_train,Ada.predict(X_train)),2)
accuracy_test = round(accuracy_score(y_test,y_pred),2)
f1 = round(f1_score(y_test,y_pred),2)
auc = round(roc_auc_score(y_test,ytest_prob),2)

Ada_LR =pd.DataFrame(np.array([description,
                                     misclassifications,
                                     type1,
                                     type2,
                                     precision,
                                     recall,
                                     accuracy_train,
                                     accuracy_test,
                                     f1,
                                     auc]).reshape(1,-1), columns=['Description','Misclassifications','Type I errors','Type II errors','Precision','Recall','Accuracy_train','Accuracy_test','F1-score','ROC AUC'])

Ada_LR

Confusion: [[376   4]
 [ 51  10]]

Accuracy Train:  0.8688046647230321
Accuracy Test: 0.8752834467120182
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.99      0.93       380
           1       0.71      0.16      0.27        61

    accuracy                           0.88       441
   macro avg       0.80      0.58      0.60       441
weighted avg       0.86      0.88      0.84       441

AUC Test:  0.7557808455565143
True positives: 10
True negatives: 376
False positives (Type I error): 4
False negatives (Type II error): 51


Unnamed: 0,Description,Misclassifications,Type I errors,Type II errors,Precision,Recall,Accuracy_train,Accuracy_test,F1-score,ROC AUC
0,Ada_LR,55,4,51,0.71,0.16,0.8,0.88,0.27,0.76


### Gradient Boosting

In [21]:
lr=LogisticRegression()
gb=GradientBoostingClassifier(n_estimators=108,random_state=0)
gb.fit(xtrains,y_train)
y_pred = gb.predict(xtests)
ytest_prob = gb.predict_proba(xtests)[:,1]
    
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,gb.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))



cm = pd.DataFrame(confusion_matrix(y_test,y_pred), columns=['Predicted:0','Predicted:1'], index=['Actual:0','Actual:1'])

# calculating TP,TN,FP,FN
TN, FP, FN, TP = cm.iloc[0,0], cm.iloc[0,1], cm.iloc[1,0], cm.iloc[1,1]

# print values
print("True positives:", TP)
print("True negatives:", TN)
print("False positives (Type I error):", FP)
print("False negatives (Type II error):", FN)


from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score, roc_curve

# itereation results
description = "GB"
misclassifications = FP + FN
type1 = FP
type2 = FN
precision = round(precision_score(y_test,y_pred),2)
recall = round(recall_score(y_test,y_pred),2)
accuracy_train = round(accuracy_score(y_train,gb.predict(X_train)),2)
accuracy_test = round(accuracy_score(y_test,y_pred),2)
f1 = round(f1_score(y_test,y_pred),2)
auc = round(roc_auc_score(y_test,ytest_prob),2)
GB =pd.DataFrame(np.array([description,
                                     misclassifications,
                                     type1,
                                     type2,
                                     precision,
                                     recall,
                                     accuracy_train,
                                     accuracy_test,
                                     f1,
                                     auc]).reshape(1,-1), columns=['Description','Misclassifications','Type I errors','Type II errors','Precision','Recall','Accuracy_train','Accuracy_test','F1-score','ROC AUC'])
GB

Confusion: [[365  15]
 [ 42  19]]

Accuracy Train:  0.9630709426627794
Accuracy Test: 0.8707482993197279
Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.96      0.93       380
           1       0.56      0.31      0.40        61

    accuracy                           0.87       441
   macro avg       0.73      0.64      0.66       441
weighted avg       0.85      0.87      0.85       441

AUC Test:  0.8059965487489217
True positives: 19
True negatives: 365
False positives (Type I error): 15
False negatives (Type II error): 42


Unnamed: 0,Description,Misclassifications,Type I errors,Type II errors,Precision,Recall,Accuracy_train,Accuracy_test,F1-score,ROC AUC
0,GB,57,15,42,0.56,0.31,0.75,0.87,0.4,0.81


### KNN

In [22]:
from sklearn.neighbors import KNeighborsClassifier
knn=KNeighborsClassifier()
knn.fit(xtrains,y_train)
y_pred = knn.predict(xtests)
ytest_prob = knn.predict_proba(xtests)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,knn.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))


cm = pd.DataFrame(confusion_matrix(y_test,y_pred), columns=['Predicted:0','Predicted:1'], index=['Actual:0','Actual:1'])

# calculating TP,TN,FP,FN
TN, FP, FN, TP = cm.iloc[0,0], cm.iloc[0,1], cm.iloc[1,0], cm.iloc[1,1]

# print values
print("True positives:", TP)
print("True negatives:", TN)
print("False positives (Type I error):", FP)
print("False negatives (Type II error):", FN)


from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score, roc_curve

# itereation results
description = "KNN Scaled"
misclassifications = FP + FN
type1 = FP
type2 = FN
precision = round(precision_score(y_test,y_pred),2)
recall = round(recall_score(y_test,y_pred),2)
accuracy_train=round(accuracy_score(y_train,knn.predict(xtrains)),2)
accuracy_test = round(accuracy_score(y_test,y_pred),2)
f1 = round(f1_score(y_test,y_pred),2)
auc = round(roc_auc_score(y_test,ytest_prob),2)

Kneighbors =pd.DataFrame(np.array([description,
                                     misclassifications,
                                     type1,
                                     type2,
                                     precision,
                                     recall,
                                     accuracy_train,
                                     accuracy_test,
                                     f1,
                                     auc]).reshape(1,-1), columns=['Description','Misclassifications','Type I errors','Type II errors','Precision','Recall','Accuracy_train','Accuracy_test','F1-score','ROC AUC'])

Kneighbors

Confusion: [[378   2]
 [ 54   7]]

Accuracy Train:  0.8668610301263362
Accuracy Test: 0.873015873015873
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.99      0.93       380
           1       0.78      0.11      0.20        61

    accuracy                           0.87       441
   macro avg       0.83      0.55      0.57       441
weighted avg       0.86      0.87      0.83       441

AUC Test:  0.6344262295081968
True positives: 7
True negatives: 378
False positives (Type I error): 2
False negatives (Type II error): 54


Unnamed: 0,Description,Misclassifications,Type I errors,Type II errors,Precision,Recall,Accuracy_train,Accuracy_test,F1-score,ROC AUC
0,KNN Scaled,56,2,54,0.78,0.11,0.87,0.87,0.2,0.63


In [23]:
from sklearn.model_selection import KFold,cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
knn=KNeighborsClassifier()
param={'n_neighbors':np.arange(1,70),'weights':['uniform','distance']}
kf =KFold(shuffle=True,n_splits=3,random_state=0)
grid=GridSearchCV(knn,param,cv=kf,scoring='roc_auc')
grid.fit(x_std,y)
grid.best_params_

{'n_neighbors': 68, 'weights': 'distance'}

In [24]:
from sklearn.neighbors import KNeighborsClassifier
#Scale the input
knn=KNeighborsClassifier(n_neighbors= 60, weights= 'distance')
knn.fit(xtrains,y_train)
y_pred = knn.predict(xtests)
ytest_prob = knn.predict_proba(xtests)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,knn.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))


cm = pd.DataFrame(confusion_matrix(y_test,y_pred), columns=['Predicted:0','Predicted:1'], index=['Actual:0','Actual:1'])

# calculating TP,TN,FP,FN
TN, FP, FN, TP = cm.iloc[0,0], cm.iloc[0,1], cm.iloc[1,0], cm.iloc[1,1]

# print values
print("True positives:", TP)
print("True negatives:", TN)
print("False positives (Type I error):", FP)
print("False negatives (Type II error):", FN)


from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score, roc_curve

# itereation results
description = "KNN Scaled hypertuned"
misclassifications = FP + FN
type1 = FP
type2 = FN
precision = round(precision_score(y_test,y_pred),2)
recall = round(recall_score(y_test,y_pred),2)
accuracy_train=accuracy_score(y_train,knn.predict(xtrains))
accuracy_test = round(accuracy_score(y_test,y_pred),2)
f1 = round(f1_score(y_test,y_pred),2)
auc = round(roc_auc_score(y_test,ytest_prob),2)

knn_tuned =pd.DataFrame(np.array([description,
                                     misclassifications,
                                     type1,
                                     type2,
                                     precision,
                                     recall,
                                     accuracy_train,
                                     accuracy_test,
                                     f1,
                                     auc]).reshape(1,-1), columns=['Description','Misclassifications','Type I errors','Type II errors','Precision','Recall','Accuracy_train','Accuracy_test','F1-score','ROC AUC'])

knn_tuned


Confusion: [[380   0]
 [ 61   0]]

Accuracy Train:  1.0
Accuracy Test: 0.8616780045351474
Classification Report:
               precision    recall  f1-score   support

           0       0.86      1.00      0.93       380
           1       0.00      0.00      0.00        61

    accuracy                           0.86       441
   macro avg       0.43      0.50      0.46       441
weighted avg       0.74      0.86      0.80       441

AUC Test:  0.737532355478861
True positives: 0
True negatives: 380
False positives (Type I error): 0
False negatives (Type II error): 61


Unnamed: 0,Description,Misclassifications,Type I errors,Type II errors,Precision,Recall,Accuracy_train,Accuracy_test,F1-score,ROC AUC
0,KNN Scaled hypertuned,61,0,61,0.0,0.0,1.0,0.86,0.0,0.74


### Bagged KNN

In [25]:

Bag=BaggingClassifier(base_estimator=knn,n_estimators=24,random_state=0)
Bag.fit(xtrains,y_train)
y_pred = Bag.predict(xtests)
ytest_prob = Bag.predict_proba(xtests)[:,1]
    
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,Bag.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))



cm = pd.DataFrame(confusion_matrix(y_test,y_pred), columns=['Predicted:0','Predicted:1'], index=['Actual:0','Actual:1'])

# calculating TP,TN,FP,FN
TN, FP, FN, TP = cm.iloc[0,0], cm.iloc[0,1], cm.iloc[1,0], cm.iloc[1,1]

# print values
print("True positives:", TP)
print("True negatives:", TN)
print("False positives (Type I error):", FP)
print("False negatives (Type II error):", FN)


from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score, roc_curve

# itereation results
description = "Bag_KNN"
misclassifications = FP + FN
type1 = FP
type2 = FN
precision = round(precision_score(y_test,y_pred),2)
recall = round(recall_score(y_test,y_pred),2)
accuracy_train = round(accuracy_score(y_train,Bag.predict(X_train)),2)
accuracy_test = round(accuracy_score(y_test,y_pred),2)
f1 = round(f1_score(y_test,y_pred),2)
auc = round(roc_auc_score(y_test,ytest_prob),2)

bag_KNN=pd.DataFrame(np.array([description,
                                     misclassifications,
                                     type1,
                                     type2,
                                     precision,
                                     recall,
                                     accuracy_train,
                                     accuracy_test,
                                     f1,
                                     auc]).reshape(1,-1), columns=['Description','Misclassifications','Type I errors','Type II errors','Precision','Recall','Accuracy_train','Accuracy_test','F1-score','ROC AUC'])

bag_KNN

Confusion: [[380   0]
 [ 61   0]]

Accuracy Train:  1.0
Accuracy Test: 0.8616780045351474
Classification Report:
               precision    recall  f1-score   support

           0       0.86      1.00      0.93       380
           1       0.00      0.00      0.00        61

    accuracy                           0.86       441
   macro avg       0.43      0.50      0.46       441
weighted avg       0.74      0.86      0.80       441

AUC Test:  0.746548748921484
True positives: 0
True negatives: 380
False positives (Type I error): 0
False negatives (Type II error): 61


Unnamed: 0,Description,Misclassifications,Type I errors,Type II errors,Precision,Recall,Accuracy_train,Accuracy_test,F1-score,ROC AUC
0,Bag_KNN,61,0,61,0.0,0.0,0.8,0.86,0.0,0.75


### NB

In [26]:

from sklearn.naive_bayes import GaussianNB
NB=GaussianNB()
NB.fit(xtrains,y_train)
y_pred = NB.predict(xtests)
ytest_prob = NB.predict_proba(xtests)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,NB.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))

Confusion: [[304  76]
 [ 28  33]]

Accuracy Train:  0.7803692905733722
Accuracy Test: 0.764172335600907
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.80      0.85       380
           1       0.30      0.54      0.39        61

    accuracy                           0.76       441
   macro avg       0.61      0.67      0.62       441
weighted avg       0.83      0.76      0.79       441

AUC Test:  0.711518550474547


In [27]:
from sklearn.naive_bayes import GaussianNB
NB=GaussianNB()
NB.fit(xtrains,y_train)
y_pred = NB.predict(xtests)
ytest_prob = NB.predict_proba(xtests)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,NB.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))


cm = pd.DataFrame(confusion_matrix(y_test,y_pred), columns=['Predicted:0','Predicted:1'], index=['Actual:0','Actual:1'])

# calculating TP,TN,FP,FN
TN, FP, FN, TP = cm.iloc[0,0], cm.iloc[0,1], cm.iloc[1,0], cm.iloc[1,1]

# print values
print("True positives:", TP)
print("True negatives:", TN)
print("False positives (Type I error):", FP)
print("False negatives (Type II error):", FN)


from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score, roc_curve

# itereation results
description = "NB Scaled "
misclassifications = FP + FN
type1 = FP
type2 = FN
precision = round(precision_score(y_test,y_pred),2)
recall = round(recall_score(y_test,y_pred),2)
accuracy_train = round(accuracy_score(y_train,NB.predict(xtrains)),2)

accuracy_test = round(accuracy_score(y_test,y_pred),2)
f1 = round(f1_score(y_test,y_pred),2)
auc = round(roc_auc_score(y_test,ytest_prob),2)

Naive_Bayes =pd.DataFrame(np.array([description,
                                     misclassifications,
                                     type1,
                                     type2,
                                     precision,
                                     recall,
                                     accuracy_train,
                                     accuracy_test,
                                     f1,
                                     auc]).reshape(1,-1), columns=['Description','Misclassifications','Type I errors','Type II errors','Precision','Recall','Accuracy_train','Accuracy_test','F1-score','ROC AUC'])

Naive_Bayes


Confusion: [[304  76]
 [ 28  33]]

Accuracy Train:  0.7803692905733722
Accuracy Test: 0.764172335600907
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.80      0.85       380
           1       0.30      0.54      0.39        61

    accuracy                           0.76       441
   macro avg       0.61      0.67      0.62       441
weighted avg       0.83      0.76      0.79       441

AUC Test:  0.711518550474547
True positives: 33
True negatives: 304
False positives (Type I error): 76
False negatives (Type II error): 28


Unnamed: 0,Description,Misclassifications,Type I errors,Type II errors,Precision,Recall,Accuracy_train,Accuracy_test,F1-score,ROC AUC
0,NB Scaled,104,76,28,0.3,0.54,0.78,0.76,0.39,0.71


### Bagged NB

In [28]:
from sklearn.naive_bayes import GaussianNB
NB=GaussianNB()
NB_Bag=BaggingClassifier(base_estimator=NB,n_estimators=35,random_state=0)
NB_Bag.fit(xtrains,y_train)
y_pred = NB_Bag.predict(xtests)
ytest_prob = NB_Bag.predict_proba(xtests)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,NB_Bag.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))


cm = pd.DataFrame(confusion_matrix(y_test,y_pred), columns=['Predicted:0','Predicted:1'], index=['Actual:0','Actual:1'])

# calculating TP,TN,FP,FN
TN, FP, FN, TP = cm.iloc[0,0], cm.iloc[0,1], cm.iloc[1,0], cm.iloc[1,1]

# print values
print("True positives:", TP)
print("True negatives:", TN)
print("False positives (Type I error):", FP)
print("False negatives (Type II error):", FN)


from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score, roc_curve

# itereation results
description = "NB Scaled "
misclassifications = FP + FN
type1 = FP
type2 = FN
precision = round(precision_score(y_test,y_pred),2)
recall = round(recall_score(y_test,y_pred),2)
accuracy_train = round(accuracy_score(y_train,NB_Bag.predict(xtrains)),2)
accuracy_test = round(accuracy_score(y_test,y_pred),2)
f1 = round(f1_score(y_test,y_pred),2)
auc = round(roc_auc_score(y_test,ytest_prob),2)

NB_Bag =pd.DataFrame(np.array([description,
                                     misclassifications,
                                     type1,
                                     type2,
                                     precision,
                                     recall,
                                     accuracy_train,
                                     accuracy_test,
                                     f1,
                                     auc]).reshape(1,-1), columns=['Description','Misclassifications','Type I errors','Type II errors','Precision','Recall','Accuracy_train','Accuracy_test','F1-score','ROC AUC'])

NB_Bag


Confusion: [[311  69]
 [ 32  29]]

Accuracy Train:  0.7949465500485908
Accuracy Test: 0.7709750566893424
Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.82      0.86       380
           1       0.30      0.48      0.36        61

    accuracy                           0.77       441
   macro avg       0.60      0.65      0.61       441
weighted avg       0.82      0.77      0.79       441

AUC Test:  0.7147109577221742
True positives: 29
True negatives: 311
False positives (Type I error): 69
False negatives (Type II error): 32


Unnamed: 0,Description,Misclassifications,Type I errors,Type II errors,Precision,Recall,Accuracy_train,Accuracy_test,F1-score,ROC AUC
0,NB Scaled,101,69,32,0.3,0.48,0.79,0.77,0.36,0.71


### Ada Boost NB

In [29]:
from sklearn.naive_bayes import GaussianNB
NB=GaussianNB()
NB_Ada=AdaBoostClassifier(base_estimator=NB,n_estimators=1,random_state=0)
NB_Ada.fit(xtrains,y_train)
y_pred = NB_Ada.predict(xtests)
ytest_prob = NB_Ada.predict_proba(xtests)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,NB_Ada.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))


cm = pd.DataFrame(confusion_matrix(y_test,y_pred), columns=['Predicted:0','Predicted:1'], index=['Actual:0','Actual:1'])

# calculating TP,TN,FP,FN
TN, FP, FN, TP = cm.iloc[0,0], cm.iloc[0,1], cm.iloc[1,0], cm.iloc[1,1]

# print values
print("True positives:", TP)
print("True negatives:", TN)
print("False positives (Type I error):", FP)
print("False negatives (Type II error):", FN)


from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score, roc_curve

# itereation results
description = "NB_Ada "
misclassifications = FP + FN
type1 = FP
type2 = FN
precision = round(precision_score(y_test,y_pred),2)
recall = round(recall_score(y_test,y_pred),2)
accuracy_train = round(accuracy_score(y_train,NB_Ada.predict(xtrains)),2)
accuracy_test = round(accuracy_score(y_test,y_pred),2)
f1 = round(f1_score(y_test,y_pred),2)
auc = round(roc_auc_score(y_test,ytest_prob),2)

NB_Ada =pd.DataFrame(np.array([description,
                                     misclassifications,
                                     type1,
                                     type2,
                                     precision,
                                     recall,
                                     accuracy_train,
                                     accuracy_test,
                                     f1,
                                     auc]).reshape(1,-1), columns=['Description','Misclassifications','Type I errors','Type II errors','Precision','Recall','Accuracy_train','Accuracy_test','F1-score','ROC AUC'])

NB_Ada


Confusion: [[304  76]
 [ 28  33]]

Accuracy Train:  0.7803692905733722
Accuracy Test: 0.764172335600907
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.80      0.85       380
           1       0.30      0.54      0.39        61

    accuracy                           0.76       441
   macro avg       0.61      0.67      0.62       441
weighted avg       0.83      0.76      0.79       441

AUC Test:  0.711518550474547
True positives: 33
True negatives: 304
False positives (Type I error): 76
False negatives (Type II error): 28


Unnamed: 0,Description,Misclassifications,Type I errors,Type II errors,Precision,Recall,Accuracy_train,Accuracy_test,F1-score,ROC AUC
0,NB_Ada,104,76,28,0.3,0.54,0.78,0.76,0.39,0.71


### Decision Tree

In [30]:
from sklearn.tree import DecisionTreeClassifier

In [31]:
dt_model=DecisionTreeClassifier(random_state=0) #Fully Grown DT
#Fully grown Decision Tree
dt_model.fit(xtrains,y_train)
y_pred = dt_model.predict(xtests)
ytest_prob = dt_model.predict_proba(xtests)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,dt_model.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))

Confusion: [[324  56]
 [ 35  26]]

Accuracy Train:  1.0
Accuracy Test: 0.7936507936507936
Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.85      0.88       380
           1       0.32      0.43      0.36        61

    accuracy                           0.79       441
   macro avg       0.61      0.64      0.62       441
weighted avg       0.82      0.79      0.81       441

AUC Test:  0.6394305435720448


In [32]:
#Perform Grid Search Method to find the optimal max_depth size
from sklearn.model_selection import GridSearchCV
parameter={'max_depth':np.arange(1,10),'criterion' : ['entropy','gini'],'min_samples_leaf':np.arange(3,20)}
kf=KFold(shuffle=True,n_splits=3,random_state=0)
GS=GridSearchCV(dt_model,parameter,cv=kf,scoring='roc_auc')
GS.fit(x_std,y)
GS.best_params_

{'criterion': 'entropy', 'max_depth': 9, 'min_samples_leaf': 19}

In [33]:
dt_reg=DecisionTreeClassifier(max_depth=4,criterion='entropy',min_samples_leaf=19,random_state=0)##Regularised
#Regularised Decision Tree
dt_reg.fit(xtrains,y_train)
y_pred = dt_reg.predict(xtests)
ytest_prob = dt_reg.predict_proba(xtests)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,dt_reg.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))

Confusion: [[363  17]
 [ 44  17]]

Accuracy Train:  0.8668610301263362
Accuracy Test: 0.8616780045351474
Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.96      0.92       380
           1       0.50      0.28      0.36        61

    accuracy                           0.86       441
   macro avg       0.70      0.62      0.64       441
weighted avg       0.84      0.86      0.84       441

AUC Test:  0.7260569456427955


In [34]:
dt_model=DecisionTreeClassifier(random_state=0) #Fully Grown DT
#Fully grown Decision Tree
dt_model.fit(xtrains,y_train)
y_pred = dt_model.predict(xtests)
ytest_prob = dt_model.predict_proba(xtests)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,dt_model.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))


cm = pd.DataFrame(confusion_matrix(y_test,y_pred), columns=['Predicted:0','Predicted:1'], index=['Actual:0','Actual:1'])

# calculating TP,TN,FP,FN
TN, FP, FN, TP = cm.iloc[0,0], cm.iloc[0,1], cm.iloc[1,0], cm.iloc[1,1]

# print values
print("True positives:", TP)
print("True negatives:", TN)
print("False positives (Type I error):", FP)
print("False negatives (Type II error):", FN)


from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score, roc_curve

# itereation results
description = "DT"
misclassifications = FP + FN
type1 = FP
type2 = FN
precision = round(precision_score(y_test,y_pred),2)
recall = round(recall_score(y_test,y_pred),2)
accuracy_train = round(accuracy_score(y_test,y_pred),2)
accuracy_test = round(accuracy_score(y_test,y_pred),2)
f1 = round(f1_score(y_test,y_pred),2)
auc = round(roc_auc_score(y_test,ytest_prob),2)

DT =pd.DataFrame(np.array([description,
                                     misclassifications,
                                     type1,
                                     type2,
                                     precision,
                                     recall,
                                     accuracy_train,
                                     accuracy_test,
                                     f1,
                                     auc]).reshape(1,-1), columns=['Description','Misclassifications','Type I errors','Type II errors','Precision','Recall','Accuracy_train','Accuracy_test','F1-score','ROC AUC'])

DT


Confusion: [[324  56]
 [ 35  26]]

Accuracy Train:  1.0
Accuracy Test: 0.7936507936507936
Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.85      0.88       380
           1       0.32      0.43      0.36        61

    accuracy                           0.79       441
   macro avg       0.61      0.64      0.62       441
weighted avg       0.82      0.79      0.81       441

AUC Test:  0.6394305435720448
True positives: 26
True negatives: 324
False positives (Type I error): 56
False negatives (Type II error): 35


Unnamed: 0,Description,Misclassifications,Type I errors,Type II errors,Precision,Recall,Accuracy_train,Accuracy_test,F1-score,ROC AUC
0,DT,91,56,35,0.32,0.43,0.79,0.79,0.36,0.64


In [35]:
dt_reg=DecisionTreeClassifier(max_depth=4,criterion='entropy',min_samples_leaf=19,random_state=0)##Regularised
#Regularised Decision Tree
dt_reg.fit(xtrains,y_train)
y_pred = dt_reg.predict(xtests)
ytest_prob = dt_reg.predict_proba(xtests)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,dt_reg.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))


cm = pd.DataFrame(confusion_matrix(y_test,y_pred), columns=['Predicted:0','Predicted:1'], index=['Actual:0','Actual:1'])

# calculating TP,TN,FP,FN
TN, FP, FN, TP = cm.iloc[0,0], cm.iloc[0,1], cm.iloc[1,0], cm.iloc[1,1]

# print values
print("True positives:", TP)
print("True negatives:", TN)
print("False positives (Type I error):", FP)
print("False negatives (Type II error):", FN)


from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score, roc_curve

# itereation results
description = "DT_reg"
misclassifications = FP + FN
type1 = FP
type2 = FN
precision = round(precision_score(y_test,y_pred),2)
recall = round(recall_score(y_test,y_pred),2)
accuracy_train = round(accuracy_score(y_test,y_pred),2)
accuracy_test = round(accuracy_score(y_test,y_pred),2)
f1 = round(f1_score(y_test,y_pred),2)
auc = round(roc_auc_score(y_test,ytest_prob),2)

DT_reg =pd.DataFrame(np.array([description,
                                     misclassifications,
                                     type1,
                                     type2,
                                     precision,
                                     recall,
                                     accuracy_train,
                                     accuracy_test,
                                     f1,
                                     auc]).reshape(1,-1), columns=['Description','Misclassifications','Type I errors','Type II errors','Precision','Recall','Accuracy_train','Accuracy_test','F1-score','ROC AUC'])

DT_reg


Confusion: [[363  17]
 [ 44  17]]

Accuracy Train:  0.8668610301263362
Accuracy Test: 0.8616780045351474
Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.96      0.92       380
           1       0.50      0.28      0.36        61

    accuracy                           0.86       441
   macro avg       0.70      0.62      0.64       441
weighted avg       0.84      0.86      0.84       441

AUC Test:  0.7260569456427955
True positives: 17
True negatives: 363
False positives (Type I error): 17
False negatives (Type II error): 44


Unnamed: 0,Description,Misclassifications,Type I errors,Type II errors,Precision,Recall,Accuracy_train,Accuracy_test,F1-score,ROC AUC
0,DT_reg,61,17,44,0.5,0.28,0.86,0.86,0.36,0.73


### Bagged DT

In [36]:
dt_reg=DecisionTreeClassifier(max_depth=4,criterion='entropy',min_samples_leaf=19,random_state=0)##Regularised
#Regularised Decision Tree
dt_Bag=BaggingClassifier(base_estimator=dt_reg,n_estimators=2,random_state=0)
dt_Bag.fit(xtrains,y_train)
y_pred = dt_Bag.predict(xtests)
ytest_prob = dt_Bag.predict_proba(xtests)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,dt_Bag.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))


cm = pd.DataFrame(confusion_matrix(y_test,y_pred), columns=['Predicted:0','Predicted:1'], index=['Actual:0','Actual:1'])

# calculating TP,TN,FP,FN
TN, FP, FN, TP = cm.iloc[0,0], cm.iloc[0,1], cm.iloc[1,0], cm.iloc[1,1]

# print values
print("True positives:", TP)
print("True negatives:", TN)
print("False positives (Type I error):", FP)
print("False negatives (Type II error):", FN)


from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score, roc_curve

# itereation results
description = "dt_Bag"
misclassifications = FP + FN
type1 = FP
type2 = FN
precision = round(precision_score(y_test,y_pred),2)
recall = round(recall_score(y_test,y_pred),2)
accuracy_train = round(accuracy_score(y_train,dt_Bag.predict(xtrains)),2)
accuracy_test = round(accuracy_score(y_test,y_pred),2)
f1 = round(f1_score(y_test,y_pred),2)
auc = round(roc_auc_score(y_test,ytest_prob),2)

dt_Bag =pd.DataFrame(np.array([description,
                                     misclassifications,
                                     type1,
                                     type2,
                                     precision,
                                     recall,
                                     accuracy_train,
                                     accuracy_test,
                                     f1,
                                     auc]).reshape(1,-1), columns=['Description','Misclassifications','Type I errors','Type II errors','Precision','Recall','Accuracy_train','Accuracy_test','F1-score','ROC AUC'])

dt_Bag


Confusion: [[363  17]
 [ 50  11]]

Accuracy Train:  0.8649173955296404
Accuracy Test: 0.8480725623582767
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.96      0.92       380
           1       0.39      0.18      0.25        61

    accuracy                           0.85       441
   macro avg       0.64      0.57      0.58       441
weighted avg       0.81      0.85      0.82       441

AUC Test:  0.6981018119068161
True positives: 11
True negatives: 363
False positives (Type I error): 17
False negatives (Type II error): 50


Unnamed: 0,Description,Misclassifications,Type I errors,Type II errors,Precision,Recall,Accuracy_train,Accuracy_test,F1-score,ROC AUC
0,dt_Bag,67,17,50,0.39,0.18,0.86,0.85,0.25,0.7


### DT_Ada

In [37]:
dt_reg=DecisionTreeClassifier(max_depth=4,criterion='entropy',min_samples_leaf=19,random_state=0)##Regularised
#Regularised Decision Tree
dt_Ada=AdaBoostClassifier(base_estimator=dt_reg,n_estimators=96,random_state=0)
dt_Ada.fit(xtrains,y_train)
y_pred = dt_Ada.predict(xtests)
ytest_prob = dt_Ada.predict_proba(xtests)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,dt_Ada.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))


cm = pd.DataFrame(confusion_matrix(y_test,y_pred), columns=['Predicted:0','Predicted:1'], index=['Actual:0','Actual:1'])

# calculating TP,TN,FP,FN
TN, FP, FN, TP = cm.iloc[0,0], cm.iloc[0,1], cm.iloc[1,0], cm.iloc[1,1]

# print values
print("True positives:", TP)
print("True negatives:", TN)
print("False positives (Type I error):", FP)
print("False negatives (Type II error):", FN)


from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score, roc_curve

# itereation results
description = "dt_Ada"
misclassifications = FP + FN
type1 = FP
type2 = FN
precision = round(precision_score(y_test,y_pred),2)
recall = round(recall_score(y_test,y_pred),2)
accuracy_train = round(accuracy_score(y_train,dt_Ada.predict(xtrains)),2)
accuracy_test = round(accuracy_score(y_test,y_pred),2)
f1 = round(f1_score(y_test,y_pred),2)
auc = round(roc_auc_score(y_test,ytest_prob),2)

dt_ada =pd.DataFrame(np.array([description,
                                     misclassifications,
                                     type1,
                                     type2,
                                     precision,
                                     recall,
                                     accuracy_train,
                                     accuracy_test,
                                     f1,
                                     auc]).reshape(1,-1), columns=['Description','Misclassifications','Type I errors','Type II errors','Precision','Recall','Accuracy_train','Accuracy_test','F1-score','ROC AUC'])

dt_ada


Confusion: [[358  22]
 [ 43  18]]

Accuracy Train:  1.0
Accuracy Test: 0.8526077097505669
Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.94      0.92       380
           1       0.45      0.30      0.36        61

    accuracy                           0.85       441
   macro avg       0.67      0.62      0.64       441
weighted avg       0.83      0.85      0.84       441

AUC Test:  0.6992666091458154
True positives: 18
True negatives: 358
False positives (Type I error): 22
False negatives (Type II error): 43


Unnamed: 0,Description,Misclassifications,Type I errors,Type II errors,Precision,Recall,Accuracy_train,Accuracy_test,F1-score,ROC AUC
0,dt_Ada,65,22,43,0.45,0.3,1.0,0.85,0.36,0.7


### Random Forest

In [38]:
from sklearn.ensemble import RandomForestClassifier
rfc=RandomForestClassifier()
rfc.fit(xtrains,y_train)
y_pred = rfc.predict(xtests)
ytest_prob = rfc.predict_proba(xtests)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,rfc.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))

Confusion: [[375   5]
 [ 54   7]]

Accuracy Train:  1.0
Accuracy Test: 0.8662131519274376
Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.99      0.93       380
           1       0.58      0.11      0.19        61

    accuracy                           0.87       441
   macro avg       0.73      0.55      0.56       441
weighted avg       0.83      0.87      0.83       441

AUC Test:  0.788481449525453


In [39]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint

rfc_tunned=RandomForestClassifier(n_estimators=20,random_state=0)
params={'n_estimators':sp_randint(1,20),
        'max_features':sp_randint(1,6),
        'max_depth':sp_randint(2,10),
        'criterion':['gini','entropy']}

rsearch_rfc=RandomizedSearchCV(rfc_tunned,params,cv=3,scoring='roc_auc',n_jobs=-1,random_state=0)

rsearch_rfc.fit(x_std,y)

RandomizedSearchCV(cv=3,
                   estimator=RandomForestClassifier(n_estimators=20,
                                                    random_state=0),
                   n_jobs=-1,
                   param_distributions={'criterion': ['gini', 'entropy'],
                                        'max_depth': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001948534B108>,
                                        'max_features': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001948534B3C8>,
                                        'n_estimators': <scipy.stats._distn_infrastructure.rv_frozen object at 0x00000194852B8848>},
                   random_state=0, scoring='roc_auc')

In [40]:
rsearch_rfc.best_params_

{'criterion': 'gini', 'max_depth': 9, 'max_features': 1, 'n_estimators': 19}

In [41]:
rfc_tunned=RandomForestClassifier(**rsearch_rfc.best_params_,random_state=0)

rfc_tunned.fit(xtrains,y_train)
y_pred = rfc_tunned.predict(xtests)
ytest_prob = rfc_tunned.predict_proba(xtests)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,rfc_tunned.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))

Confusion: [[379   1]
 [ 58   3]]

Accuracy Train:  0.9193391642371235
Accuracy Test: 0.8662131519274376
Classification Report:
               precision    recall  f1-score   support

           0       0.87      1.00      0.93       380
           1       0.75      0.05      0.09        61

    accuracy                           0.87       441
   macro avg       0.81      0.52      0.51       441
weighted avg       0.85      0.87      0.81       441

AUC Test:  0.7213114754098361


In [42]:
from sklearn.ensemble import RandomForestClassifier
rfc=RandomForestClassifier()
rfc.fit(xtrains,y_train)
y_pred = rfc.predict(xtests)
ytest_prob = rfc.predict_proba(xtests)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,rfc.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))


cm = pd.DataFrame(confusion_matrix(y_test,y_pred), columns=['Predicted:0','Predicted:1'], index=['Actual:0','Actual:1'])

# calculating TP,TN,FP,FN
TN, FP, FN, TP = cm.iloc[0,0], cm.iloc[0,1], cm.iloc[1,0], cm.iloc[1,1]

# print values
print("True positives:", TP)
print("True negatives:", TN)
print("False positives (Type I error):", FP)
print("False negatives (Type II error):", FN)


from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score, roc_curve

# itereation results
description = "RF"
misclassifications = FP + FN
type1 = FP
type2 = FN
precision = round(precision_score(y_test,y_pred),2)
recall = round(recall_score(y_test,y_pred),2)
accuracy_train = round(accuracy_score(y_train,rfc.predict(xtrains)),2)
accuracy_test = round(accuracy_score(y_test,y_pred),2)
f1 = round(f1_score(y_test,y_pred),2)
auc = round(roc_auc_score(y_test,ytest_prob),2)

RF =pd.DataFrame(np.array([description,
                                     misclassifications,
                                     type1,
                                     type2,
                                     precision,
                                     recall,
                                    accuracy_train,
                                     accuracy_test,
                                     f1,
                                     auc]).reshape(1,-1), columns=['Description','Misclassifications','Type I errors','Type II errors','Precision','Recall','Accuracy_train','Accuracy_test','F1-score','ROC AUC'])

RF


Confusion: [[375   5]
 [ 56   5]]

Accuracy Train:  1.0
Accuracy Test: 0.8616780045351474
Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.99      0.92       380
           1       0.50      0.08      0.14        61

    accuracy                           0.86       441
   macro avg       0.69      0.53      0.53       441
weighted avg       0.82      0.86      0.82       441

AUC Test:  0.7728429680759276
True positives: 5
True negatives: 375
False positives (Type I error): 5
False negatives (Type II error): 56


Unnamed: 0,Description,Misclassifications,Type I errors,Type II errors,Precision,Recall,Accuracy_train,Accuracy_test,F1-score,ROC AUC
0,RF,61,5,56,0.5,0.08,1.0,0.86,0.14,0.77


In [43]:
from sklearn.ensemble import RandomForestClassifier
rfc_tunned=RandomForestClassifier(**rsearch_rfc.best_params_,random_state=0)
rfc_tunned.fit(xtrains,y_train)
y_pred = rfc_tunned.predict(xtests)
ytest_prob = rfc_tunned.predict_proba(xtests)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,rfc_tunned.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))


cm = pd.DataFrame(confusion_matrix(y_test,y_pred), columns=['Predicted:0','Predicted:1'], index=['Actual:0','Actual:1'])

# calculating TP,TN,FP,FN
TN, FP, FN, TP = cm.iloc[0,0], cm.iloc[0,1], cm.iloc[1,0], cm.iloc[1,1]

# print values
print("True positives:", TP)
print("True negatives:", TN)
print("False positives (Type I error):", FP)
print("False negatives (Type II error):", FN)


from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score, roc_curve

# itereation results
description = "RF tuned"
misclassifications = FP + FN
type1 = FP
type2 = FN
precision = round(precision_score(y_test,y_pred),2)
recall = round(recall_score(y_test,y_pred),2)
accuracy_train = round(accuracy_score(y_train,rfc_tunned.predict(xtrains)),2)
accuracy_test = round(accuracy_score(y_test,y_pred),2)
f1 = round(f1_score(y_test,y_pred),2)
auc = round(roc_auc_score(y_test,ytest_prob),2)

RF_tuned =pd.DataFrame(np.array([description,
                                     misclassifications,
                                     type1,
                                     type2,
                                     precision,
                                     recall,
                                     accuracy_train,
                                      accuracy_test,
                                     f1,
                                     auc]).reshape(1,-1), columns=['Description','Misclassifications','Type I errors','Type II errors','Precision','Recall','Accuracy_train','Accuracy_test','F1-score','ROC AUC'])

RF_tuned


Confusion: [[379   1]
 [ 58   3]]

Accuracy Train:  0.9193391642371235
Accuracy Test: 0.8662131519274376
Classification Report:
               precision    recall  f1-score   support

           0       0.87      1.00      0.93       380
           1       0.75      0.05      0.09        61

    accuracy                           0.87       441
   macro avg       0.81      0.52      0.51       441
weighted avg       0.85      0.87      0.81       441

AUC Test:  0.7213114754098361
True positives: 3
True negatives: 379
False positives (Type I error): 1
False negatives (Type II error): 58


Unnamed: 0,Description,Misclassifications,Type I errors,Type II errors,Precision,Recall,Accuracy_train,Accuracy_test,F1-score,ROC AUC
0,RF tuned,59,1,58,0.75,0.05,0.92,0.87,0.09,0.72


### Ada Boosting RandomForest

In [44]:
from sklearn.ensemble import RandomForestClassifier
rf_Ada=AdaBoostClassifier(base_estimator=rfc_tunned,n_estimators=22,random_state=0)
rf_Ada.fit(xtrains,y_train)
y_pred = rf_Ada.predict(xtests)
ytest_prob = rf_Ada.predict_proba(xtests)[:,1]
print("Confusion:",confusion_matrix(y_test,y_pred))
print()
print("Accuracy Train: ",accuracy_score(y_train,rf_Ada.predict(xtrains)))
print("Accuracy Test:",accuracy_score(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
print("AUC Test: ",roc_auc_score(y_test,ytest_prob))


cm = pd.DataFrame(confusion_matrix(y_test,y_pred), columns=['Predicted:0','Predicted:1'], index=['Actual:0','Actual:1'])

# calculating TP,TN,FP,FN
TN, FP, FN, TP = cm.iloc[0,0], cm.iloc[0,1], cm.iloc[1,0], cm.iloc[1,1]

# print values
print("True positives:", TP)
print("True negatives:", TN)
print("False positives (Type I error):", FP)
print("False negatives (Type II error):", FN)


from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score, roc_curve

# itereation results
description = "Rf_Ada"
misclassifications = FP + FN
type1 = FP
type2 = FN
precision = round(precision_score(y_test,y_pred),2)
recall = round(recall_score(y_test,y_pred),2)
accuracy_train = round(accuracy_score(y_train,rf_Ada.predict(xtrains)),2)
accuracy_test = round(accuracy_score(y_test,y_pred),2)
f1 = round(f1_score(y_test,y_pred),2)
auc = round(roc_auc_score(y_test,ytest_prob),2)

Rf_Ada =pd.DataFrame(np.array([description,
                                     misclassifications,
                                     type1,
                                     type2,
                                     precision,
                                     recall,
                                     accuracy_train,
                                      accuracy_test,
                                     f1,
                                     auc]).reshape(1,-1), columns=['Description','Misclassifications','Type I errors','Type II errors','Precision','Recall','Accuracy_train','Accuracy_test','F1-score','ROC AUC'])

Rf_Ada


Confusion: [[379   1]
 [ 59   2]]

Accuracy Train:  1.0
Accuracy Test: 0.8639455782312925
Classification Report:
               precision    recall  f1-score   support

           0       0.87      1.00      0.93       380
           1       0.67      0.03      0.06        61

    accuracy                           0.86       441
   macro avg       0.77      0.52      0.49       441
weighted avg       0.84      0.86      0.81       441

AUC Test:  0.695772217428818
True positives: 2
True negatives: 379
False positives (Type I error): 1
False negatives (Type II error): 59


Unnamed: 0,Description,Misclassifications,Type I errors,Type II errors,Precision,Recall,Accuracy_train,Accuracy_test,F1-score,ROC AUC
0,Rf_Ada,60,1,59,0.67,0.03,1.0,0.86,0.06,0.7


### Summary

In [45]:
df_results=pd.DataFrame()
df_results=df_results.append(LR_Scaled, ignore_index=True)
df_results=df_results.append(bag_LR, ignore_index=True)
df_results=df_results.append(Ada_LR, ignore_index=True)

df_results=df_results.append(GB, ignore_index=True)
df_results=df_results.append(Kneighbors, ignore_index=True)
df_results=df_results.append(knn_tuned, ignore_index=True)
df_results=df_results.append(bag_KNN, ignore_index=True)
df_results=df_results.append(Naive_Bayes, ignore_index=True)
df_results=df_results.append(NB_Bag, ignore_index=True)
df_results=df_results.append(NB_Ada, ignore_index=True)
df_results=df_results.append(DT, ignore_index=True)
df_results=df_results.append(dt_Bag, ignore_index=True)
df_results=df_results.append(dt_ada, ignore_index=True)
df_results=df_results.append(RF, ignore_index=True)
df_results=df_results.append(RF_tuned, ignore_index=True)
df_results=df_results.append(Rf_Ada, ignore_index=True)




df_results

Unnamed: 0,Description,Misclassifications,Type I errors,Type II errors,Precision,Recall,Accuracy_train,Accuracy_test,F1-score,ROC AUC
0,LR Scaled,61,11,50,0.66,0.3,0.88,0.86,0.41,0.79
1,Bag_LR,61,16,45,0.5,0.26,0.77,0.86,0.34,0.76
2,Ada_LR,55,4,51,0.71,0.16,0.8,0.88,0.27,0.76
3,GB,57,15,42,0.56,0.31,0.75,0.87,0.4,0.81
4,KNN Scaled,56,2,54,0.78,0.11,0.87,0.87,0.2,0.63
5,KNN Scaled hypertuned,61,0,61,0.0,0.0,1.0,0.86,0.0,0.74
6,Bag_KNN,61,0,61,0.0,0.0,0.8,0.86,0.0,0.75
7,NB Scaled,104,76,28,0.3,0.54,0.78,0.76,0.39,0.71
8,NB Scaled,101,69,32,0.3,0.48,0.79,0.77,0.36,0.71
9,NB_Ada,104,76,28,0.3,0.54,0.78,0.76,0.39,0.71
