In [283]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split #Spliting method
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier,GradientBoostingClassifier, BaggingClassifier, VotingClassifier#Ensemble models
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.naive_bayes import MultinomialNB
from lightgbm import LGBMClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score #Metrics
from mlxtend.classifier import StackingCVClassifier

In [284]:
train = pd.read_csv(r'train.csv')
test = pd.read_csv(r'test.csv')
sample = pd.read_csv(r'sample.csv')

In [285]:
train.head()

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead
0,NNVBBKZB,Female,73,RG268,Other,X3,43,No,1045696,No,0
1,IDD62UNG,Female,30,RG277,Salaried,X1,32,No,581988,No,0
2,HD3DSEMC,Female,56,RG268,Self_Employed,X3,26,No,1484315,Yes,0
3,BF3NC7KV,Male,34,RG270,Salaried,X1,19,No,470454,No,0
4,TEASRWXV,Female,30,RG282,Salaried,X1,33,No,886787,No,0


In [286]:
test.head()

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active
0,VBENBARO,Male,29,RG254,Other,X1,25,Yes,742366,No
1,CCMEWNKY,Male,43,RG268,Other,X2,49,,925537,No
2,VK3KGA9M,Male,31,RG270,Salaried,X1,14,No,215949,No
3,TT8RPZVC,Male,29,RG272,Other,X1,33,No,868070,No
4,SHQZEYTZ,Female,29,RG270,Other,X1,19,No,657087,No


In [287]:
sample.head()

Unnamed: 0,ID,Is_Lead
0,VBENBARO,0
1,CCMEWNKY,0
2,VK3KGA9M,0
3,TT8RPZVC,0
4,SHQZEYTZ,0


In [288]:
train.isna().sum()

ID                         0
Gender                     0
Age                        0
Region_Code                0
Occupation                 0
Channel_Code               0
Vintage                    0
Credit_Product         29325
Avg_Account_Balance        0
Is_Active                  0
Is_Lead                    0
dtype: int64

In [289]:
test.isna().sum()

ID                         0
Gender                     0
Age                        0
Region_Code                0
Occupation                 0
Channel_Code               0
Vintage                    0
Credit_Product         12522
Avg_Account_Balance        0
Is_Active                  0
dtype: int64

In [290]:
train['source'] = 'train'
test['source'] = 'test'

In [291]:
train.head()

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead,source
0,NNVBBKZB,Female,73,RG268,Other,X3,43,No,1045696,No,0,train
1,IDD62UNG,Female,30,RG277,Salaried,X1,32,No,581988,No,0,train
2,HD3DSEMC,Female,56,RG268,Self_Employed,X3,26,No,1484315,Yes,0,train
3,BF3NC7KV,Male,34,RG270,Salaried,X1,19,No,470454,No,0,train
4,TEASRWXV,Female,30,RG282,Salaried,X1,33,No,886787,No,0,train


In [292]:
test.head()

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,source
0,VBENBARO,Male,29,RG254,Other,X1,25,Yes,742366,No,test
1,CCMEWNKY,Male,43,RG268,Other,X2,49,,925537,No,test
2,VK3KGA9M,Male,31,RG270,Salaried,X1,14,No,215949,No,test
3,TT8RPZVC,Male,29,RG272,Other,X1,33,No,868070,No,test
4,SHQZEYTZ,Female,29,RG270,Other,X1,19,No,657087,No,test


In [293]:
df = pd.concat([train,test])

In [294]:
df.head()

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead,source
0,NNVBBKZB,Female,73,RG268,Other,X3,43,No,1045696,No,0.0,train
1,IDD62UNG,Female,30,RG277,Salaried,X1,32,No,581988,No,0.0,train
2,HD3DSEMC,Female,56,RG268,Self_Employed,X3,26,No,1484315,Yes,0.0,train
3,BF3NC7KV,Male,34,RG270,Salaried,X1,19,No,470454,No,0.0,train
4,TEASRWXV,Female,30,RG282,Salaried,X1,33,No,886787,No,0.0,train


In [295]:
df

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead,source
0,NNVBBKZB,Female,73,RG268,Other,X3,43,No,1045696,No,0.0,train
1,IDD62UNG,Female,30,RG277,Salaried,X1,32,No,581988,No,0.0,train
2,HD3DSEMC,Female,56,RG268,Self_Employed,X3,26,No,1484315,Yes,0.0,train
3,BF3NC7KV,Male,34,RG270,Salaried,X1,19,No,470454,No,0.0,train
4,TEASRWXV,Female,30,RG282,Salaried,X1,33,No,886787,No,0.0,train
...,...,...,...,...,...,...,...,...,...,...,...,...
105307,DBENJOYI,Male,52,RG268,Salaried,X2,86,Yes,4242558,Yes,,test
105308,CWQ72DWS,Male,55,RG277,Other,X2,86,Yes,1159153,No,,test
105309,HDESC8GU,Male,35,RG254,Salaried,X4,15,No,1703727,No,,test
105310,2PW4SFCA,Male,53,RG254,Other,X3,93,No,737178,Yes,,test


In [296]:
df.columns

Index(['ID', 'Gender', 'Age', 'Region_Code', 'Occupation', 'Channel_Code',
       'Vintage', 'Credit_Product', 'Avg_Account_Balance', 'Is_Active',
       'Is_Lead', 'source'],
      dtype='object')

In [297]:
df['ID'].nunique()

351037

In [298]:
df['Gender'].value_counts()

Male      191902
Female    159135
Name: Gender, dtype: int64

In [299]:
df.isna().sum()

ID                          0
Gender                      0
Age                         0
Region_Code                 0
Occupation                  0
Channel_Code                0
Vintage                     0
Credit_Product          41847
Avg_Account_Balance         0
Is_Active                   0
Is_Lead                105312
source                      0
dtype: int64

In [300]:
df['Age'].value_counts()

28    20880
29    18984
27    18615
30    15684
26    12916
      ...  
82     1077
83     1022
84      916
85      888
23        1
Name: Age, Length: 63, dtype: int64

In [301]:
df['Region_Code'].value_counts()

RG268    51059
RG283    42297
RG254    38577
RG284    27493
RG277    18422
RG280    18304
RG270    11160
RG269    11151
RG261    10919
RG257     8689
RG251     8628
RG282     8166
RG272     7590
RG274     7494
RG281     7237
RG273     6300
RG252     6097
RG279     5576
RG263     5302
RG275     4628
RG260     4410
RG256     4041
RG264     4034
RG276     3949
RG259     3688
RG250     3544
RG255     2909
RG258     2763
RG253     2668
RG278     2646
RG262     2588
RG265     2214
RG266     2209
RG271     2196
RG267     2089
Name: Region_Code, dtype: int64

In [302]:
df['Region_Code'].nunique()

35

In [303]:
df['Occupation'].value_counts()

Self_Employed    144078
Salaried         102912
Other            100304
Entrepreneur       3743
Name: Occupation, dtype: int64

In [304]:
df['Channel_Code'].value_counts()

X1    148202
X3     97981
X2     96902
X4      7952
Name: Channel_Code, dtype: int64

In [305]:
df['Vintage'].value_counts()

13     17717
14     17593
21     17550
15     17518
19     17457
       ...  
129      746
128      712
133       29
135       26
134       17
Name: Vintage, Length: 66, dtype: int64

In [306]:
df['Credit_Product'].isna().sum()

41847

In [307]:
df['Credit_Product'].value_counts()

No     205965
Yes    103225
Name: Credit_Product, dtype: int64

In [308]:
df[df['Credit_Product'].isna()] ['Gender'].value_counts()


Male      26151
Female    15696
Name: Gender, dtype: int64

In [309]:
df.groupby(['Credit_Product']) ['source'].value_counts()

Credit_Product  source
No              train     144357
                test       61608
Yes             train      72043
                test       31182
Name: source, dtype: int64

In [310]:
df[df['Credit_Product'].isna()] ['Gender']

6           Male
15          Male
31        Female
36        Female
40        Female
           ...  
105260      Male
105266      Male
105272      Male
105286      Male
105295      Male
Name: Gender, Length: 41847, dtype: object

In [311]:
df['Credit_Product'].fillna(value = 'No', inplace = True)

In [312]:
df.isna().sum()

ID                          0
Gender                      0
Age                         0
Region_Code                 0
Occupation                  0
Channel_Code                0
Vintage                     0
Credit_Product              0
Avg_Account_Balance         0
Is_Active                   0
Is_Lead                105312
source                      0
dtype: int64

In [313]:
df_number_columns = df.select_dtypes(include=np.number).columns

In [314]:
df_category_columns = df.select_dtypes(exclude=np.number).columns

In [315]:

df_category_columns

Index(['ID', 'Gender', 'Region_Code', 'Occupation', 'Channel_Code',
       'Credit_Product', 'Is_Active', 'source'],
      dtype='object')

In [316]:
data_to_encode = df[df_category_columns].drop(columns = ['source','ID'])

In [317]:
data_to_encode.isna().sum()

Gender            0
Region_Code       0
Occupation        0
Channel_Code      0
Credit_Product    0
Is_Active         0
dtype: int64

In [318]:
pd.get_dummies(data_to_encode).shape

(351037, 49)

In [319]:
data_to_encoded = pd.get_dummies(data_to_encode)

In [320]:
data_to_encoded

Unnamed: 0,Gender_Female,Gender_Male,Region_Code_RG250,Region_Code_RG251,Region_Code_RG252,Region_Code_RG253,Region_Code_RG254,Region_Code_RG255,Region_Code_RG256,Region_Code_RG257,...,Occupation_Salaried,Occupation_Self_Employed,Channel_Code_X1,Channel_Code_X2,Channel_Code_X3,Channel_Code_X4,Credit_Product_No,Credit_Product_Yes,Is_Active_No,Is_Active_Yes
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,1,0,1,0
1,1,0,0,0,0,0,0,0,0,0,...,1,0,1,0,0,0,1,0,1,0
2,1,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,0,1,0,0,1
3,0,1,0,0,0,0,0,0,0,0,...,1,0,1,0,0,0,1,0,1,0
4,1,0,0,0,0,0,0,0,0,0,...,1,0,1,0,0,0,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105307,0,1,0,0,0,0,0,0,0,0,...,1,0,0,1,0,0,0,1,0,1
105308,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,1,1,0
105309,0,1,0,0,0,0,1,0,0,0,...,1,0,0,0,0,1,1,0,1,0
105310,0,1,0,0,0,0,1,0,0,0,...,0,0,0,0,1,0,1,0,0,1


In [321]:
df_final = pd.concat([data_to_encoded, df[df_number_columns], 
                      df[['source']]], axis = 1)

In [322]:
df_final

Unnamed: 0,Gender_Female,Gender_Male,Region_Code_RG250,Region_Code_RG251,Region_Code_RG252,Region_Code_RG253,Region_Code_RG254,Region_Code_RG255,Region_Code_RG256,Region_Code_RG257,...,Channel_Code_X4,Credit_Product_No,Credit_Product_Yes,Is_Active_No,Is_Active_Yes,Age,Vintage,Avg_Account_Balance,Is_Lead,source
0,1,0,0,0,0,0,0,0,0,0,...,0,1,0,1,0,73,43,1045696,0.0,train
1,1,0,0,0,0,0,0,0,0,0,...,0,1,0,1,0,30,32,581988,0.0,train
2,1,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,56,26,1484315,0.0,train
3,0,1,0,0,0,0,0,0,0,0,...,0,1,0,1,0,34,19,470454,0.0,train
4,1,0,0,0,0,0,0,0,0,0,...,0,1,0,1,0,30,33,886787,0.0,train
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105307,0,1,0,0,0,0,0,0,0,0,...,0,0,1,0,1,52,86,4242558,,test
105308,0,1,0,0,0,0,0,0,0,0,...,0,0,1,1,0,55,86,1159153,,test
105309,0,1,0,0,0,0,1,0,0,0,...,1,1,0,1,0,35,15,1703727,,test
105310,0,1,0,0,0,0,1,0,0,0,...,0,1,0,0,1,53,93,737178,,test


In [323]:
train_preprocessed = df_final[df_final['source'] == 'train']
test_preprocessed = df_final[df_final['source'] == 'test']

In [324]:
del train_preprocessed['source']
train_preprocessed.head()

Unnamed: 0,Gender_Female,Gender_Male,Region_Code_RG250,Region_Code_RG251,Region_Code_RG252,Region_Code_RG253,Region_Code_RG254,Region_Code_RG255,Region_Code_RG256,Region_Code_RG257,...,Channel_Code_X3,Channel_Code_X4,Credit_Product_No,Credit_Product_Yes,Is_Active_No,Is_Active_Yes,Age,Vintage,Avg_Account_Balance,Is_Lead
0,1,0,0,0,0,0,0,0,0,0,...,1,0,1,0,1,0,73,43,1045696,0.0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,30,32,581988,0.0
2,1,0,0,0,0,0,0,0,0,0,...,1,0,1,0,0,1,56,26,1484315,0.0
3,0,1,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,34,19,470454,0.0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,30,33,886787,0.0


In [325]:
del test_preprocessed['source']
test_preprocessed.head()

Unnamed: 0,Gender_Female,Gender_Male,Region_Code_RG250,Region_Code_RG251,Region_Code_RG252,Region_Code_RG253,Region_Code_RG254,Region_Code_RG255,Region_Code_RG256,Region_Code_RG257,...,Channel_Code_X3,Channel_Code_X4,Credit_Product_No,Credit_Product_Yes,Is_Active_No,Is_Active_Yes,Age,Vintage,Avg_Account_Balance,Is_Lead
0,0,1,0,0,0,0,1,0,0,0,...,0,0,0,1,1,0,29,25,742366,
1,0,1,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,43,49,925537,
2,0,1,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,31,14,215949,
3,0,1,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,29,33,868070,
4,1,0,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,29,19,657087,


In [326]:
X = train_preprocessed.drop(columns='Is_Lead')

In [327]:
y = train_preprocessed['Is_Lead']

In [328]:
X.head()

Unnamed: 0,Gender_Female,Gender_Male,Region_Code_RG250,Region_Code_RG251,Region_Code_RG252,Region_Code_RG253,Region_Code_RG254,Region_Code_RG255,Region_Code_RG256,Region_Code_RG257,...,Channel_Code_X2,Channel_Code_X3,Channel_Code_X4,Credit_Product_No,Credit_Product_Yes,Is_Active_No,Is_Active_Yes,Age,Vintage,Avg_Account_Balance
0,1,0,0,0,0,0,0,0,0,0,...,0,1,0,1,0,1,0,73,43,1045696
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,1,0,30,32,581988
2,1,0,0,0,0,0,0,0,0,0,...,0,1,0,1,0,0,1,56,26,1484315
3,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,1,0,34,19,470454
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,1,0,30,33,886787


In [329]:
y.head()

0    0.0
1    0.0
2    0.0
3    0.0
4    0.0
Name: Is_Lead, dtype: float64

In [330]:
X_predict = test_preprocessed.drop(columns='Is_Lead')
X_predict

Unnamed: 0,Gender_Female,Gender_Male,Region_Code_RG250,Region_Code_RG251,Region_Code_RG252,Region_Code_RG253,Region_Code_RG254,Region_Code_RG255,Region_Code_RG256,Region_Code_RG257,...,Channel_Code_X2,Channel_Code_X3,Channel_Code_X4,Credit_Product_No,Credit_Product_Yes,Is_Active_No,Is_Active_Yes,Age,Vintage,Avg_Account_Balance
0,0,1,0,0,0,0,1,0,0,0,...,0,0,0,0,1,1,0,29,25,742366
1,0,1,0,0,0,0,0,0,0,0,...,1,0,0,1,0,1,0,43,49,925537
2,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,1,0,31,14,215949
3,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,1,0,29,33,868070
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,1,0,29,19,657087
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105307,0,1,0,0,0,0,0,0,0,0,...,1,0,0,0,1,0,1,52,86,4242558
105308,0,1,0,0,0,0,0,0,0,0,...,1,0,0,0,1,1,0,55,86,1159153
105309,0,1,0,0,0,0,1,0,0,0,...,0,0,1,1,0,1,0,35,15,1703727
105310,0,1,0,0,0,0,1,0,0,0,...,0,1,0,1,0,0,1,53,93,737178


In [331]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 123, stratify = y)

In [332]:
X_train.head()

Unnamed: 0,Gender_Female,Gender_Male,Region_Code_RG250,Region_Code_RG251,Region_Code_RG252,Region_Code_RG253,Region_Code_RG254,Region_Code_RG255,Region_Code_RG256,Region_Code_RG257,...,Channel_Code_X2,Channel_Code_X3,Channel_Code_X4,Credit_Product_No,Credit_Product_Yes,Is_Active_No,Is_Active_Yes,Age,Vintage,Avg_Account_Balance
194309,1,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,1,25,15,721763
10975,0,1,0,0,0,0,0,0,0,0,...,0,1,0,0,1,1,0,76,91,827495
115600,0,1,0,0,0,0,0,0,0,0,...,0,1,0,1,0,0,1,80,105,3781672
206611,0,1,0,0,0,0,0,0,0,0,...,0,1,0,1,0,0,1,51,62,999078
239303,1,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,1,0,38,33,789670


In [333]:
X_test.head()

Unnamed: 0,Gender_Female,Gender_Male,Region_Code_RG250,Region_Code_RG251,Region_Code_RG252,Region_Code_RG253,Region_Code_RG254,Region_Code_RG255,Region_Code_RG256,Region_Code_RG257,...,Channel_Code_X2,Channel_Code_X3,Channel_Code_X4,Credit_Product_No,Credit_Product_Yes,Is_Active_No,Is_Active_Yes,Age,Vintage,Avg_Account_Balance
23713,1,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,1,0,31,27,1410988
193975,1,0,0,0,0,0,1,0,0,0,...,0,0,0,1,0,1,0,31,27,1459644
103519,1,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,1,0,25,15,6590512
54848,1,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,1,0,52,51,1449703
14787,1,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,1,0,34,20,1454705


In [334]:
y_train.head()

194309    0.0
10975     1.0
115600    0.0
206611    1.0
239303    0.0
Name: Is_Lead, dtype: float64

In [335]:
y_test.head()

23713     0.0
193975    0.0
103519    0.0
54848     1.0
14787     0.0
Name: Is_Lead, dtype: float64

In [344]:
def metrics(model, X_train, X_test, y_true, y_pred):
    print('Confusion Matrix \n', confusion_matrix(y_true, y_pred))
    print('Accuracy Score \n', accuracy_score(y_true, y_pred))
    print('Classification Report \n', classification_report(y_true, y_pred))
    
def predictions(model, X_train = X_train, X_test = X_test, y_train = y_train, y_test = y_test, evaluation = True, full = False):
    if evaluation:
        model.fit(X_train, y_train)
    
        train_pred = model.predict(X_train)
    
        test_pred = model.predict(X_test)
    
        actual = [y_train, y_test]
    
        pred = [train_pred, test_pred]
    
        data = ['Train', 'Test']
    
        for i in range(2):
        
            print(data[i],'\n')
        
            metrics(model, X_train,  X_test, actual[i], pred[i])
    if full:
        model.fit(X, y)
        pred_data = model.predict(X_predict)
        #return pred_data
        
        sample['Is_Lead'] = pred_data
        sample.to_csv(str(model)[:10]+'.csv', index = None)

In [345]:
lr = LogisticRegression(max_iter=500)

In [346]:
predictions(lr)

Train 

Confusion Matrix 
 [[131206      0]
 [ 40801      0]]
Accuracy Score 
 0.7627945374316162


  _warn_prf(average, modifier, msg_start, len(result))


Classification Report 
               precision    recall  f1-score   support

         0.0       0.76      1.00      0.87    131206
         1.0       0.00      0.00      0.00     40801

    accuracy                           0.76    172007
   macro avg       0.38      0.50      0.43    172007
weighted avg       0.58      0.76      0.66    172007

Test 

Confusion Matrix 
 [[56231     0]
 [17487     0]]
Accuracy Score 
 0.7627852084972463
Classification Report 
               precision    recall  f1-score   support

         0.0       0.76      1.00      0.87     56231
         1.0       0.00      0.00      0.00     17487

    accuracy                           0.76     73718
   macro avg       0.38      0.50      0.43     73718
weighted avg       0.58      0.76      0.66     73718



In [347]:
predictions(lr, evaluation = False, full=True)

Navie Byes

In [348]:
nb = MultinomialNB()

predictions(nb)

Train 

Confusion Matrix 
 [[84045 47161]
 [17136 23665]]
Accuracy Score 
 0.6261954455341934
Classification Report 
               precision    recall  f1-score   support

         0.0       0.83      0.64      0.72    131206
         1.0       0.33      0.58      0.42     40801

    accuracy                           0.63    172007
   macro avg       0.58      0.61      0.57    172007
weighted avg       0.71      0.63      0.65    172007

Test 

Confusion Matrix 
 [[36147 20084]
 [ 7291 10196]]
Accuracy Score 
 0.6286524322417862
Classification Report 
               precision    recall  f1-score   support

         0.0       0.83      0.64      0.73     56231
         1.0       0.34      0.58      0.43     17487

    accuracy                           0.63     73718
   macro avg       0.58      0.61      0.58     73718
weighted avg       0.71      0.63      0.65     73718



In [350]:
predictions(nb, evaluation=False, full = True)
#Predicting Full data set.

Decision Tree

In [351]:
dtree = DecisionTreeClassifier()

predictions(dtree)

Train 

Confusion Matrix 
 [[131206      0]
 [     0  40801]]
Accuracy Score 
 1.0
Classification Report 
               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00    131206
         1.0       1.00      1.00      1.00     40801

    accuracy                           1.00    172007
   macro avg       1.00      1.00      1.00    172007
weighted avg       1.00      1.00      1.00    172007

Test 

Confusion Matrix 
 [[45375 10856]
 [10249  7238]]
Accuracy Score 
 0.7137062861173662
Classification Report 
               precision    recall  f1-score   support

         0.0       0.82      0.81      0.81     56231
         1.0       0.40      0.41      0.41     17487

    accuracy                           0.71     73718
   macro avg       0.61      0.61      0.61     73718
weighted avg       0.72      0.71      0.72     73718



In [352]:
predictions(dtree, evaluation=False, full = True)

Bagging Clasifier

In [353]:
bag = BaggingClassifier()
predictions(bag)

Train 

Confusion Matrix 
 [[131030    176]
 [  3461  37340]]
Accuracy Score 
 0.9788555116942915
Classification Report 
               precision    recall  f1-score   support

         0.0       0.97      1.00      0.99    131206
         1.0       1.00      0.92      0.95     40801

    accuracy                           0.98    172007
   macro avg       0.98      0.96      0.97    172007
weighted avg       0.98      0.98      0.98    172007

Test 

Confusion Matrix 
 [[51541  4690]
 [12233  5254]]
Accuracy Score 
 0.770435985783662
Classification Report 
               precision    recall  f1-score   support

         0.0       0.81      0.92      0.86     56231
         1.0       0.53      0.30      0.38     17487

    accuracy                           0.77     73718
   macro avg       0.67      0.61      0.62     73718
weighted avg       0.74      0.77      0.75     73718



In [354]:
predictions(bag, evaluation=False, full = True)

Random Forest

In [355]:
rf = RandomForestClassifier()
predictions(rf)

Train 

Confusion Matrix 
 [[131206      0]
 [    13  40788]]
Accuracy Score 
 0.999924421680513
Classification Report 
               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00    131206
         1.0       1.00      1.00      1.00     40801

    accuracy                           1.00    172007
   macro avg       1.00      1.00      1.00    172007
weighted avg       1.00      1.00      1.00    172007

Test 

Confusion Matrix 
 [[51734  4497]
 [11859  5628]]
Accuracy Score 
 0.7781274586939417
Classification Report 
               precision    recall  f1-score   support

         0.0       0.81      0.92      0.86     56231
         1.0       0.56      0.32      0.41     17487

    accuracy                           0.78     73718
   macro avg       0.68      0.62      0.64     73718
weighted avg       0.75      0.78      0.76     73718



In [356]:
predictions(rf, evaluation=False, full = True)
#Predicting test data set.

Ada Boost

In [357]:

ada = AdaBoostClassifier()

predictions(ada)

Train 

Confusion Matrix 
 [[126638   4568]
 [ 32848   7953]]
Accuracy Score 
 0.7824739690826537
Classification Report 
               precision    recall  f1-score   support

         0.0       0.79      0.97      0.87    131206
         1.0       0.64      0.19      0.30     40801

    accuracy                           0.78    172007
   macro avg       0.71      0.58      0.58    172007
weighted avg       0.76      0.78      0.74    172007

Test 

Confusion Matrix 
 [[54295  1936]
 [14014  3473]]
Accuracy Score 
 0.7836349331235248
Classification Report 
               precision    recall  f1-score   support

         0.0       0.79      0.97      0.87     56231
         1.0       0.64      0.20      0.30     17487

    accuracy                           0.78     73718
   macro avg       0.72      0.58      0.59     73718
weighted avg       0.76      0.78      0.74     73718



In [358]:
predictions(ada, evaluation=False, full = True)
#Predicting test data set.

Gradient Boost

In [359]:
grad = GradientBoostingClassifier()

predictions(grad)

Train 

Confusion Matrix 
 [[127912   3294]
 [ 32549   8252]]
Accuracy Score 
 0.7916189457405803
Classification Report 
               precision    recall  f1-score   support

         0.0       0.80      0.97      0.88    131206
         1.0       0.71      0.20      0.32     40801

    accuracy                           0.79    172007
   macro avg       0.76      0.59      0.60    172007
weighted avg       0.78      0.79      0.74    172007

Test 

Confusion Matrix 
 [[54856  1375]
 [13860  3627]]
Accuracy Score 
 0.7933340568110909
Classification Report 
               precision    recall  f1-score   support

         0.0       0.80      0.98      0.88     56231
         1.0       0.73      0.21      0.32     17487

    accuracy                           0.79     73718
   macro avg       0.76      0.59      0.60     73718
weighted avg       0.78      0.79      0.75     73718



In [360]:
predictions(grad, evaluation=False, full = True)

LGBM Classifier

In [361]:
lgbm = LGBMClassifier()
predictions(lgbm)

Train 

Confusion Matrix 
 [[126597   4609]
 [ 30141  10660]]
Accuracy Score 
 0.7979733382943718
Classification Report 
               precision    recall  f1-score   support

         0.0       0.81      0.96      0.88    131206
         1.0       0.70      0.26      0.38     40801

    accuracy                           0.80    172007
   macro avg       0.75      0.61      0.63    172007
weighted avg       0.78      0.80      0.76    172007

Test 

Confusion Matrix 
 [[54099  2132]
 [12987  4500]]
Accuracy Score 
 0.7949076209338289
Classification Report 
               precision    recall  f1-score   support

         0.0       0.81      0.96      0.88     56231
         1.0       0.68      0.26      0.37     17487

    accuracy                           0.79     73718
   macro avg       0.74      0.61      0.63     73718
weighted avg       0.78      0.79      0.76     73718



In [362]:
predictions(lgbm, evaluation=False, full = True)
#Predicting test data set.

XG Boost

In [363]:
xgb = XGBClassifier()
predictions(xgb)







Train 

Confusion Matrix 
 [[126085   5121]
 [ 28138  12663]]
Accuracy Score 
 0.806641590167842
Classification Report 
               precision    recall  f1-score   support

         0.0       0.82      0.96      0.88    131206
         1.0       0.71      0.31      0.43     40801

    accuracy                           0.81    172007
   macro avg       0.76      0.64      0.66    172007
weighted avg       0.79      0.81      0.78    172007

Test 

Confusion Matrix 
 [[53593  2638]
 [12520  4967]]
Accuracy Score 
 0.794378577823598
Classification Report 
               precision    recall  f1-score   support

         0.0       0.81      0.95      0.88     56231
         1.0       0.65      0.28      0.40     17487

    accuracy                           0.79     73718
   macro avg       0.73      0.62      0.64     73718
weighted avg       0.77      0.79      0.76     73718



In [364]:
predictions(xgb, evaluation=False, full = True)
#Predicting test data set.



CAT Boost

In [365]:
cat = CatBoostClassifier()

In [366]:
predictions(cat)

Learning rate set to 0.092792
0:	learn: 0.6421871	total: 262ms	remaining: 4m 21s
1:	learn: 0.6032739	total: 357ms	remaining: 2m 58s
2:	learn: 0.5734717	total: 461ms	remaining: 2m 33s
3:	learn: 0.5489574	total: 576ms	remaining: 2m 23s
4:	learn: 0.5296216	total: 689ms	remaining: 2m 17s
5:	learn: 0.5145078	total: 814ms	remaining: 2m 14s
6:	learn: 0.5027293	total: 920ms	remaining: 2m 10s
7:	learn: 0.4943846	total: 1.04s	remaining: 2m 9s
8:	learn: 0.4860746	total: 1.2s	remaining: 2m 12s
9:	learn: 0.4799463	total: 1.33s	remaining: 2m 12s
10:	learn: 0.4745585	total: 1.47s	remaining: 2m 12s
11:	learn: 0.4706236	total: 1.56s	remaining: 2m 8s
12:	learn: 0.4673966	total: 1.66s	remaining: 2m 5s
13:	learn: 0.4643528	total: 1.75s	remaining: 2m 3s
14:	learn: 0.4614852	total: 1.85s	remaining: 2m 1s
15:	learn: 0.4593891	total: 1.94s	remaining: 1m 59s
16:	learn: 0.4577894	total: 2.04s	remaining: 1m 57s
17:	learn: 0.4555716	total: 2.13s	remaining: 1m 56s
18:	learn: 0.4545608	total: 2.21s	remaining: 1m 54

158:	learn: 0.4353352	total: 17.3s	remaining: 1m 31s
159:	learn: 0.4352999	total: 17.4s	remaining: 1m 31s
160:	learn: 0.4352585	total: 17.5s	remaining: 1m 31s
161:	learn: 0.4352271	total: 17.6s	remaining: 1m 31s
162:	learn: 0.4351801	total: 17.7s	remaining: 1m 31s
163:	learn: 0.4351508	total: 17.8s	remaining: 1m 30s
164:	learn: 0.4351122	total: 17.9s	remaining: 1m 30s
165:	learn: 0.4350823	total: 18s	remaining: 1m 30s
166:	learn: 0.4350438	total: 18.1s	remaining: 1m 30s
167:	learn: 0.4350061	total: 18.2s	remaining: 1m 30s
168:	learn: 0.4349823	total: 18.3s	remaining: 1m 30s
169:	learn: 0.4349577	total: 18.4s	remaining: 1m 29s
170:	learn: 0.4349179	total: 18.5s	remaining: 1m 29s
171:	learn: 0.4348669	total: 18.6s	remaining: 1m 29s
172:	learn: 0.4348237	total: 18.7s	remaining: 1m 29s
173:	learn: 0.4347902	total: 18.8s	remaining: 1m 29s
174:	learn: 0.4347590	total: 18.9s	remaining: 1m 29s
175:	learn: 0.4346915	total: 19s	remaining: 1m 28s
176:	learn: 0.4346550	total: 19.1s	remaining: 1m 2

315:	learn: 0.4302830	total: 32.7s	remaining: 1m 10s
316:	learn: 0.4302567	total: 32.8s	remaining: 1m 10s
317:	learn: 0.4302175	total: 32.9s	remaining: 1m 10s
318:	learn: 0.4301878	total: 33s	remaining: 1m 10s
319:	learn: 0.4301707	total: 33.1s	remaining: 1m 10s
320:	learn: 0.4301376	total: 33.2s	remaining: 1m 10s
321:	learn: 0.4301043	total: 33.3s	remaining: 1m 10s
322:	learn: 0.4300797	total: 33.4s	remaining: 1m 9s
323:	learn: 0.4300590	total: 33.5s	remaining: 1m 9s
324:	learn: 0.4300391	total: 33.5s	remaining: 1m 9s
325:	learn: 0.4300134	total: 33.6s	remaining: 1m 9s
326:	learn: 0.4299781	total: 33.7s	remaining: 1m 9s
327:	learn: 0.4299486	total: 33.8s	remaining: 1m 9s
328:	learn: 0.4299279	total: 33.9s	remaining: 1m 9s
329:	learn: 0.4298986	total: 34s	remaining: 1m 9s
330:	learn: 0.4298693	total: 34.1s	remaining: 1m 8s
331:	learn: 0.4298361	total: 34.2s	remaining: 1m 8s
332:	learn: 0.4298132	total: 34.3s	remaining: 1m 8s
333:	learn: 0.4297877	total: 34.3s	remaining: 1m 8s
334:	lear

474:	learn: 0.4261051	total: 47.7s	remaining: 52.7s
475:	learn: 0.4260870	total: 47.8s	remaining: 52.6s
476:	learn: 0.4260683	total: 47.9s	remaining: 52.5s
477:	learn: 0.4260353	total: 48s	remaining: 52.4s
478:	learn: 0.4260075	total: 48.1s	remaining: 52.3s
479:	learn: 0.4259864	total: 48.2s	remaining: 52.2s
480:	learn: 0.4259565	total: 48.3s	remaining: 52.1s
481:	learn: 0.4259373	total: 48.4s	remaining: 52s
482:	learn: 0.4259226	total: 48.5s	remaining: 51.9s
483:	learn: 0.4259028	total: 48.5s	remaining: 51.8s
484:	learn: 0.4258645	total: 48.6s	remaining: 51.6s
485:	learn: 0.4258443	total: 48.7s	remaining: 51.5s
486:	learn: 0.4258204	total: 48.8s	remaining: 51.4s
487:	learn: 0.4257871	total: 48.9s	remaining: 51.3s
488:	learn: 0.4257571	total: 49s	remaining: 51.2s
489:	learn: 0.4257321	total: 49.1s	remaining: 51.1s
490:	learn: 0.4256933	total: 49.2s	remaining: 51s
491:	learn: 0.4256648	total: 49.3s	remaining: 50.9s
492:	learn: 0.4256427	total: 49.4s	remaining: 50.8s
493:	learn: 0.425618

636:	learn: 0.4222724	total: 1m 2s	remaining: 35.7s
637:	learn: 0.4222589	total: 1m 2s	remaining: 35.6s
638:	learn: 0.4222378	total: 1m 2s	remaining: 35.5s
639:	learn: 0.4222029	total: 1m 2s	remaining: 35.4s
640:	learn: 0.4221644	total: 1m 3s	remaining: 35.3s
641:	learn: 0.4221430	total: 1m 3s	remaining: 35.2s
642:	learn: 0.4221260	total: 1m 3s	remaining: 35.1s
643:	learn: 0.4220970	total: 1m 3s	remaining: 35s
644:	learn: 0.4220777	total: 1m 3s	remaining: 34.9s
645:	learn: 0.4220601	total: 1m 3s	remaining: 34.8s
646:	learn: 0.4220411	total: 1m 3s	remaining: 34.7s
647:	learn: 0.4220163	total: 1m 3s	remaining: 34.6s
648:	learn: 0.4219904	total: 1m 3s	remaining: 34.5s
649:	learn: 0.4219765	total: 1m 3s	remaining: 34.4s
650:	learn: 0.4219568	total: 1m 4s	remaining: 34.3s
651:	learn: 0.4219384	total: 1m 4s	remaining: 34.2s
652:	learn: 0.4219241	total: 1m 4s	remaining: 34.1s
653:	learn: 0.4219083	total: 1m 4s	remaining: 34s
654:	learn: 0.4218877	total: 1m 4s	remaining: 33.9s
655:	learn: 0.42

794:	learn: 0.4188884	total: 1m 17s	remaining: 20.1s
795:	learn: 0.4188718	total: 1m 18s	remaining: 20s
796:	learn: 0.4188488	total: 1m 18s	remaining: 19.9s
797:	learn: 0.4188297	total: 1m 18s	remaining: 19.8s
798:	learn: 0.4188067	total: 1m 18s	remaining: 19.7s
799:	learn: 0.4187874	total: 1m 18s	remaining: 19.6s
800:	learn: 0.4187712	total: 1m 18s	remaining: 19.5s
801:	learn: 0.4187479	total: 1m 18s	remaining: 19.4s
802:	learn: 0.4187216	total: 1m 18s	remaining: 19.4s
803:	learn: 0.4187071	total: 1m 18s	remaining: 19.3s
804:	learn: 0.4186828	total: 1m 19s	remaining: 19.2s
805:	learn: 0.4186716	total: 1m 19s	remaining: 19.1s
806:	learn: 0.4186445	total: 1m 19s	remaining: 19s
807:	learn: 0.4186186	total: 1m 19s	remaining: 18.9s
808:	learn: 0.4186064	total: 1m 19s	remaining: 18.8s
809:	learn: 0.4185853	total: 1m 19s	remaining: 18.7s
810:	learn: 0.4185630	total: 1m 19s	remaining: 18.6s
811:	learn: 0.4185458	total: 1m 19s	remaining: 18.5s
812:	learn: 0.4185198	total: 1m 19s	remaining: 18.

950:	learn: 0.4158802	total: 1m 33s	remaining: 4.81s
951:	learn: 0.4158577	total: 1m 33s	remaining: 4.71s
952:	learn: 0.4158392	total: 1m 33s	remaining: 4.62s
953:	learn: 0.4158256	total: 1m 33s	remaining: 4.52s
954:	learn: 0.4158057	total: 1m 33s	remaining: 4.42s
955:	learn: 0.4157838	total: 1m 33s	remaining: 4.32s
956:	learn: 0.4157576	total: 1m 34s	remaining: 4.22s
957:	learn: 0.4157331	total: 1m 34s	remaining: 4.13s
958:	learn: 0.4157111	total: 1m 34s	remaining: 4.03s
959:	learn: 0.4156963	total: 1m 34s	remaining: 3.93s
960:	learn: 0.4156717	total: 1m 34s	remaining: 3.83s
961:	learn: 0.4156589	total: 1m 34s	remaining: 3.73s
962:	learn: 0.4156441	total: 1m 34s	remaining: 3.63s
963:	learn: 0.4156260	total: 1m 34s	remaining: 3.53s
964:	learn: 0.4156063	total: 1m 34s	remaining: 3.44s
965:	learn: 0.4155816	total: 1m 34s	remaining: 3.34s
966:	learn: 0.4155600	total: 1m 34s	remaining: 3.24s
967:	learn: 0.4155440	total: 1m 35s	remaining: 3.14s
968:	learn: 0.4155198	total: 1m 35s	remaining:

In [367]:
pred_data = predictions(cat, evaluation=False, full = True)

Learning rate set to 0.108058
0:	learn: 0.6341601	total: 125ms	remaining: 2m 5s
1:	learn: 0.5925647	total: 230ms	remaining: 1m 54s
2:	learn: 0.5602600	total: 350ms	remaining: 1m 56s
3:	learn: 0.5348939	total: 473ms	remaining: 1m 57s
4:	learn: 0.5158929	total: 618ms	remaining: 2m 3s
5:	learn: 0.5012648	total: 752ms	remaining: 2m 4s
6:	learn: 0.4918837	total: 866ms	remaining: 2m 2s
7:	learn: 0.4833426	total: 999ms	remaining: 2m 3s
8:	learn: 0.4767743	total: 1.13s	remaining: 2m 4s
9:	learn: 0.4709965	total: 1.26s	remaining: 2m 4s
10:	learn: 0.4665853	total: 1.37s	remaining: 2m 3s
11:	learn: 0.4630754	total: 1.5s	remaining: 2m 3s
12:	learn: 0.4604782	total: 1.61s	remaining: 2m 2s
13:	learn: 0.4583708	total: 1.72s	remaining: 2m 1s
14:	learn: 0.4556609	total: 1.85s	remaining: 2m 1s
15:	learn: 0.4542157	total: 1.99s	remaining: 2m 2s
16:	learn: 0.4526971	total: 2.1s	remaining: 2m 1s
17:	learn: 0.4510014	total: 2.27s	remaining: 2m 3s
18:	learn: 0.4497599	total: 2.37s	remaining: 2m 2s
19:	learn:

160:	learn: 0.4351874	total: 20.9s	remaining: 1m 48s
161:	learn: 0.4351695	total: 21s	remaining: 1m 48s
162:	learn: 0.4351366	total: 21.1s	remaining: 1m 48s
163:	learn: 0.4351084	total: 21.2s	remaining: 1m 48s
164:	learn: 0.4350815	total: 21.4s	remaining: 1m 48s
165:	learn: 0.4350507	total: 21.5s	remaining: 1m 48s
166:	learn: 0.4350078	total: 21.7s	remaining: 1m 48s
167:	learn: 0.4349687	total: 21.8s	remaining: 1m 48s
168:	learn: 0.4349467	total: 21.9s	remaining: 1m 47s
169:	learn: 0.4349240	total: 22.1s	remaining: 1m 47s
170:	learn: 0.4348866	total: 22.2s	remaining: 1m 47s
171:	learn: 0.4348463	total: 22.3s	remaining: 1m 47s
172:	learn: 0.4348189	total: 22.5s	remaining: 1m 47s
173:	learn: 0.4347875	total: 22.6s	remaining: 1m 47s
174:	learn: 0.4347500	total: 22.7s	remaining: 1m 47s
175:	learn: 0.4347135	total: 22.8s	remaining: 1m 46s
176:	learn: 0.4346930	total: 23s	remaining: 1m 46s
177:	learn: 0.4346657	total: 23.1s	remaining: 1m 46s
178:	learn: 0.4346428	total: 23.2s	remaining: 1m 4

316:	learn: 0.4310267	total: 40.4s	remaining: 1m 27s
317:	learn: 0.4309986	total: 40.6s	remaining: 1m 27s
318:	learn: 0.4309811	total: 40.7s	remaining: 1m 26s
319:	learn: 0.4309625	total: 40.8s	remaining: 1m 26s
320:	learn: 0.4309426	total: 40.9s	remaining: 1m 26s
321:	learn: 0.4309169	total: 41.1s	remaining: 1m 26s
322:	learn: 0.4308979	total: 41.2s	remaining: 1m 26s
323:	learn: 0.4308753	total: 41.4s	remaining: 1m 26s
324:	learn: 0.4308523	total: 41.5s	remaining: 1m 26s
325:	learn: 0.4308194	total: 41.7s	remaining: 1m 26s
326:	learn: 0.4307947	total: 41.8s	remaining: 1m 26s
327:	learn: 0.4307634	total: 42s	remaining: 1m 25s
328:	learn: 0.4307446	total: 42.1s	remaining: 1m 25s
329:	learn: 0.4307138	total: 42.3s	remaining: 1m 25s
330:	learn: 0.4306933	total: 42.4s	remaining: 1m 25s
331:	learn: 0.4306710	total: 42.6s	remaining: 1m 25s
332:	learn: 0.4306472	total: 42.7s	remaining: 1m 25s
333:	learn: 0.4306296	total: 42.9s	remaining: 1m 25s
334:	learn: 0.4306134	total: 43s	remaining: 1m 2

473:	learn: 0.4276216	total: 1m	remaining: 1m 7s
474:	learn: 0.4275993	total: 1m 1s	remaining: 1m 7s
475:	learn: 0.4275793	total: 1m 1s	remaining: 1m 7s
476:	learn: 0.4275443	total: 1m 1s	remaining: 1m 7s
477:	learn: 0.4275228	total: 1m 1s	remaining: 1m 7s
478:	learn: 0.4275027	total: 1m 1s	remaining: 1m 6s
479:	learn: 0.4274741	total: 1m 1s	remaining: 1m 6s
480:	learn: 0.4274574	total: 1m 1s	remaining: 1m 6s
481:	learn: 0.4274395	total: 1m 1s	remaining: 1m 6s
482:	learn: 0.4274163	total: 1m 2s	remaining: 1m 6s
483:	learn: 0.4273979	total: 1m 2s	remaining: 1m 6s
484:	learn: 0.4273819	total: 1m 2s	remaining: 1m 6s
485:	learn: 0.4273580	total: 1m 2s	remaining: 1m 6s
486:	learn: 0.4273328	total: 1m 2s	remaining: 1m 5s
487:	learn: 0.4273166	total: 1m 2s	remaining: 1m 5s
488:	learn: 0.4272941	total: 1m 2s	remaining: 1m 5s
489:	learn: 0.4272761	total: 1m 2s	remaining: 1m 5s
490:	learn: 0.4272567	total: 1m 3s	remaining: 1m 5s
491:	learn: 0.4272405	total: 1m 3s	remaining: 1m 5s
492:	learn: 0.4

630:	learn: 0.4246081	total: 1m 21s	remaining: 47.5s
631:	learn: 0.4245958	total: 1m 21s	remaining: 47.3s
632:	learn: 0.4245817	total: 1m 21s	remaining: 47.2s
633:	learn: 0.4245694	total: 1m 21s	remaining: 47.1s
634:	learn: 0.4245521	total: 1m 21s	remaining: 46.9s
635:	learn: 0.4245345	total: 1m 21s	remaining: 46.8s
636:	learn: 0.4245164	total: 1m 21s	remaining: 46.7s
637:	learn: 0.4245030	total: 1m 22s	remaining: 46.6s
638:	learn: 0.4244815	total: 1m 22s	remaining: 46.4s
639:	learn: 0.4244627	total: 1m 22s	remaining: 46.3s
640:	learn: 0.4244485	total: 1m 22s	remaining: 46.2s
641:	learn: 0.4244279	total: 1m 22s	remaining: 46s
642:	learn: 0.4244065	total: 1m 22s	remaining: 45.9s
643:	learn: 0.4243892	total: 1m 22s	remaining: 45.8s
644:	learn: 0.4243731	total: 1m 22s	remaining: 45.7s
645:	learn: 0.4243554	total: 1m 23s	remaining: 45.5s
646:	learn: 0.4243312	total: 1m 23s	remaining: 45.4s
647:	learn: 0.4243166	total: 1m 23s	remaining: 45.3s
648:	learn: 0.4242980	total: 1m 23s	remaining: 4

786:	learn: 0.4219511	total: 1m 41s	remaining: 27.5s
787:	learn: 0.4219401	total: 1m 41s	remaining: 27.3s
788:	learn: 0.4219244	total: 1m 41s	remaining: 27.2s
789:	learn: 0.4219037	total: 1m 41s	remaining: 27.1s
790:	learn: 0.4218806	total: 1m 41s	remaining: 26.9s
791:	learn: 0.4218709	total: 1m 42s	remaining: 26.8s
792:	learn: 0.4218515	total: 1m 42s	remaining: 26.7s
793:	learn: 0.4218322	total: 1m 42s	remaining: 26.6s
794:	learn: 0.4218167	total: 1m 42s	remaining: 26.4s
795:	learn: 0.4218042	total: 1m 42s	remaining: 26.3s
796:	learn: 0.4217908	total: 1m 42s	remaining: 26.2s
797:	learn: 0.4217780	total: 1m 42s	remaining: 26s
798:	learn: 0.4217636	total: 1m 43s	remaining: 25.9s
799:	learn: 0.4217470	total: 1m 43s	remaining: 25.8s
800:	learn: 0.4217294	total: 1m 43s	remaining: 25.7s
801:	learn: 0.4217150	total: 1m 43s	remaining: 25.5s
802:	learn: 0.4216912	total: 1m 43s	remaining: 25.4s
803:	learn: 0.4216718	total: 1m 43s	remaining: 25.3s
804:	learn: 0.4216557	total: 1m 43s	remaining: 2

942:	learn: 0.4193750	total: 2m 1s	remaining: 7.35s
943:	learn: 0.4193579	total: 2m 1s	remaining: 7.22s
944:	learn: 0.4193398	total: 2m 1s	remaining: 7.09s
945:	learn: 0.4193241	total: 2m 2s	remaining: 6.97s
946:	learn: 0.4193139	total: 2m 2s	remaining: 6.84s
947:	learn: 0.4192913	total: 2m 2s	remaining: 6.71s
948:	learn: 0.4192782	total: 2m 2s	remaining: 6.58s
949:	learn: 0.4192634	total: 2m 2s	remaining: 6.46s
950:	learn: 0.4192484	total: 2m 2s	remaining: 6.33s
951:	learn: 0.4192330	total: 2m 2s	remaining: 6.2s
952:	learn: 0.4192142	total: 2m 3s	remaining: 6.07s
953:	learn: 0.4191970	total: 2m 3s	remaining: 5.94s
954:	learn: 0.4191785	total: 2m 3s	remaining: 5.81s
955:	learn: 0.4191661	total: 2m 3s	remaining: 5.68s
956:	learn: 0.4191548	total: 2m 3s	remaining: 5.56s
957:	learn: 0.4191433	total: 2m 3s	remaining: 5.43s
958:	learn: 0.4191239	total: 2m 3s	remaining: 5.3s
959:	learn: 0.4191073	total: 2m 4s	remaining: 5.17s
960:	learn: 0.4190912	total: 2m 4s	remaining: 5.04s
961:	learn: 0.

OSError: [Errno 22] Invalid argument: '<catboost..csv'

voting classifier

In [368]:
estimators = [('xgb', XGBClassifier()), ('rf', RandomForestClassifier())]

vote = VotingClassifier(estimators, n_jobs = -1, verbose = 2)

predictions(vote)



Train 

Confusion Matrix 
 [[131206      0]
 [ 28138  12663]]
Accuracy Score 
 0.8364136343288354
Classification Report 
               precision    recall  f1-score   support

         0.0       0.82      1.00      0.90    131206
         1.0       1.00      0.31      0.47     40801

    accuracy                           0.84    172007
   macro avg       0.91      0.66      0.69    172007
weighted avg       0.87      0.84      0.80    172007

Test 

Confusion Matrix 
 [[54739  1492]
 [13773  3714]]
Accuracy Score 
 0.7929271005724517
Classification Report 
               precision    recall  f1-score   support

         0.0       0.80      0.97      0.88     56231
         1.0       0.71      0.21      0.33     17487

    accuracy                           0.79     73718
   macro avg       0.76      0.59      0.60     73718
weighted avg       0.78      0.79      0.75     73718



In [369]:
predictions(vote, evaluation=False, full = True)



In [None]:
staking

In [None]:
cl_1 = RandomForestClassifier()
cl_2 = XGBClassifier()

meta = RandomForestClassifier()

stack = StackingCVClassifier(classifiers=[cl_1, cl_2], meta_classifier = meta, n_jobs = -1, verbose = 3)

In [None]:
predictions(stack)

In [None]:
predictions(stack, evaluation=False, full = True)