<a href="https://colab.research.google.com/github/AlgoAIBoss/AutoInland-Vehicle-Insurance-Claim-Challenge/blob/main/Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AlgoAIBoss/AutoInland-Vehicle-Insurance-Claim-Challenge/blob/main/Model.ipynb)



---

# Start

---



In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, f1_score,  confusion_matrix
from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV, train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import pandas_profiling as pp
from pandas_profiling import ProfileReport


In [None]:
from google.colab import drive

drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Upload Data

Test_ID = pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/KAGGLE/ZINDI_AUTO_INSURANCE/Dataset/Test.csv')

test = pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/KAGGLE/ZINDI_AUTO_INSURANCE/DATA/TEST_LAB_CAT.csv')

train = pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/KAGGLE/ZINDI_AUTO_INSURANCE/DATA/TRAIN_LAB_CAT.csv')

In [None]:
# Split Train / Test

X = train.drop('target', axis=1)
y = train.target

In [None]:
from imblearn.over_sampling import RandomOverSampler

ros = RandomOverSampler()
X_ros, y_ros = ros.fit_resample(X, y)

In [None]:
X_ros = pd.DataFrame(X_ros)
y_ros = pd.Series(y_ros)

In [None]:
X_ros

In [None]:
test.columns = X_ros.columns.values

In [None]:

X_train, X_test, y_train, y_test = train_test_split(X_ros, y_ros, train_size=0.70, random_state=42)



---



# CatBoost - ML algorithm




---




In [None]:
!pip install catboost
!pip install ipywidgets
!jupyter nbextension enable --py widgetsnbextension

In [None]:
# Specify Categorical features for Catboost

print(X_train.dtypes)
cat_features_train = np.where(X_train.dtypes != np.float)[0]

cat_features_train

In [None]:
# import CatBoost Classifier

from catboost import CatBoostClassifier, Pool, cv

In [None]:
# Train and Test for CatBoost

train_pool = Pool(X_train, y_train, cat_features=cat_features_train)

test_pool = Pool(test, cat_features=cat_features_train)

In [None]:
CV = StratifiedKFold(n_splits=5)

Cat_grid = {
    'depth': [2, 3, 5, 8],
    'learning_rate': [0.01, 0.03, 0.1],
    "max_ctr_complexity":[ 8, 10, 12],
    'bagging_temperature': [0, 1, 3, 5],
    'l2_leaf_reg': [3, 5, 7, 9],
    #"border_count": [7, 10, 15, 50],


    # Default
    "iterations": [500], 
    'loss_function': ['CrossEntropy'],
    "eval_metric":['F1'],
    "random_seed":[42],
    "task_type": ['GPU'], 
    "border_count":[32],
    'verbose': [1000],
    #'scale_pos_weight': [90]
}


Cat_CV = RandomizedSearchCV(Cat_model, Cat_grid, cv = CV, scoring = ['f1'], refit = 'f1', random_state=42, n_iter=10)

Cat_CV.fit(X_train, y_train)

In [None]:
print(f'Best Accuracy: {Cat_CV.best_score_:.3f}\n')
print(f'Best parameter set: {Cat_CV.best_params_}\n')

Best Accuracy: 0.962

Best parameter set: {'verbose': 1000, 'task_type': 'GPU', 'random_seed': 42, 'max_ctr_complexity': 10, 'loss_function': 'CrossEntropy', 'learning_rate': 0.1, 'l2_leaf_reg': 3, 'iterations': 500, 'eval_metric': 'F1', 'depth': 8, 'border_count': 32, 'bagging_temperature': 1}



In [None]:
# Predict    65
Cat_prediction = Cat_model.predict(X_test)



print(f'Scores: {classification_report(y_test, Cat_prediction)}\n')
print(f'F1-score: {f1_score(y_test, Cat_prediction):.3f}\n')
print(f'Accuracy: {balanced_accuracy_score(y_test, Cat_prediction):.3f}')




Scores:               precision    recall  f1-score   support

           0       1.00      0.94      0.97      3190
           1       0.94      1.00      0.97      3185

    accuracy                           0.97      6375
   macro avg       0.97      0.97      0.97      6375
weighted avg       0.97      0.97      0.97      6375


F1-score: 0.968

Accuracy: 0.967


In [None]:
print(confusion_matrix(y_test, Cat_prediction))

[[2987  203]
 [  10 3175]]


In [None]:
# Feature Importance

model = CatBoostClassifier(iterations=50, random_seed=42, logging_level='Silent').fit(train_pool)

feature_importance = model.get_feature_importance(train_pool)
feature_names = X_train.columns

for score, name in sorted(zip(feature_importance, feature_names), reverse=True):
  print('{}: {}'.format(name, score))

In [None]:
# CatBoost parameters
params = {             
          'combinations_ctr' : ['FloatTargetMeanValue'],
          'simple_ctr':['FloatTargetMeanValue'],
          'cat_features':cat_features_train,
          'early_stopping_rounds':200,
          'verbose': 1000, 
          'task_type': 'GPU', 
          'random_seed': 42, 
          'max_ctr_complexity': 10, 
          'loss_function': 'CrossEntropy', 
          'learning_rate': 0.1, 
          # 'l2_leaf_reg': 3, 
          'iterations': 10000, 
          'eval_metric': 'F1', 
          'depth': 8, 
          # 'border_count': 32, 
          # 'bagging_temperature': 1
          }           


# Model
Cat_model = CatBoostClassifier(**params)

Cat_model.fit(X_ros, y_ros)

In [None]:
submission_df = pd.DataFrame()
submission_df["ID"] = ID["ID"]
submission_df["target"] = RF_prediction

submission_df.to_csv("RF.csv", index=False)



---

# Deep Learning 

---



In [None]:
pipe = MLPClassifier()

scv = StratifiedKFold(n_splits = 5)

# Create param grid.

params = [
    {
        'hidden_layer_sizes': [50, 100, 150, 200],
        'activation': ['relu'],
        'alpha': [0.0001, 0.001, 0.01],
        'solver': ['sgd', 'adam'],
        # 'batch_size':['auto', 500, 200],
        # 'learning_rate': ['constant', 'invscaling', 'adaptive'],
        'max_iter':[200, 300, 400],
        'learning_rate_init': [0.001, 0.01, 0.1, 0.3],
        'random_state': [42]
    }
]

# Create grid search object

MLP_CV = RandomizedSearchCV(estimator=pipe, param_distributions=params, cv = scv, scoring = ['f1'], refit = 'f1', verbose=10, n_jobs = -1, n_iter=10, random_state=42)

# Fit on data

MLP_CV.fit(X_train, y_train)




In [None]:
# Scores
print(f'Best Accuracy: {MLP_CV.best_score_:.3f}\n')
print(f'Best parameter set: {MLP_CV.best_params_}\n')

Best Accuracy: 0.687

Best parameter set: {'solver': 'adam', 'random_state': 42, 'max_iter': 300, 'learning_rate_init': 0.01, 'hidden_layer_sizes': 50, 'alpha': 0.0001, 'activation': 'relu'}



In [None]:
# Predict    0.066
MLP_prediction = MLP_CV.predict(X_test)



print(f' {classification_report(y_test, MLP_prediction)}\n')
print(f'F1-score: {f1_score(y_test, MLP_prediction):.3f}\n')
print(f'Accuracy: {balanced_accuracy_score(y_test, MLP_prediction):.3f}')
print(confusion_matrix(y_test, MLP_prediction))



               precision    recall  f1-score   support

           0       0.68      0.61      0.64      3190
           1       0.65      0.71      0.67      3185

    accuracy                           0.66      6375
   macro avg       0.66      0.66      0.66      6375
weighted avg       0.66      0.66      0.66      6375


F1-score: 0.675

Accuracy: 0.659
[[1952 1238]
 [ 933 2252]]


### BernoulliRBM

In [None]:
BRBM_pipe = BernoulliRBM()

scv = StratifiedKFold(n_splits = 2)

# Create param grid.

BRBM_params = [
    {
        'n_components':[200, 256, 320, 370],
        'n_iter':[10, 20, 35, 47, 55],
        'learning_rate': [0.001, 0.01, 0.1, 0.3],
        'batch_size':[10, 15, 20, 30, 35],
        'random_state': [42]
    }
]

# Create grid search object

BRBM_CV = RandomizedSearchCV(estimator=BRBM_pipe, param_distributions=BRBM_params, cv = scv, scoring = ['f1'], refit = 'f1', verbose=10, n_jobs = -1, n_iter=2, random_state=42)

# Fit on data

BRBM_CV.fit(X_train, y_train)




In [None]:
# Scores
print(f'Best Accuracy: {BRBM_CV.best_score_:.3f}\n')
print(f'Best parameter set: {BRBM_CV.best_params_}\n')

In [None]:
# Predict    0.066
BRBM_prediction = BRBM_CV.predict(X_test)



print(f' {classification_report(y_test, BRBM_prediction)}\n')
print(f'F1-score: {f1_score(y_test, BRBM_prediction):.3f}')
print(f'Accuracy: {balanced_accuracy_score(y_test, BRBM_prediction):.3f}')
print(confusion_matrix(y_test, BRBM_prediction))





---

# Sklearn GradientBoosting

---



In [None]:
pipe = GradientBoostingClassifier()

scv = StratifiedKFold(n_splits = 3)

# Create param grid.

params = [
    {
        'n_estimators': [90],
        'loss': ['deviance', 'exponential'],
        'criterion': ['friedman_mse', 'mse', 'mae'],
        # 'min_samples_leaf': [1, 3, 5, 8, 10, 12, 15],
        # 'min_samples_split': [2,3, 4, 5, 8],
        'learning_rate':[0.01, 0.05, 0.1],
        'max_depth': [ 8, 10, 15, 20, 25],
        'max_features': ['auto', 'sqrt', 'log2'],
        'random_state': [42]
    }
]

# Create grid search object

GBC_CV = RandomizedSearchCV(pipe, params, cv = scv, scoring = ['f1'], refit = 'f1', verbose=True, n_jobs = -1, random_state=42, n_iter=10)

# Fit on data

GBC_CV.fit(X_train, y_train)




Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed: 25.1min finished


RandomizedSearchCV(cv=StratifiedKFold(n_splits=3, random_state=None, shuffle=False),
                   error_score=nan,
                   estimator=GradientBoostingClassifier(ccp_alpha=0.0,
                                                        criterion='friedman_mse',
                                                        init=None,
                                                        learning_rate=0.1,
                                                        loss='deviance',
                                                        max_depth=3,
                                                        max_features=None,
                                                        max_leaf_nodes=None,
                                                        min_impurity_decrease=0.0,
                                                        min_impurity_split=None,
                                                        min_samples_leaf=1,
                                                  

In [None]:
# Scores
print(f'Best Accuracy: {GBC_CV.best_score_:.3f}\n')
print(f'Best parameter set: {GBC_CV.best_params_}\n')


Best Accuracy: 0.936

Best parameter set: {'random_state': 42, 'n_estimators': 90, 'max_features': 'log2', 'max_depth': 15, 'loss': 'deviance', 'learning_rate': 0.1, 'criterion': 'mse'}



In [None]:
# Predict  0.417
GBC_prediction = GBC_CV.predict(X_test)



print(f' {classification_report(y_test, GBC_prediction)}\n')
print(f'F1-score: {f1_score(y_test, GBC_prediction):.3f}')
print(f'Accuracy: {balanced_accuracy_score(y_test, GBC_prediction):.3f}')
print(confusion_matrix(y_test, GBC_prediction))




               precision    recall  f1-score   support

           0       0.99      0.92      0.95      3190
           1       0.92      0.99      0.96      3185

    accuracy                           0.95      6375
   macro avg       0.96      0.95      0.95      6375
weighted avg       0.96      0.95      0.95      6375


F1-score: 0.955
Accuracy: 0.954
[[2927  263]
 [  33 3152]]


In [None]:
submission['GradientBoosting'] = GBC_prediction
submission['GBC_Accuracy'] = '90%'

In [None]:
!pip install vowpalwabbit

In [None]:
from vowpalwabbit import pyvw

In [None]:
from vowpalwabbit.sklearn_vw import VWClassifier

# build model
model = VWClassifier()
model.fit(X_train, y_train)

# predict model
y_pred = model.predict(X_test)

# evaluate model
model.score(X_train, y_train)
model.score(X_test, y_test)




---

# Decision Tree

---



In [None]:
pipe = DecisionTreeClassifier()


scv = StratifiedKFold(n_splits = 5)


# Create param grid.

params = [
    {
        'criterion': ['gini', 'entropy'],
        'splitter': ['best', 'random'],
        # 'min_samples_leaf': [1, 3, 5, 8, 10],
        # 'min_samples_split': [2, 4, 8, 10],
        'max_depth': ['None', 5, 8, 10, 15, 20, 25],
        # 'class_weight':['None', 'balanced', 'balanced_subsample'],
        # 'max_leaf_nodes': ['None', 50, 80, 100],
        # 'max_features':['auto', 'sqrt', 'log2'],
        #'min_weight_fraction_leaf':[np.arange(0.0, 10.0, 0.2)],
        #'min_impurity_decrease': [np.arange(0.0, 10.0, 0.2)],
        #'min_impurity_split': [0, 3, 5, 8, 10],
        'random_state': [42]
    }
]




# Create grid search object

DT_CV = RandomizedSearchCV(pipe, params, cv = scv, scoring = ['f1'], refit = 'f1', n_jobs = -1, random_state=42)




# Fit on data

DT_CV.fit(X_train, y_train)



# Scores
print(f'Best Accuracy: {DT_CV.best_score_:.3f}\n')
print(f'Best parameter set: {DT_CV.best_params_}\n')

Best Accuracy: 0.902

Best parameter set: {'splitter': 'best', 'random_state': 42, 'max_depth': 25, 'criterion': 'gini'}



In [None]:
# Predict   0.555
DT_prediction = DT_CV.predict(X_test)



print(f' {classification_report(y_test, DT_prediction)}\n')
print(f'F1-score: {f1_score(y_test, DT_prediction):.3f}')
print(f'Accuracy: {balanced_accuracy_score(y_test, DT_prediction):.3f}')



               precision    recall  f1-score   support

           0       0.99      0.82      0.90      3190
           1       0.85      0.99      0.91      3185

    accuracy                           0.91      6375
   macro avg       0.92      0.91      0.91      6375
weighted avg       0.92      0.91      0.91      6375


F1-score: 0.914
Accuracy: 0.907


In [None]:
print(confusion_matrix(y_test, DT_prediction))

[[2631  559]
 [  36 3149]]


In [None]:
submission['DecisionTree'] = DT_prediction
submission['DT_Accuracy'] = '87%'



---

# XGBoost

---



In [None]:
import time


In [None]:
# ML model
model = XGBClassifier()

# Cross-validation
CV = StratifiedKFold(n_splits=5)


# Create the grid search parameter grid and scoring funcitons
param_grid = {
    "learning_rate": [0.03, 0.01, 0.1],
    "colsample_bytree": [0.3, 0.6, 0.8, 1.0],
    "subsample": [0.6, 0.8, 1.0, 1.2],
    "max_depth": [2, 3, 5, 8, 10],
    "reg_lambda": [1, 1.5, 2],
    #'reg_alpha':[0, 1e-5, 1e-2, 0.1, 1, 100],
    "gamma": [0, 0.1, 0.3],
    # 'min_child_weight':np.arange(1, 8, 1, dtype=int),
    'min_child_weight':np.arange(1,6,2, dtype=int),
    


    # Default
    "n_estimators": [1000],
    'booster':['gbtree'],
    'sampling_method':['gradient_based'],
    'objective':['binary:logistic'],
    'tree_method':["gpu_hist"],
    'random_state':[42], 
    'eval_metric':["logloss"],
    # 'scale_pos_weight':[90]
}




XGB_model = RandomizedSearchCV( estimator=model, param_distributions=param_grid, cv=CV, scoring='f1', n_jobs=-1, n_iter=10, refit="f1", verbose=True, random_state=42)



# fit grid search
XGB_tunned_model = XGB_model.fit(X_train, y_train)

In [None]:
print(f'Best score: {XGB_tunned_model.best_score_}')
print(f'Best model: {XGB_tunned_model.best_params_}')

Best score: 0.931709273414776
Best model: {'tree_method': 'gpu_hist', 'subsample': 1.0, 'sampling_method': 'gradient_based', 'reg_lambda': 1.5, 'random_state': 42, 'objective': 'binary:logistic', 'n_estimators': 1000, 'min_child_weight': 1, 'max_depth': 8, 'learning_rate': 0.1, 'gamma': 0, 'eval_metric': 'logloss', 'colsample_bytree': 1.0, 'booster': 'gbtree'}


In [None]:
XGB_prediction = XGB_tunned_model.predict(X_test)

# 58

print('Test F1 Score: ', f1_score(y_test, XGB_prediction))
print('\nConfusion Matrix:')
print(confusion_matrix(y_test, XGB_prediction))
print('\nClassification Report:')
print(classification_report(y_test, XGB_prediction))




Test F1 Score:  0.9391720221192646

Confusion Matrix:
[[2826  364]
 [  43 3142]]

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.89      0.93      3190
           1       0.90      0.99      0.94      3185

    accuracy                           0.94      6375
   macro avg       0.94      0.94      0.94      6375
weighted avg       0.94      0.94      0.94      6375





---

# LightGBM

---



In [None]:
# ML model
LGBM_model = LGBMClassifier()

# Cross-validation
CV = StratifiedKFold(n_splits=5)

# CV Parameters
gridParams = {
    'learning_rate':   [0.01, 0.1, 0.03, 0.3, 0.5],
    'max_depth' :      [2, 3,5,6,7,8],
    # 'colsample_bytree':[0.3, 0.5,0.7, 0.9, 1.0],
    # 'subsample' :      [0.1, 0.3, 0.5, 0.7, 0.9],
    # 'min_split_gain' : [0.01, 0.1, 0.3, 0.5],
    # 'min_data_in_leaf':[5, 15, 10, 20, 27],
    # 'max_bin':         [150, 200, 250, 300, 320, 350],   #1
    # 'num_leaves':      [31, 51, 71, 90, 100],        #1
    #'categorical_feature':


    #Default
    # 'is_unbalance': [True],
    # 'use_missing':  [True],
    'n_iterations': [1000],
    'random_state': [42], 
    # 'class_weight': ['balanced'],
    'objective' :   ['binary'],
    'boosting_type':['gbdt'],
    'metric':       ['cross_entropy'],
    
    }


# Initialize a RandomizedSearchCV object using 5-fold CV-
LGBM_CV = RandomizedSearchCV(LGBM_model, gridParams, cv = CV, scoring='f1',  n_iter=10, refit="f1", verbose=True, random_state=42)

# Train on training data-
LGBM_CV.fit(X_train, y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  50 out of  50 | elapsed:   22.2s finished


RandomizedSearchCV(cv=StratifiedKFold(n_splits=5, random_state=None, shuffle=False),
                   error_score=nan,
                   estimator=LGBMClassifier(boosting_type='gbdt',
                                            class_weight=None,
                                            colsample_bytree=1.0,
                                            importance_type='split',
                                            learning_rate=0.1, max_depth=-1,
                                            min_child_samples=20,
                                            min_child_weight=0.001,
                                            min_split_gain=0.0,
                                            n_estimators=100, n_jobs=-1,
                                            num_leaves=31, objectiv...
                                            subsample_freq=0),
                   iid='deprecated', n_iter=10, n_jobs=None,
                   param_distributions={'boosting_type': ['gbdt'],
     

In [None]:
print('Best parameters found by grid search are:', LGBM_CV.best_params_)
print('Best score found by grid search is:', LGBM_CV.best_score_)

Best parameters found by grid search are: {'random_state': 42, 'objective': 'binary', 'n_iterations': 1000, 'metric': 'cross_entropy', 'max_depth': 7, 'learning_rate': 0.5, 'boosting_type': 'gbdt'}
Best score found by grid search is: 0.9130670761280163


In [None]:
LGBM_prediction = LGBM_CV.predict(X_test)

# 61

print('Test F1_score: ', f1_score(y_test, LGBM_prediction))
print('\nConfusion Matrix:')
print(confusion_matrix(y_test, LGBM_prediction))
print('\nClassification Report:')
print(classification_report(y_test, LGBM_prediction))


Test F1_score:  0.9230994580342757

Confusion Matrix:
[[2699  491]
 [  34 3151]]

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.85      0.91      3190
           1       0.87      0.99      0.92      3185

    accuracy                           0.92      6375
   macro avg       0.93      0.92      0.92      6375
weighted avg       0.93      0.92      0.92      6375



In [None]:
submission['LightGBM'] = LGBM_prediction
submission['LGBM_Accuracy'] = '91%'



---

# Bagging

---



In [None]:
# ML Model hyperparameters

               
LGBM_params = {'random_state': 42, 'objective': 'binary', 'n_iterations': 1000, 'metric': 'cross_entropy', 'max_depth': 7, 'learning_rate': 0.5, 'boosting_type': 'gbdt'}

XGBM_params = {'tree_method': 'gpu_hist', 'subsample': 1.0, 'sampling_method': 'gradient_based', 'reg_lambda': 1.5, 'random_state': 42, 'objective': 'binary:logistic', 'n_estimators': 1000, 'min_child_weight': 1, 'max_depth': 8, 'learning_rate': 0.1, 'gamma': 0, 'eval_metric': 'logloss', 'colsample_bytree': 1.0, 'booster': 'gbtree'}

CatB_params = {'combinations_ctr' : ['FloatTargetMeanValue'], 'simple_ctr':['FloatTargetMeanValue'], 'cat_features':cat_features_train, 'early_stopping_rounds':200, 'verbose': 1000, 'task_type': 'GPU', 'random_seed': 42, 'max_ctr_complexity': 10, 'loss_function': 'CrossEntropy', 'learning_rate': 0.1, 'l2_leaf_reg': 3, 'iterations': 10000, 'eval_metric': 'F1', 'depth': 8, 'border_count': 32, 'bagging_temperature': 1}

RF_params = {'random_state': 42, 'n_estimators': 350, 'max_features': 'sqrt', 'max_depth': 15, 'criterion': 'gini'}

LR_params = {'solver': 'newton-cg', 'random_state': 42, 'penalty': 'l2', 'max_iter': 250, 'C': 10}

DT_params = {'splitter': 'best', 'random_state': 42, 'max_depth': 25, 'criterion': 'gini'}

KNN_params = {'weights': 'distance', 'p': 1, 'n_neighbors': 10, 'leaf_size': 100, 'algorithm': 'brute'}

SVC_params = {'shrinking': True, 'random_state': 42, 'max_iter': -1, 'kernel': 'rbf', 'gamma': 'auto', 'degree': 3, 'decision_function_shape': 'ovr', 'class_weight': 'balanced', 'C': 100}

GBC_params = {'random_state': 42, 'n_estimators': 90, 'max_features': 'log2', 'max_depth': 15, 'loss': 'deviance', 'learning_rate': 0.1, 'criterion': 'mse'}

EXT_params = {'random_state': 42, 'n_estimators': 300, 'max_depth': 15, 'criterion': 'entropy'}

MLP_params = {'solver': 'adam', 'random_state': 42, 'max_iter': 300, 'learning_rate_init': 0.01, 'hidden_layer_sizes': 50, 'alpha': 0.0001, 'activation': 'relu'}

In [None]:
# LR_params = {LR_cv.best_params_}
# RF_params = {RF_CV.best_params_}
# KNN_params = {KNN_CV.best_params_}
# SVM_params = {SVC_CV.best_params_}
# EXT_params = {EXT_CV.best_params_}
# MLP_params = {MLP_CV.best_params_}
# DT_params = {DT_CV.best_params_}
# XGBM_params = {XGB_tunned_model.best_params_}
# LGBM_params = LGBM_CV.best_params_

In [None]:
# Bagging Models

LGBM = LGBMClassifier(**LGBM_params)

XGBM = XGBClassifier(**XGBM_params)

CatBoost = CatBoostClassifier(**CatB_params)

RF = RandomForestClassifier(**RF_params)

LR = LogisticRegression(**LR_params)

KNN = KNeighborsClassifier(**KNN_params)

SVC = SVC(**SVC_params)

GBC = GradientBoostingClassifier(**GBC_params)

EXT = ExtraTreesClassifier(**EXT_params)

DT = DecisionTreeClassifier(**DT_params)

MLP = MLPClassifier(**MLP_params)

In [None]:
# LightGBM Model



LGBM_Bagging = BaggingClassifier(base_estimator=LGBM, n_estimators=25, random_state=42).fit(X_ros, y_ros)

LGBM_Bagging_prediction = LGBM_Bagging.predict(test)




In [None]:
# Metrix  611

print('Test Accuraccy: ', balanced_accuracy_score(y_test, LGBM_Bagging_prediction))
print(f'{classification_report(y_test, LGBM_Bagging_prediction)}\n')
print(f'F1-score: {f1_score(y_test, LGBM_Bagging_prediction):.3f}\n')
print(confusion_matrix(y_test, LGBM_Bagging_prediction))



Test Accuraccy:  0.7968219001823053
              precision    recall  f1-score   support

         0.0       0.95      0.93      0.94      3187
         1.0       0.57      0.66      0.61       437

    accuracy                           0.90      3624
   macro avg       0.76      0.80      0.78      3624
weighted avg       0.91      0.90      0.90      3624


F1-score: 0.611

[[2964  223]
 [ 147  290]]


In [None]:
submission['LGBM_Bagging'] = LGBM_Bagging_prediction
submission['LGBM_Bagging_Accuracy'] = '93%'

In [None]:
# XGBoost Model




XGBoost_Bagging = BaggingClassifier(base_estimator=XGBM, n_estimators=13, random_state=42).fit(X_train, y_train)

XGBoost_Bagging_prediction = XGBoost_Bagging.predict(X_test)




In [None]:
# Metrix

print('Test Accuraccy: ', balanced_accuracy_score(y_test, XGBoost_Bagging_prediction))
print(f'{classification_report(y_test, XGBoost_Bagging_prediction)}\n')
print(f'F1-score: {f1_score(y_test, XGBoost_Bagging_prediction):.3f}\n')
print(confusion_matrix(y_test, XGBoost_Bagging_prediction))



Test Accuraccy:  0.9024390243902439
              precision    recall  f1-score   support

           0       0.95      0.73      0.83        52
           1       0.89      0.98      0.93       112

    accuracy                           0.90       164
   macro avg       0.92      0.86      0.88       164
weighted avg       0.91      0.90      0.90       164


F1-score: 0.932

[[ 38  14]
 [  2 110]]


In [None]:
submission['XGBM_Bagging'] = XGBoost_Bagging_prediction
submission['XGBM_Bagging_Accuracy'] = '90%'

In [None]:
# KNN Model




KNN_Bagging = BaggingClassifier(base_estimator=KNN, n_estimators=13, random_state=42).fit(X_train, y_train)

KNN_Bagging_prediction = KNN_Bagging.predict(X_test)




In [None]:
# Metrix  0.204

print('Test Accuraccy: ', balanced_accuracy_score(y_test, KNN_Bagging_prediction))
print(f'{classification_report(y_test, KNN_Bagging_prediction)}\n')
print(f'F1-score: {f1_score(y_test, KNN_Bagging_prediction):.3f}\n')
print(confusion_matrix(y_test, KNN_Bagging_prediction))



Test Accuraccy:  0.4996172772006561
              precision    recall  f1-score   support

         0.0       0.89      0.95      0.92      2950
         1.0       0.11      0.05      0.07       372

    accuracy                           0.85      3322
   macro avg       0.50      0.50      0.49      3322
weighted avg       0.80      0.85      0.82      3322


F1-score: 0.067

[[2805  145]
 [ 354   18]]


In [None]:
submission['KNN_Bagging'] = KNN_Bagging_prediction
submission['KNN_Bagging_Accuracy'] = '69%'

In [None]:
# LogisticRegression Model



LR_Bagging = BaggingClassifier(base_estimator=LR, n_estimators=13, random_state=42).fit(X_train, y_train)

LR_Bagging_prediction = LR_Bagging.predict(X_test)




In [None]:
# Metrix   0.551

print('Test Accuraccy: ', balanced_accuracy_score(y_test, LR_Bagging_prediction))
print(f'{classification_report(y_test, LR_Bagging_prediction)}\n')
print(f'F1-score: {f1_score(y_test, LR_Bagging_prediction):.3f}\n')
print(confusion_matrix(y_test, LR_Bagging_prediction))



Test Accuraccy:  0.503523783488245
              precision    recall  f1-score   support

         0.0       0.89      1.00      0.94      2950
         1.0       0.50      0.01      0.02       372

    accuracy                           0.89      3322
   macro avg       0.69      0.50      0.48      3322
weighted avg       0.85      0.89      0.84      3322


F1-score: 0.016

[[2947    3]
 [ 369    3]]


In [None]:
submission['LR_Bagging'] = LR_Bagging_prediction
submission['LR_Bagging_Accuracy'] = '88%'

In [None]:
# RandomForest Model



RF_Bagging = BaggingClassifier(base_estimator=RF, n_estimators=13, random_state=42).fit(X_train, y_train)

RF_Bagging_prediction = RF_Bagging.predict(X_test)




In [None]:
# Metrix   0.594

print('Test Accuraccy: ', balanced_accuracy_score(y_test, RF_Bagging_prediction))
print(f'{classification_report(y_test, RF_Bagging_prediction)}\n')
print(f'F1-score: {f1_score(y_test, RF_Bagging_prediction):.3f}\n')
print(confusion_matrix(y_test, RF_Bagging_prediction))



Test Accuraccy:  0.6682768361581921
              precision    recall  f1-score   support

         0.0       0.93      0.75      0.83      2950
         1.0       0.23      0.58      0.33       372

    accuracy                           0.73      3322
   macro avg       0.58      0.67      0.58      3322
weighted avg       0.86      0.73      0.78      3322


F1-score: 0.330

[[2222  728]
 [ 155  217]]


In [None]:
submission['RF_Bagging'] = RF_Bagging_prediction
submission['RF_Bagging_Accuracy'] = '91%'

In [None]:
# SVC Model



SVC_Bagging = BaggingClassifier(base_estimator=SVC, n_estimators=13, random_state=42).fit(X_train, y_train)

SVC_Bagging_prediction = SVC_Bagging.predict(X_test)




In [None]:
# Metrix  0.33

print('Test Accuraccy: ', balanced_accuracy_score(y_test, SVC_Bagging_prediction))
print(f'{classification_report(y_test, SVC_Bagging_prediction)}\n')
print(f'F1-score: {f1_score(y_test, SVC_Bagging_prediction):.3f}\n')
print(confusion_matrix(y_test, SVC_Bagging_prediction))



Test Accuraccy:  0.6679715691634773
              precision    recall  f1-score   support

         0.0       0.96      0.53      0.68      2950
         1.0       0.18      0.81      0.29       372

    accuracy                           0.56      3322
   macro avg       0.57      0.67      0.49      3322
weighted avg       0.87      0.56      0.64      3322


F1-score: 0.291

[[1562 1388]
 [  72  300]]


In [None]:
submission['SVC_Bagging'] = SVC_Bagging_prediction
submission['SVC_Bagging_Accuracy'] = '78%'

In [None]:
# GradientBoosting Model



GBC_Bagging = BaggingClassifier(base_estimator=GBC, n_estimators=13, random_state=42).fit(X_train, y_train)

GBC_Bagging_prediction = GBC_Bagging.predict(X_test)




In [None]:
# Metrix   0.426

print('Test Accuraccy: ', balanced_accuracy_score(y_test, GBC_Bagging_prediction))
print(f'{classification_report(y_test, GBC_Bagging_prediction)}\n')
print(f'F1-score: {f1_score(y_test, GBC_Bagging_prediction):.3f}\n')
print(confusion_matrix(y_test, GBC_Bagging_prediction))



In [None]:
submission['GBC_Bagging'] = GBC_Bagging_prediction
submission['GBC_Bagging_Accuracy'] = '91%'

In [None]:
# ExtraTrees Model



EXT_Bagging = BaggingClassifier(base_estimator=EXT, n_estimators=13, random_state=42).fit(X_ros, y_ros)

EXT_Bagging_prediction = EXT_Bagging.predict(test)




In [None]:
# Metrix   0.435

print('Test Accuraccy: ', balanced_accuracy_score(y_test, EXT_Bagging_prediction))
print(f'{classification_report(y_test, EXT_Bagging_prediction)}\n')
print(f'F1-score: {f1_score(y_test, EXT_Bagging_prediction):.3f}\n')
print(confusion_matrix(y_test, EXT_Bagging_prediction))



Test Accuraccy:  0.5
              precision    recall  f1-score   support

         0.0       0.89      1.00      0.94      2950
         1.0       0.00      0.00      0.00       372

    accuracy                           0.89      3322
   macro avg       0.44      0.50      0.47      3322
weighted avg       0.79      0.89      0.84      3322


F1-score: 0.000

[[2950    0]
 [ 372    0]]


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
submission['EXT_Bagging'] = EXT_Bagging_prediction
submission['EXT_Bagging_Accuracy'] = '91%'

In [None]:
# DecisionTree Model



DT_Bagging = BaggingClassifier(base_estimator=DT, n_estimators=13, random_state=42).fit(X_train, y_train)

DT_Bagging_prediction = DT_Bagging.predict(X_test)




In [None]:
# Metrix   0.585

print('Test Accuraccy: ', balanced_accuracy_score(y_test, DT_Bagging_prediction))
print(f'{classification_report(y_test, DT_Bagging_prediction)}\n')
print(f'F1-score: {f1_score(y_test, DT_Bagging_prediction):.3f}\n')
print(confusion_matrix(y_test, DT_Bagging_prediction))



Test Accuraccy:  0.6883706943685073
              precision    recall  f1-score   support

         0.0       0.95      0.70      0.80      2950
         1.0       0.22      0.68      0.33       372

    accuracy                           0.70      3322
   macro avg       0.58      0.69      0.57      3322
weighted avg       0.86      0.70      0.75      3322


F1-score: 0.334

[[2063  887]
 [ 120  252]]


In [None]:
submission['DT_Bagging'] = DT_Bagging_prediction
submission['DT_Bagging_Accuracy'] = '88%'

In [None]:
# MLP Model



MLP_Bagging = BaggingClassifier(base_estimator=MLP, n_estimators=13, random_state=42).fit(X_train, y_train)

MLP_Bagging_prediction = MLP_Bagging.predict(X_test)




In [None]:
# Metrix   0.383

print('Test Accuraccy: ', balanced_accuracy_score(y_test, MLP_Bagging_prediction))
print(f'{classification_report(y_test, MLP_Bagging_prediction)}\n')
print(f'F1-score: {f1_score(y_test, MLP_Bagging_prediction):.3f}\n')
print(confusion_matrix(y_test, MLP_Bagging_prediction))



In [None]:
submission['MLP_Bagging'] = MLP_Bagging_prediction
submission['MLP_Bagging_Accuracy'] = '68%'



---

# Boosting

---



In [None]:
# LightGBM Model


LGBM_Boosting = AdaBoostClassifier(base_estimator=LGBM, n_estimators=13, random_state=42).fit(X_ros, y_ros)

LGBM_Boosting_prediction = LGBM_Boosting.predict(test)




In [None]:
# Metrix    0.466

print('Test Accuraccy: ', balanced_accuracy_score(y_test, LGBM_Boosting_prediction))
print(f'{classification_report(y_test, LGBM_Boosting_prediction)}\n')
print(f'F1-score: {f1_score(y_test, LGBM_Boosting_prediction):.3f}\n')
print(confusion_matrix(y_test, LGBM_Boosting_prediction))



Test Accuraccy:  0.5
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      2950
           1       0.00      0.00      0.00       372

    accuracy                           0.89      3322
   macro avg       0.44      0.50      0.47      3322
weighted avg       0.79      0.89      0.84      3322


F1-score: 0.000

[[2950    0]
 [ 372    0]]


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
submission['LGBM_Boosting'] = LGBM_Boosting_prediction
submission['LGBM_Boosting_Accuracy'] = '68%'

In [None]:
# LogisticRegression Model



LR_Boosting = AdaBoostClassifier(base_estimator=LR, n_estimators=13, random_state=42).fit(X_train, y_train)

LR_Boosting_prediction = LR_Boosting.predict(X_test)




In [None]:
# Metrix   0.546

print('Test Accuraccy: ', balanced_accuracy_score(y_test, LR_Boosting_prediction))
print(f'{classification_report(y_test, LR_Boosting_prediction)}\n')
print(f'F1-score: {f1_score(y_test, LR_Boosting_prediction):.3f}\n')
print(confusion_matrix(y_test, LR_Boosting_prediction))



Test Accuraccy:  0.5033542919628212
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      2950
           1       0.43      0.01      0.02       372

    accuracy                           0.89      3322
   macro avg       0.66      0.50      0.48      3322
weighted avg       0.84      0.89      0.84      3322


F1-score: 0.016

[[2946    4]
 [ 369    3]]


In [None]:
submission['LR_Boosting'] = LR_Boosting_prediction
submission['LR_Boosting_Accuracy'] = '87%'

In [None]:
# RandomForest Model



RF_Boosting = AdaBoostClassifier(base_estimator=RF, n_estimators=13, random_state=42).fit(X_train, y_train)

RF_Boosting_prediction = RF_Boosting.predict(X_test)




In [None]:
# Metrix   0.535

print('Test Accuraccy: ', balanced_accuracy_score(y_test, RF_Boosting_prediction))
print(f'{classification_report(y_test, RF_Boosting_prediction)}\n')
print(f'F1-score: {f1_score(y_test, RF_Boosting_prediction):.3f}\n')
print(confusion_matrix(y_test, RF_Boosting_prediction))



Test Accuraccy:  0.5868871878986697
              precision    recall  f1-score   support

           0       0.91      0.81      0.86      2950
           1       0.20      0.36      0.25       372

    accuracy                           0.76      3322
   macro avg       0.55      0.59      0.56      3322
weighted avg       0.83      0.76      0.79      3322


F1-score: 0.254

[[2400  550]
 [ 238  134]]


In [None]:
submission['RF_Boosting'] = RF_Boosting_prediction
submission['RF_Boosting_Accuracy'] = '91%'

In [None]:
# SVC Model



SVC_Boosting = AdaBoostClassifier(base_estimator=SVC, n_estimators=13, algorithm='SAMME', random_state=42).fit(X_train, y_train)

SVC_Boosting_prediction = SVC_Boosting.predict(X_test)




In [None]:
# Metrix

print('Test Accuraccy: ', balanced_accuracy_score(y_test, SVC_Boosting_prediction))
print(f'{classification_report(y_test, SVC_Boosting_prediction)}\n')
print(f'F1-score: {f1_score(y_test, SVC_Boosting_prediction):.3f}\n')
print(confusion_matrix(y_test, SVC_Boosting_prediction))



Test Accuraccy:  0.5246892655367231
              precision    recall  f1-score   support

           0       0.89      0.88      0.89      2950
           1       0.15      0.17      0.16       372

    accuracy                           0.80      3322
   macro avg       0.52      0.52      0.52      3322
weighted avg       0.81      0.80      0.81      3322


F1-score: 0.159

[[2604  346]
 [ 310   62]]


In [None]:
submission['SVC_Boosting'] = SVC_Boosting_prediction
submission['SVC_Boosting_Accuracy'] = '68%'

In [None]:
# GradientBoosting Model



GBC_Boosting = AdaBoostClassifier(base_estimator=GBC, n_estimators=13, random_state=42).fit(X_train, y_train)

GBC_Boosting_prediction = GBC_Boosting.predict(X_test)




In [None]:
# Metrix   0.403

print('Test Accuraccy: ', balanced_accuracy_score(y_test, GBC_Boosting_prediction))
print(f'{classification_report(y_test, GBC_Boosting_prediction)}\n')
print(f'F1-score: {f1_score(y_test, GBC_Boosting_prediction):.3f}\n')
print(confusion_matrix(y_test, GBC_Boosting_prediction))



Test Accuraccy:  0.869757174392936
              precision    recall  f1-score   support

         0.0       0.92      0.94      0.93      3187
         1.0       0.45      0.36      0.40       437

    accuracy                           0.87      3624
   macro avg       0.68      0.65      0.66      3624
weighted avg       0.86      0.87      0.86      3624


F1-score: 0.403

[[2993  194]
 [ 278  159]]


In [None]:
submission['GBC_Boosting'] = GBC_Boosting_prediction
submission['GBC_Boosting_Accuracy'] = '90%'

In [None]:
# ExtraTrees Model



EXT_Boosting = AdaBoostClassifier(base_estimator=EXT, n_estimators=13, random_state=42).fit(X_train, y_train)

EXT_Boosting_prediction = EXT_Boosting.predict(X_test)




In [None]:
# Metrix  0.441

print('Test Accuraccy: ', balanced_accuracy_score(y_test, EXT_Boosting_prediction))
print(f'{classification_report(y_test, EXT_Boosting_prediction)}\n')
print(f'F1-score: {f1_score(y_test, EXT_Boosting_prediction):.3f}\n')
print(confusion_matrix(y_test, EXT_Boosting_prediction))



Test Accuraccy:  0.5103663203936577
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      2950
           1       0.26      0.03      0.06       372

    accuracy                           0.88      3322
   macro avg       0.58      0.51      0.50      3322
weighted avg       0.82      0.88      0.84      3322


F1-score: 0.057

[[2916   34]
 [ 360   12]]


In [None]:
submission['EXT_Boosting'] = EXT_Boosting_prediction
submission['EXT_Boosting_Accuracy'] = '91%'

In [None]:
# DecisionTree Model



DT_Boosting = AdaBoostClassifier(base_estimator=DT, n_estimators=13, random_state=42).fit(X_train, y_train)

DT_Boosting_prediction = DT_Boosting.predict(X_test)




In [None]:
# Metrix   0.494

print('Test Accuraccy: ', balanced_accuracy_score(y_test, DT_Boosting_prediction))
print(f'{classification_report(y_test, DT_Boosting_prediction)}\n')
print(f'F1-score: {f1_score(y_test, DT_Boosting_prediction):.3f}\n')
print(confusion_matrix(y_test, DT_Boosting_prediction))



Test Accuraccy:  0.5606360488427191
              precision    recall  f1-score   support

           0       0.90      0.80      0.85      2950
           1       0.17      0.32      0.22       372

    accuracy                           0.75      3322
   macro avg       0.54      0.56      0.54      3322
weighted avg       0.82      0.75      0.78      3322


F1-score: 0.221

[[2372  578]
 [ 254  118]]


In [None]:
submission['DT_Boosting'] = DT_Boosting_prediction
submission['DT_Boosting_Accuracy'] = '87%'



---

# Stacking

---



In [None]:

from sklearn.svm import SVC

In [None]:
# Base Models

estimators = [
              ('LGBM', LGBMClassifier(**LGBM_params)),
              ('XGBM', XGBClassifier(**XGBM_params)),
              ('RF',   RandomForestClassifier(**RF_params)),
              ('LR',   LogisticRegression(**LR_params)),
              ('KNN',  KNeighborsClassifier(**KNN_params)),
              ('SVC',  SVC(**SVC_params)),
              #('GBC',  GradientBoostingClassifier(**GBC_params)),
              ('EXT',  ExtraTreesClassifier(**EXT_params)),
              ('DT',   DecisionTreeClassifier(**DT_params)),
              #('MLP',  MLPClassifier(**MLP_params)),

              ('LGBM_Bag', BaggingClassifier(base_estimator=LGBM, n_estimators=13, random_state=42)),
              # ('XGBM_Bag', BaggingClassifier(base_estimator=XGBM, n_estimators=13, random_state=42)),
              ('RF_Bag',   BaggingClassifier(base_estimator=RF, n_estimators=13, random_state=42)),
              ('LR_Bag',   BaggingClassifier(base_estimator=LR, n_estimators=13, random_state=42)),
              # ('KNN_Bag',  BaggingClassifier(base_estimator=KNN, n_estimators=13, random_state=42)),
              # ('SVC_Bag',  BaggingClassifier(base_estimator=SVC, n_estimators=13, random_state=42)),
              #('GBC_Bag',  BaggingClassifier(base_estimator=GBC, n_estimators=13, random_state=42)),
              ('EXT_Bag',  BaggingClassifier(base_estimator=EXT, n_estimators=13, random_state=42)),
              ('DT_Bag',   BaggingClassifier(base_estimator=DT, n_estimators=13, random_state=42)),
              # ('MLP_Bag',  BaggingClassifier(base_estimator=MLP, n_estimators=13, random_state=42)),
              
              ('LGBM_Boost', AdaBoostClassifier(base_estimator=LGBM, n_estimators=13, random_state=42)),
              #('XGBM_Boost', AdaBoostClassifier(base_estimator=XGBM, n_estimators=13, random_state=42)),
              ('RF_Boost',   AdaBoostClassifier(base_estimator=RF, n_estimators=13, random_state=42)),
              ('LR_Boost',   AdaBoostClassifier(base_estimator=LR, n_estimators=13, random_state=42)),
              #('KNN_Boost',  AdaBoostClassifier(base_estimator=KNN, n_estimators=13, random_state=42)),
              #('SVC_Boost',  AdaBoostClassifier(base_estimator=SVC, n_estimators=13, random_state=42)),
              #('GBC_Boost',  AdaBoostClassifier(base_estimator=GBC, n_estimators=13, random_state=42)),
              ('EXT_Boost',  AdaBoostClassifier(base_estimator=EXT, n_estimators=13, random_state=42)),
              ('DT_Boost',   AdaBoostClassifier(base_estimator=DT, n_estimators=13, random_state=42)),
              #('MLP_Boost',  AdaBoostClassifier(base_estimator=MLP, n_estimators=13, random_state=42)),
              
              
]

In [None]:
# Stacking Model

CV = StratifiedKFold(n_splits=5)

Stacking_model = StackingClassifier(estimators=estimators, final_estimator=LGBMClassifier(), cv=CV)

Stacking_model.fit(X_ros, y_ros)


In [None]:
Stacking_prediction = Stacking_model.predict(test)

In [None]:
submission = pd.DataFrame()

submission['ID'] = ID['ID']
submission['target'] = final_predictions

In [None]:
submission.to_csv('the_Best.csv', index=False)

In [None]:
# Metrix    0.699

print('Test Accuraccy: ', balanced_accuracy_score(y_test, Stacking_prediction))
print(f'{classification_report(y_test, Stacking_prediction)}\n')
print(f'F1-score: {f1_score(y_test, Stacking_prediction):.3f}\n')
print(confusion_matrix(y_test, Stacking_prediction))



Test Accuraccy:  0.7939282798611924
              precision    recall  f1-score   support

         0.0       0.95      0.97      0.96      3187
         1.0       0.77      0.61      0.68       437

    accuracy                           0.93      3624
   macro avg       0.86      0.79      0.82      3624
weighted avg       0.93      0.93      0.93      3624


F1-score: 0.682

[[3106   81]
 [ 169  268]]


In [None]:
# Split train data in 2 parts, training and validation
training, valid, ytraining, yvalid = train_test_split(X_ros, y_ros, test_size=0.3)



# Specify models
model1 = RandomForestClassifier(**RF_params)
model2 = LogisticRegression(**LR_params)
model3 = CatBoostClassifier(**CatB_params)
model4 = XGBClassifier(**XGBM_params)
model5 = LGBMClassifier(**LGBM_params)
model6 = KNeighborsClassifier(**KNN_params)
model7 = SVC(**SVC_params)
model8 = GradientBoostingClassifier(**GBC_params)
model9 = ExtraTreesClassifier(**EXT_params)
model10 = DecisionTreeClassifier(**DT_params)
model11 = MLPClassifier(**MLP_params)



# Bagging
model12 = BaggingClassifier(base_estimator=LGBM, n_estimators=13, random_state=42)
model13 = BaggingClassifier(base_estimator=XGBM, n_estimators=13, random_state=42)
model14 = BaggingClassifier(base_estimator=RF, n_estimators=13, random_state=42)
model15 = BaggingClassifier(base_estimator=LR, n_estimators=13, random_state=42)
model16 = BaggingClassifier(base_estimator=KNN, n_estimators=13, random_state=42)
model17 = BaggingClassifier(base_estimator=GBC, n_estimators=13, random_state=42)
model18 = BaggingClassifier(base_estimator=EXT, n_estimators=13, random_state=42)
model19 = BaggingClassifier(base_estimator=DT, n_estimators=13, random_state=42)
model20 = BaggingClassifier(base_estimator=MLP, n_estimators=13, random_state=42)



# Boosting
model21 = AdaBoostClassifier(base_estimator=LGBM, n_estimators=13, random_state=42)
model22 = AdaBoostClassifier(base_estimator=RF, n_estimators=13, random_state=42)
model23 = AdaBoostClassifier(base_estimator=LR, n_estimators=13, random_state=42)
model24 = AdaBoostClassifier(base_estimator=GBC, n_estimators=13, random_state=42)
model25 = AdaBoostClassifier(base_estimator=EXT, n_estimators=13, random_state=42)
model26 = AdaBoostClassifier(base_estimator=DT, n_estimators=13, random_state=42)



# Fit models
model1.fit(training, ytraining)
model2.fit(training, ytraining)
model3.fit(training, ytraining)
model4.fit(training, ytraining)
model5.fit(training, ytraining)
model6.fit(training, ytraining)
model7.fit(training, ytraining)
model8.fit(training, ytraining)
model9.fit(training, ytraining)
model10.fit(training, ytraining)
model11.fit(training, ytraining)



model12.fit(training, ytraining)
model13.fit(training, ytraining)
model14.fit(training, ytraining)
model15.fit(training, ytraining)
model16.fit(training, ytraining)
model17.fit(training, ytraining)
model18.fit(training, ytraining)
model19.fit(training, ytraining)
model20.fit(training, ytraining)





model21.fit(training, ytraining)
model22.fit(training, ytraining)
model23.fit(training, ytraining)
model24.fit(training, ytraining)
model25.fit(training, ytraining)
model26.fit(training, ytraining)




# make predictions for validation
preds1 = model1.predict(valid)
preds2 = model2.predict(valid)
preds3 = model3.predict(valid)
preds4 = model4.predict(valid)
preds5 = model5.predict(valid)
preds6 = model6.predict(valid)
preds7 = model7.predict(valid)
preds8 = model8.predict(valid)
preds9 = model9.predict(valid)
preds10 = model10.predict(valid)
preds11 = model11.predict(valid)




preds12 = model12.predict(valid)
preds13 = model13.predict(valid)
preds14 = model14.predict(valid)
preds15 = model15.predict(valid)
preds16 = model16.predict(valid)
preds17 = model17.predict(valid)
preds18 = model18.predict(valid)
preds19 = model19.predict(valid)
preds20 = model20.predict(valid)
preds21 = model21.predict(valid)
preds22 = model22.predict(valid)
preds23 = model23.predict(valid)
preds24 = model24.predict(valid)
preds25 = model25.predict(valid)
preds26 = model26.predict(valid)


# make predictions for test data
test_preds1 = model1.predict(test)
test_preds2 = model2.predict(test)
test_preds3 = model3.predict(test)
test_preds4 = model4.predict(test)
test_preds5 = model5.predict(test)
test_preds6 = model6.predict(test)
test_preds7 = model7.predict(test)
test_preds8 = model8.predict(test)
test_preds9 = model9.predict(test)
test_preds10 = model10.predict(test)
test_preds11 = model11.predict(test)





test_preds12 = model12.predict(test)
test_preds13 = model13.predict(test)
test_preds14 = model14.predict(test)
test_preds15 = model15.predict(test)
test_preds16 = model16.predict(test)
test_preds17 = model17.predict(test)
test_preds18 = model18.predict(test)
test_preds19 = model19.predict(test)
test_preds20 = model20.predict(test)
test_preds21 = model21.predict(test)
test_preds22 = model22.predict(test)
test_preds23 = model23.predict(test)
test_preds24 = model24.predict(test)
test_preds25 = model25.predict(test)
test_preds26 = model26.predict(test)


# Form a new dataset for valid and test via stacking the predictions
stacked_predictions = np.column_stack((preds1, preds2, preds3, preds4, preds5, preds6, preds7, preds8, preds9, preds10, preds11, preds12, preds13, preds14, preds15, preds16, preds17, preds18, preds19, preds20, preds21, preds22, preds23, preds24, preds25, preds26))
stacked_test_predictions = np.column_stack((test_preds1, test_preds2, test_preds3, test_preds4, test_preds5, test_preds6, test_preds7, test_preds8, test_preds9, test_preds10, test_preds11, test_preds12, test_preds13, test_preds14, test_preds15, test_preds16, test_preds17, test_preds18, test_preds19, test_preds20, test_preds21, test_preds22, test_preds23, test_preds24, test_preds25, test_preds26))


# specify meta model
meta_model = LGBMClassifier(**LR_params)

  
# fit meta model on stacked predictions
meta_model.fit(stacked_predictions, yvalid)


# make predictions on the stacked predictions of the test data
final_predictions = meta_model.predict(stacked_test_predictions)


In [None]:
# Metrix

print('Test Accuraccy: ', balanced_accuracy_score(y_test, final_predictions))
print(f'{classification_report(y_test, final_predictions)}\n')
print(f'F1-score: {f1_score(y_test, final_predictions):.3f}\n')
print(confusion_matrix(y_test, final_predictions))



Test Accuraccy:  0.8597560975609756
              precision    recall  f1-score   support

           0       0.81      0.73      0.77        52
           1       0.88      0.92      0.90       112

    accuracy                           0.86       164
   macro avg       0.84      0.83      0.83       164
weighted avg       0.86      0.86      0.86       164


F1-score: 0.900

[[ 38  14]
 [  9 103]]


In [None]:
# Base Models

estimators = [
              ('LGBM', LGBMClassifier(**LGBM_params)),
              ('XGBM', XGBClassifier(**XGBM_params)),
              #('CatBoost', CatBoostClassifier(**CatB_params)),
              ('RF', RandomForestClassifier(**RF_params)),
              ('LR', LogisticRegression(**LR_params)),
              ('KNN', KNeighborsClassifier(**KNN_params)),
              ('SVC', SVC(**SVC_params)),
              ('GBC', GradientBoostingClassifier(**GBC_params)),
              ('EXT', ExtraTreesClassifier(**EXT_params)),
              ('DT',  DecisionTreeClassifier(**DT_params)),
              ('MLP', MLPClassifier(**MLP_params)),
]

In [None]:
# Voting Model



Voting_model = VotingClassifier(estimators=estimators, voting='hard',  n_jobs=-1)

Voting_model.fit(X_train, y_train)

Voting_prediction = Voting_model.predict(X_test)

In [None]:
# Metrix

print('Test Accuraccy: ', balanced_accuracy_score(y_test, Voting_prediction))
print(f'{classification_report(y_test, Voting_prediction)}\n')
print(f'F1-score: {f1_score(y_test, Voting_prediction):.3f}\n')
print(confusion_matrix(y_test, Voting_prediction))



Test Accuraccy:  0.9207317073170732
              precision    recall  f1-score   support

           0       0.91      0.83      0.87        52
           1       0.92      0.96      0.94       112

    accuracy                           0.92       164
   macro avg       0.92      0.90      0.91       164
weighted avg       0.92      0.92      0.92       164


F1-score: 0.943

[[ 43   9]
 [  4 108]]


In [None]:
submission['Voting'] = Voting_prediction
submission['Voting_Accuracy'] = '92%'