In [1]:
from loaders.Loaders import CSVLoader
from compoundFeaturization.rdkitFingerprints import MorganFingerprint
from featureSelection.baseFeatureSelector import LowVarianceFS
from splitters.splitters import SingletaskStratifiedSplitter
from models.sklearnModels import SklearnModel
from metrics.Metrics import Metric
from metrics.metricsFunctions import roc_auc_score, precision_score, accuracy_score, confusion_matrix, classification_report
from parameterOptimization.HyperparameterOpt import GridHyperparamOpt

from sklearn.ensemble import RandomForestClassifier, VotingClassifier
import sklearn

import numpy as np
import pandas as pd
import random

In [2]:
#Load Dataset
dataset = CSVLoader(dataset_path='preprocessed_dataset_wfoodb.csv', 
                    mols_field='Smiles', 
                    labels_fields='Class', 
                    id_field='ID')#, shard_size=4000)
dataset = dataset.create_dataset()
dataset.get_shape()

Mols_shape:  23290
Features_shape:  X not defined!
Labels_shape:  (23290,)


In [3]:
#Separate sweeteners from non sweeteners
sweets_indexes = np.where(dataset.y == 1)
#test_sweets_indexes = sweets_indexes[0][:int(len(sweets_indexes[0])*0.1)]
#sweets_indexes = sweets_indexes[0][int(len(sweets_indexes[0])*0.1):]
test_sweets_indexes = sweets_indexes[0][:130]
sweets_indexes = sweets_indexes[0][130:]

sweets = dataset.select(sweets_indexes)
sweets_test = dataset.select(test_sweets_indexes)

non_sweets_indexes = np.where(dataset.y == 0)
#test_non_sweets_indexes = non_sweets_indexes[0][:int(len(non_sweets_indexes[0])*0.15)]
#non_sweets_indexes = non_sweets_indexes[0][int(len(non_sweets_indexes[0])*0.15):]
test_non_sweets_indexes = non_sweets_indexes[0][:130]
non_sweets_indexes = non_sweets_indexes[0][130:]

non_sweets = dataset.select(non_sweets_indexes)
non_sweets_test = dataset.select(test_non_sweets_indexes)

In [4]:
#Ratio between sweeteners and non sweeteners
n_balanced_datasets = int(np.floor(len(non_sweets.y)/len(sweets.y)))
n_balanced_datasets

18

In [5]:
indexes = list(range(0, len(non_sweets.y)))

def partition (list_in, n):
    random.shuffle(list_in)
    return [list_in[i::n] for i in range(n)]

dataset_indexes = partition(indexes, n_balanced_datasets)


balanced_datasets = []
for i, di in enumerate(dataset_indexes):
    ds = non_sweets.select(di)
    ds = ds.merge([sweets])
    balanced_datasets.append(ds)
    
test_bl_dataset = sweets_test.merge([non_sweets_test])
test_bl_dataset = MorganFingerprint().featurize(test_bl_dataset)
test_bl_dataset.get_shape()
#test_bl_dataset = LowVarianceFS(0.15).featureSelection(test_bl_dataset)
#test_bl_dataset.get_shape()
#test_bl_dataset.features2keep

Featurizing datapoint 0
Mols_shape:  260
Features_shape:  (260, 1024)
Labels_shape:  (260,)


In [6]:
estimators = []
predictions = [0]*len(test_bl_dataset.X)

for i, dataset in enumerate(balanced_datasets):
    #Featurization
    #dataset = balanced_datasets[0]
    dataset = MorganFingerprint().featurize(dataset)
    dataset.get_shape()

    #Feature Selection
    #dataset = LowVarianceFS(0.15).featureSelection(dataset)
    #dataset.get_shape()

    #Data Split
    splitter = SingletaskStratifiedSplitter()
    train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(dataset=dataset, frac_train=0.6, 
                                                                                 frac_valid=0.2, frac_test=0.2)

    #metrics
    metrics = [Metric(roc_auc_score), Metric(precision_score), Metric(accuracy_score), Metric(confusion_matrix), 
               Metric(classification_report)]

    #Build a model function for hyperparameter optimization
    def rf_model_builder(n_estimators, max_features, class_weight, model_dir=None):
        rf_model = RandomForestClassifier(n_estimators=n_estimators, max_features=max_features, 
                                          class_weight=class_weight)
        return SklearnModel(rf_model, model_dir)

    params_dict_rf = {"n_estimators": [10, 100],
                      "max_features": ["auto", "sqrt", "log2", None],
                      "class_weight": [{0: 1., 1: 1.}, {0: 1., 1: 5}, {0: 1., 1: 10}]
                      }

    #Hyperparameter Optimization
    optimizer = GridHyperparamOpt(rf_model_builder)

    best_rf, best_hyperparams, all_results = optimizer.hyperparam_search(params_dict_rf, train_dataset, 
                                                                         valid_dataset, Metric(roc_auc_score))

    print('#################')
    print(best_hyperparams)
    print(best_rf)

    #Evaluate best model
    best_rf.evaluate(test_dataset, metrics)
    
    prdt = best_rf.predict(test_bl_dataset)
    for j, p in enumerate(prdt):
        predictions[j] += p[0]
        
    estimator_name = 'rf_'+str(i)
    estimators.append((estimator_name, best_rf))

    
predictions = [x / len(estimators) for x in predictions]
print(predictions)

Featurizing datapoint 0
Featurizing datapoint 1000
Featurizing datapoint 2000
Mols_shape:  2406
Features_shape:  (2406, 1024)
Labels_shape:  (2406,)
Fitting 15 random models from a space of 24 possible models.
Fitting model 1/15
hyperparameters: {'n_estimators': 10, 'max_features': 'auto', 'class_weight': {0: 1.0, 1: 1.0}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8257170636849781
Model 1/15, Metric roc_auc_score, Validation set 1: 0.825717
	best_validation_score so far: 0.825717
Fitting model 2/15
hyperparameters: {'n_estimators': 10, 'max_features': 'auto', 'class_weight': {0: 1.0, 1: 5}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8359434682964094
Model 2/15, Metric roc_auc_score, Validation set 2: 0.835943
	best_validation_score so far: 0.835943
Fitting model 3/15
hyperparameters: {'n_estimators': 10, 'max_features': 'sqrt', 'class_weight': {0: 1.0, 1: 1.0}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8236509479824987
Model 3/15, Metric roc_auc_sc

Models Class line 158 --> evaluator
roc_auc_score: 
 0.8629765955969164
Model 10/15, Metric roc_auc_score, Validation set 10: 0.862977
	best_validation_score so far: 0.862977
Fitting model 11/15
hyperparameters: {'n_estimators': 100, 'max_features': 'sqrt', 'class_weight': {0: 1.0, 1: 10}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8544343357177582
Model 11/15, Metric roc_auc_score, Validation set 11: 0.854434
	best_validation_score so far: 0.862977
Fitting model 12/15
hyperparameters: {'n_estimators': 100, 'max_features': 'log2', 'class_weight': {0: 1.0, 1: 10}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8463087714424613
Model 12/15, Metric roc_auc_score, Validation set 12: 0.846309
	best_validation_score so far: 0.862977
Fitting model 13/15
hyperparameters: {'n_estimators': 100, 'max_features': None, 'class_weight': {0: 1.0, 1: 1.0}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8565699006875477
Model 13/15, Metric roc_auc_score, Validation set 13: 0.

Featurizing datapoint 2000
Mols_shape:  2406
Features_shape:  (2406, 1024)
Labels_shape:  (2406,)
Fitting 15 random models from a space of 24 possible models.
Fitting model 1/15
hyperparameters: {'n_estimators': 10, 'max_features': 'auto', 'class_weight': {0: 1.0, 1: 1.0}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8382873810681297
Model 1/15, Metric roc_auc_score, Validation set 1: 0.838287
	best_validation_score so far: 0.838287
Fitting model 2/15
hyperparameters: {'n_estimators': 10, 'max_features': 'auto', 'class_weight': {0: 1.0, 1: 10}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8295714980206959
Model 2/15, Metric roc_auc_score, Validation set 2: 0.829571
	best_validation_score so far: 0.838287
Fitting model 3/15
hyperparameters: {'n_estimators': 10, 'max_features': 'sqrt', 'class_weight': {0: 1.0, 1: 5}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8191714702409889
Model 3/15, Metric roc_auc_score, Validation set 3: 0.819171
	best_validation_sco

Models Class line 158 --> evaluator
roc_auc_score: 
 0.8411016949152542
Model 10/15, Metric roc_auc_score, Validation set 10: 0.841102
	best_validation_score so far: 0.853466
Fitting model 11/15
hyperparameters: {'n_estimators': 100, 'max_features': 'log2', 'class_weight': {0: 1.0, 1: 5}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8450611280911364
Model 11/15, Metric roc_auc_score, Validation set 11: 0.845061
	best_validation_score so far: 0.853466
Fitting model 12/15
hyperparameters: {'n_estimators': 100, 'max_features': 'log2', 'class_weight': {0: 1.0, 1: 10}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.842873020283412
Model 12/15, Metric roc_auc_score, Validation set 12: 0.842873
	best_validation_score so far: 0.853466
Fitting model 13/15
hyperparameters: {'n_estimators': 100, 'max_features': None, 'class_weight': {0: 1.0, 1: 1.0}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8262017227007502
Model 13/15, Metric roc_auc_score, Validation set 13: 0.82

Featurizing datapoint 2000
Mols_shape:  2405
Features_shape:  (2405, 1024)
Labels_shape:  (2405,)
Fitting 15 random models from a space of 24 possible models.
Fitting model 1/15
hyperparameters: {'n_estimators': 10, 'max_features': 'auto', 'class_weight': {0: 1.0, 1: 1.0}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8235814987151885
Model 1/15, Metric roc_auc_score, Validation set 1: 0.823581
	best_validation_score so far: 0.823581
Fitting model 2/15
hyperparameters: {'n_estimators': 10, 'max_features': 'auto', 'class_weight': {0: 1.0, 1: 10}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8399368011667476
Model 2/15, Metric roc_auc_score, Validation set 2: 0.839937
	best_validation_score so far: 0.839937
Fitting model 3/15
hyperparameters: {'n_estimators': 10, 'max_features': 'sqrt', 'class_weight': {0: 1.0, 1: 10}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8400756997013681
Model 3/15, Metric roc_auc_score, Validation set 3: 0.840076
	best_validation_sc

Models Class line 158 --> evaluator
roc_auc_score: 
 0.8796096951177166
Model 10/15, Metric roc_auc_score, Validation set 10: 0.879610
	best_validation_score so far: 0.894177
Fitting model 11/15
hyperparameters: {'n_estimators': 100, 'max_features': 'sqrt', 'class_weight': {0: 1.0, 1: 5}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8837072018890201
Model 11/15, Metric roc_auc_score, Validation set 11: 0.883707
	best_validation_score so far: 0.894177
Fitting model 12/15
hyperparameters: {'n_estimators': 100, 'max_features': 'log2', 'class_weight': {0: 1.0, 1: 1.0}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8922147371345234
Model 12/15, Metric roc_auc_score, Validation set 12: 0.892215
	best_validation_score so far: 0.894177
Fitting model 13/15
hyperparameters: {'n_estimators': 100, 'max_features': 'log2', 'class_weight': {0: 1.0, 1: 10}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8921800125008682
Model 13/15, Metric roc_auc_score, Validation set 13: 0

RDKit ERROR: [09:50:56] Explicit valence for atom # 0 P, 11, is greater than permitted


error in smile: [P](OCC=C(C)C)(OCC=C(C)C)(=O)(OP(OCC=C(C)C)(OCC=C(C)C)=O)(CC=C(C)C)(CC=C(C)C)(CC=C(C)C)(CC=C(C)C)(CC=C(C)C)CC=C(C)C
Featurizing datapoint 2000
Elements with indexes:  [1099]  were removed due to the presence of NAs!
The elements in question are:  ['[P](OCC=C(C)C)(OCC=C(C)C)(=O)(OP(OCC=C(C)C)(OCC=C(C)C)=O)(CC=C(C)C)(CC=C(C)C)(CC=C(C)C)(CC=C(C)C)(CC=C(C)C)CC=C(C)C']
Mols_shape:  2404
Features_shape:  (2404, 1024)
Labels_shape:  (2404,)
Fitting 15 random models from a space of 24 possible models.
Fitting model 1/15
hyperparameters: {'n_estimators': 10, 'max_features': 'sqrt', 'class_weight': {0: 1.0, 1: 1.0}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8219513465645674
Model 1/15, Metric roc_auc_score, Validation set 1: 0.821951
	best_validation_score so far: 0.821951
Fitting model 2/15
hyperparameters: {'n_estimators': 10, 'max_features': 'sqrt', 'class_weight': {0: 1.0, 1: 5}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8569828619055062
Model 2/15, 

RDKit ERROR: [09:51:36] Explicit valence for atom # 3 B, 4, is greater than permitted


error in smile: OB1O[B]2(O)OB(O)O[B](O)(O1)O2
Featurizing datapoint 1000
Featurizing datapoint 2000
Elements with indexes:  [822]  were removed due to the presence of NAs!
The elements in question are:  ['OB1O[B]2(O)OB(O)O[B](O)(O1)O2']
Mols_shape:  2404
Features_shape:  (2404, 1024)
Labels_shape:  (2404,)
Fitting 15 random models from a space of 24 possible models.
Fitting model 1/15
hyperparameters: {'n_estimators': 10, 'max_features': 'auto', 'class_weight': {0: 1.0, 1: 1.0}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.821480658379054
Model 1/15, Metric roc_auc_score, Validation set 1: 0.821481
	best_validation_score so far: 0.821481
Fitting model 2/15
hyperparameters: {'n_estimators': 10, 'max_features': 'sqrt', 'class_weight': {0: 1.0, 1: 1.0}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8524723939162442
Model 2/15, Metric roc_auc_score, Validation set 2: 0.852472
	best_validation_score so far: 0.852472
Fitting model 3/15
hyperparameters: {'n_estimators': 10,

Models Class line 158 --> evaluator
roc_auc_score: 
 0.8733916757826744
Model 9/15, Metric roc_auc_score, Validation set 9: 0.873392
	best_validation_score so far: 0.873392
Fitting model 10/15
hyperparameters: {'n_estimators': 100, 'max_features': 'auto', 'class_weight': {0: 1.0, 1: 10}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8565661301244987
Model 10/15, Metric roc_auc_score, Validation set 10: 0.856566
	best_validation_score so far: 0.873392
Fitting model 11/15
hyperparameters: {'n_estimators': 100, 'max_features': 'sqrt', 'class_weight': {0: 1.0, 1: 1.0}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8718028858675835
Model 11/15, Metric roc_auc_score, Validation set 11: 0.871803
	best_validation_score so far: 0.873392
Fitting model 12/15
hyperparameters: {'n_estimators': 100, 'max_features': 'sqrt', 'class_weight': {0: 1.0, 1: 10}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8609939052977028
Model 12/15, Metric roc_auc_score, Validation set 12: 0.

RDKit ERROR: [09:53:34] Explicit valence for atom # 1 Cl, 4, is greater than permitted


error in smile: O=[Cl]=O
Featurizing datapoint 1000
Featurizing datapoint 2000
Elements with indexes:  [856]  were removed due to the presence of NAs!
The elements in question are:  ['O=[Cl]=O']
Mols_shape:  2404
Features_shape:  (2404, 1024)
Labels_shape:  (2404,)
Fitting 15 random models from a space of 24 possible models.
Fitting model 1/15
hyperparameters: {'n_estimators': 10, 'max_features': 'auto', 'class_weight': {0: 1.0, 1: 5}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8150045142023752
Model 1/15, Metric roc_auc_score, Validation set 1: 0.815005
	best_validation_score so far: 0.815005
Fitting model 2/15
hyperparameters: {'n_estimators': 10, 'max_features': 'auto', 'class_weight': {0: 1.0, 1: 10}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8419334676019168
Model 2/15, Metric roc_auc_score, Validation set 2: 0.841933
	best_validation_score so far: 0.841933
Fitting model 3/15
hyperparameters: {'n_estimators': 10, 'max_features': 'sqrt', 'class_weight': {0:

Models Class line 158 --> evaluator
roc_auc_score: 
 0.8439079022763973
Model 9/15, Metric roc_auc_score, Validation set 9: 0.843908
	best_validation_score so far: 0.848752
Fitting model 10/15
hyperparameters: {'n_estimators': 100, 'max_features': 'sqrt', 'class_weight': {0: 1.0, 1: 1.0}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8776631765380007
Model 10/15, Metric roc_auc_score, Validation set 10: 0.877663
	best_validation_score so far: 0.877663
Fitting model 11/15
hyperparameters: {'n_estimators': 100, 'max_features': 'sqrt', 'class_weight': {0: 1.0, 1: 5}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8833671927905401
Model 11/15, Metric roc_auc_score, Validation set 11: 0.883367
	best_validation_score so far: 0.883367
Fitting model 12/15
hyperparameters: {'n_estimators': 100, 'max_features': 'log2', 'class_weight': {0: 1.0, 1: 1.0}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8717507943949575
Model 12/15, Metric roc_auc_score, Validation set 12: 0.

Featurizing datapoint 1000
Featurizing datapoint 2000
Mols_shape:  2405
Features_shape:  (2405, 1024)
Labels_shape:  (2405,)
Fitting 15 random models from a space of 24 possible models.
Fitting model 1/15
hyperparameters: {'n_estimators': 10, 'max_features': 'auto', 'class_weight': {0: 1.0, 1: 10}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8336719279054019
Model 1/15, Metric roc_auc_score, Validation set 1: 0.833672
	best_validation_score so far: 0.833672
Fitting model 2/15
hyperparameters: {'n_estimators': 10, 'max_features': 'sqrt', 'class_weight': {0: 1.0, 1: 1.0}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8427879356149398
Model 2/15, Metric roc_auc_score, Validation set 2: 0.842788
	best_validation_score so far: 0.842788
Fitting model 3/15
hyperparameters: {'n_estimators': 10, 'max_features': 'sqrt', 'class_weight': {0: 1.0, 1: 5}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8340365682137835
Model 3/15, Metric roc_auc_score, Validation set 3: 0.

RDKit ERROR: [09:55:36] Explicit valence for atom # 1 Cl, 9, is greater than permitted


error in smile: O=[Cl-](=O)(=O)=O
Featurizing datapoint 1000
Featurizing datapoint 2000
Elements with indexes:  [855]  were removed due to the presence of NAs!
The elements in question are:  ['O=[Cl-](=O)(=O)=O']
Mols_shape:  2404
Features_shape:  (2404, 1024)
Labels_shape:  (2404,)
Fitting 15 random models from a space of 24 possible models.
Fitting model 1/15
hyperparameters: {'n_estimators': 10, 'max_features': 'auto', 'class_weight': {0: 1.0, 1: 1.0}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8275053823182165
Model 1/15, Metric roc_auc_score, Validation set 1: 0.827505
	best_validation_score so far: 0.827505
Fitting model 2/15
hyperparameters: {'n_estimators': 10, 'max_features': 'auto', 'class_weight': {0: 1.0, 1: 5}}
Models Class line 158 --> evaluator
roc_auc_score: 
 0.8461004236405307
Model 2/15, Metric roc_auc_score, Validation set 2: 0.846100
	best_validation_score so far: 0.846100
Fitting model 3/15
hyperparameters: {'n_estimators': 10, 'max_features': 'log2', 

In [7]:
int_preds = []

for p in predictions:
    if p >= 0.5:
        int_preds.append(0)
    else: int_preds.append(1)
        
print(int_preds)
print(test_bl_dataset.y)

[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0]
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 

In [8]:
print('precision_score:\n ', sklearn.metrics.precision_score(test_bl_dataset.y, int_preds))

print('accuracy_score:\n ', sklearn.metrics.accuracy_score(test_bl_dataset.y, int_preds))

print('confusion_matrix:\n ', sklearn.metrics.confusion_matrix(test_bl_dataset.y, int_preds))

print('classification_report:\n ', sklearn.metrics.classification_report(test_bl_dataset.y, int_preds))

precision_score:
  0.8592592592592593
accuracy_score:
  0.8730769230769231
confusion_matrix:
  [[111  19]
 [ 14 116]]
classification_report:
                precision    recall  f1-score   support

           0       0.89      0.85      0.87       130
           1       0.86      0.89      0.88       130

    accuracy                           0.87       260
   macro avg       0.87      0.87      0.87       260
weighted avg       0.87      0.87      0.87       260



In [9]:
#TODO: implement voing classifier in our pipeline
#ensemble = VotingClassifier(estimators, voting='hard')

#ensemble.fit(train_dataset.X, train_dataset.y)

#ensemble.score(test_dataset.X, test_dataset.y)