In [2]:
import numpy as np

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

from hellsemble import Hellsemble
from hellsemble.estimator_generator import PredefinedEstimatorsGenerator
from hellsemble.prediction_generator import FixedThresholdPredictionGenerator

from test.resources.data_generator import Data

from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

In [3]:
# Assemble the synthetic dataset from four segments. Every segment has three
# features and is added with shuffle=False; only the number of observations
# and the segment type differ.
data = Data()

segment_specs = (
    (3000, "linear"),
    (1000, "non-linear"),
    (5000, "complex"),
    (1000, "imbalanced"),
)
for segment_size, segment_type in segment_specs:
    data.add_segment(
        n_features=3,
        n_observations=segment_size,
        type=segment_type,
        shuffle=False,
    )

In [4]:
# Optional: uncomment to plot the generated segments.
# data.plot_segments()

#### Greedy mode

In [5]:
# Hold out 20% of the observations for evaluation; the fixed random_state
# keeps the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    data.X,
    data.y,
    test_size=0.2,
    random_state=44,
)

In [26]:
# Candidate base estimators offered to Hellsemble.
# SVC(probability=True) and DecisionTreeClassifier both draw random numbers
# while fitting, so they are seeded here; without a seed a Restart & Run All
# would not reproduce the same scores. The seed value matches the
# random_state used for the train/test split.
classifiers = [
    LogisticRegression(),
    SVC(probability=True, random_state=44),
    KNeighborsClassifier(),
    DecisionTreeClassifier(random_state=44),
]

# Wrap the fixed list of estimators and the 0.5 threshold in the generator
# objects that Hellsemble takes as arguments.
estimator_generator = PredefinedEstimatorsGenerator(classifiers)
prediction_generator = FixedThresholdPredictionGenerator(0.5)

# Model passed to Hellsemble as its routing model.
routing_model = LogisticRegression()

hellsemble_model = Hellsemble(
    estimator_generator=estimator_generator,
    prediction_generator=prediction_generator,
    routing_model=routing_model,
    metric='roc_auc',
)

In [None]:
# Fit the ensemble on the training split; the value returned by fit() is
# shown as the cell output.
hellsemble_model.fit(X_train, y_train)

In [None]:
# Score the fitted ensemble on the held-out split.
y_pred = hellsemble_model.predict(X_test)

# Plain accuracy of the predicted labels.
test_accuracy = accuracy_score(y_test, y_pred)
print(test_accuracy)

# Hellsemble's own evaluation on the same held-out data.
hellsemble_evaluation = hellsemble_model.evaluate_hellsemble(X_test, y_test)
print(hellsemble_evaluation)

#### Sequential mode

In [19]:
# Separate synthetic classification problem for the sequential demo:
# 1000 samples, flip_y=0.2, seeded so the data is identical on every run.
X, y = make_classification(
    n_samples=1000,
    flip_y=0.2,
    random_state=123,
)

In [None]:
# Two logistic regressions serve as the predefined estimators.
sequential_estimators = PredefinedEstimatorsGenerator(
    [LogisticRegression(), LogisticRegression()]
)

# Same kind of components as the greedy example, but passed positionally and
# with mode='Sequential'.
estimator = Hellsemble(
    sequential_estimators,
    FixedThresholdPredictionGenerator(0.5),
    LogisticRegression(),
    mode='Sequential',
)

# Keep whatever fit() returns as `model` and show it as the cell output.
model = estimator.fit(X, y)
model

In [None]:
# Show predict_proba's output for X, the same samples the model was fitted on.
model.predict_proba(X)

In [22]:
# Predict for X, the same data that was passed to fit().
y_pred = model.predict(X)

In [23]:
# Cast the predictions to bool before they are scored.
# NOTE(review): presumably predict() returns a dtype that does not compare
# cleanly with y — confirm whether this cast is still required.
y_pred = y_pred.astype(bool)

In [None]:
# accuracy_score is already imported in the notebook's first (imports) cell,
# so it is not re-imported here.
# Note: X was also used to fit `model`, so this is accuracy on the fitting
# data, not a held-out estimate.
accuracy_score(y, y_pred)