In [None]:
from config import TestingConfig, ModelConfig, RANDOM_STATE
from super_classifier import SuperClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.ensemble import (
    GradientBoostingClassifier,
)
from config import (
    EnsembleConfig,
    EnsembleEnum,
    VotingEnum,
    ScoringConfig,
)
from sklearn.metrics import f1_score, recall_score
from sklearn.exceptions import ConvergenceWarning
import warnings

# Silence scikit-learn's ConvergenceWarning for the rest of this session.
# NOTE(review): presumably needed because the first demo caps
# LogisticRegression at max_iter=3 -- confirm this is not hiding a real
# convergence problem in the other cells.
warnings.simplefilter("ignore", category=ConvergenceWarning)

In [2]:
# Load the Iris dataset once; every demo below reuses the same X / y.
iris = load_iris()
X = iris.data  # feature matrix
y = iris.target  # class labels

In [3]:
# Demo 1: evaluate a single model with an explicit test split size and seed.
# NOTE(review): max_iter=3 is very low for LogisticRegression -- presumably
# deliberate to keep the demo fast; confirm before reusing these numbers.
config1 = TestingConfig(
    verbose=True,
    random_state=RANDOM_STATE,
    test_size=0.2,
    models=[
        ModelConfig(
            estimator_class=LogisticRegression,
            name="Logistic Regression",
            params=dict(max_iter=3, random_state=RANDOM_STATE),
        ),
    ],
)

# Fit and score the configured model, then show the results table.
clf1 = SuperClassifier(config1)
results_simple = clf1.fit_evaluate(X, y)
print(results_simple)

2025-03-24 18:34:03,156 - SuperClassifier - INFO - Evaluating Logistic Regression...
2025-03-24 18:34:03,159 - SuperClassifier - INFO - Before creating ensemble
2025-03-24 18:34:03,160 - SuperClassifier - INFO - Creating model


2025-03-24 18:34:03,182 - SuperClassifier - INFO -   CV Score: 0.7583 ± 0.0667
2025-03-24 18:34:03,182 - SuperClassifier - INFO -   Test Score: 0.6667


                  name  is_ensemble   cv_mean    cv_std  test_score
0  Logistic Regression        False  0.758333  0.066667    0.666667


In [4]:
# Demo 2: compare two classifiers, both scored with macro-averaged recall.
# recall_score defaults to average="binary", which does not fit a target with
# 3 classes, so the averaging mode is passed explicitly for each model.
config2 = TestingConfig(
    verbose=True,
    models=[
        ModelConfig(
            estimator_class=RandomForestClassifier,
            name="Random Forest",
            scoring=ScoringConfig(kwargs=dict(average="macro"), func=recall_score),
            params=dict(n_estimators=10, random_state=RANDOM_STATE),
        ),
        ModelConfig(
            estimator_class=SVC,
            name="SVM",
            scoring=ScoringConfig(kwargs=dict(average="macro"), func=recall_score),
            params=dict(probability=True, random_state=RANDOM_STATE),
        ),
    ],
)

# Fit and score both models, then show the results table.
clf2 = SuperClassifier(config2)
results2 = clf2.fit_evaluate(X, y)
print(results2)

# Report whichever model get_best_model() selects, with its score.
best_model = clf2.get_best_model()
print(f"Best model: {best_model['name']} with score {best_model['score']:.4f}")

2025-03-24 18:34:03,209 - SuperClassifier - INFO - Evaluating Random Forest...
2025-03-24 18:34:03,211 - SuperClassifier - INFO - Before creating ensemble
2025-03-24 18:34:03,212 - SuperClassifier - INFO - Creating model
2025-03-24 18:34:03,365 - SuperClassifier - INFO -   CV Score: 0.9488 ± 0.0611
2025-03-24 18:34:03,365 - SuperClassifier - INFO -   Test Score: 1.0000
2025-03-24 18:34:03,366 - SuperClassifier - INFO - Evaluating SVM...
2025-03-24 18:34:03,366 - SuperClassifier - INFO - Before creating ensemble
2025-03-24 18:34:03,366 - SuperClassifier - INFO - Creating model
2025-03-24 18:34:03,385 - SuperClassifier - INFO -   CV Score: 0.9500 ± 0.0612
2025-03-24 18:34:03,385 - SuperClassifier - INFO -   Test Score: 1.0000


            name  is_ensemble  cv_mean    cv_std  test_score
0  Random Forest        False  0.94881  0.061121         1.0
1            SVM        False  0.95000  0.061237         1.0
Best model: Random Forest with score 1.0000


In [5]:
# Demo 3: two standalone models next to a voting and a stacking ensemble,
# all scored with weighted F1.
config3 = TestingConfig(
    models=[
        ModelConfig(
            name="Logistic Regression",
            estimator_class=LogisticRegression,
            params={"C": 0.1, "max_iter": 1000, "random_state": RANDOM_STATE},
            scoring=ScoringConfig(func=f1_score, kwargs={"average": "weighted"}),
        ),
        ModelConfig(
            name="Gradient Boosting",
            estimator_class=GradientBoostingClassifier,
            params={
                "n_estimators": 100,
                "learning_rate": 0.1,
                "random_state": RANDOM_STATE,
            },
            is_ensemble=True,
            scoring=ScoringConfig(func=f1_score, kwargs={"average": "weighted"}),
        ),
        ModelConfig(
            name="Voting Ensemble",
            estimator_class="sklearn.ensemble.VotingClassifier",
            is_ensemble=True,
            ensemble_config=EnsembleConfig(
                ensemble_type=EnsembleEnum.voting,
                voting_type=VotingEnum.soft,
                # NOTE(review): presumably one weight per entry in
                # ensemble_models below, in the same order -- confirm.
                weights=[1, 2, 3],
            ),
            scoring=ScoringConfig(func=f1_score, kwargs={"average": "weighted"}),
        ),
        ModelConfig(
            name="Stacking Ensemble",
            estimator_class="sklearn.ensemble.StackingClassifier",
            is_ensemble=True,
            scoring=ScoringConfig(func=f1_score, kwargs={"average": "weighted"}),
            ensemble_config=EnsembleConfig(
                ensemble_type=EnsembleEnum.stacking,
                final_estimator=LogisticRegression(
                    C=0.1, max_iter=1000, random_state=RANDOM_STATE
                ),
            ),
        ),
    ],
    # Seed every estimator here like the rest of the notebook does:
    # RandomForestClassifier and SVC(probability=True) are stochastic, so
    # without random_state the ensemble scores change between reruns.
    # NOTE(review): presumably these are the base estimators shared by the
    # voting and stacking ensembles above -- confirm against SuperClassifier.
    ensemble_models=[
        LogisticRegression(C=0.1, max_iter=1000, random_state=RANDOM_STATE),
        RandomForestClassifier(n_estimators=50, random_state=RANDOM_STATE),
        SVC(probability=True, kernel="rbf", random_state=RANDOM_STATE),
    ],
    verbose=True,
)

# Fit and score every configured model, then show the results table.
clf3 = SuperClassifier(config3)
results3 = clf3.fit_evaluate(X, y)
print(results3)

# Run inference on the first five samples.
# NOTE(review): which fitted model predict / predict_proba use is decided
# inside SuperClassifier -- presumably the best-scoring one; confirm.
X_new = X[:5]
predictions = clf3.predict(X_new)
probabilities = clf3.predict_proba(X_new)

2025-03-24 18:34:03,397 - SuperClassifier - INFO - Evaluating Logistic Regression...
2025-03-24 18:34:03,399 - SuperClassifier - INFO - Before creating ensemble
2025-03-24 18:34:03,399 - SuperClassifier - INFO - Creating model
2025-03-24 18:34:03,449 - SuperClassifier - INFO -   CV Score: 0.9328 ± 0.0575
2025-03-24 18:34:03,449 - SuperClassifier - INFO -   Test Score: 1.0000
2025-03-24 18:34:03,450 - SuperClassifier - INFO - Evaluating Gradient Boosting...
2025-03-24 18:34:03,450 - SuperClassifier - INFO - Before creating ensemble
2025-03-24 18:34:03,450 - SuperClassifier - INFO - Creating model
2025-03-24 18:34:04,301 - SuperClassifier - INFO -   CV Score: 0.9410 ± 0.0575
2025-03-24 18:34:04,302 - SuperClassifier - INFO -   Test Score: 1.0000
2025-03-24 18:34:04,303 - SuperClassifier - INFO - Evaluating Voting Ensemble...
2025-03-24 18:34:04,303 - SuperClassifier - INFO - Before creating ensemble
2025-03-24 18:34:04,304 - SuperClassifier - INFO - Creating ensemble
2025-03-24 18:34:04,

                  name  is_ensemble   cv_mean    cv_std  test_score
0  Logistic Regression        False  0.932804  0.057458         1.0
1    Gradient Boosting         True  0.940971  0.057485         1.0
2      Voting Ensemble         True  0.956078  0.068861         1.0
3    Stacking Ensemble         True  0.939278  0.060741         1.0
