In [1]:
from sklearn.model_selection import GridSearchCV, cross_validate
from stree import Stree
from experimentation.Sets import Datasets
from experimentation import Experiment

In [2]:
# Active experiment configuration. Earlier runs of this cell also tried the
# "cylinder-bands" and "pima" datasets; only the values below are in effect.
dataset_name = "conn-bench-sonar-mines-rocks"
# Keyword arguments for the single Stree fit that is cross-validated below.
# No "kernel" key is given, so Stree's own default kernel is used.
parameters = {"C": 55, "max_iter": 10000.0, "random_state": 1}

In [3]:
# Load the feature matrix and labels for `dataset_name` through the project's
# Datasets helper (file set "tanveer").
# NOTE(review): normalize=True / standardize=False presumably select the
# feature scaling applied on load -- confirm in experimentation.Sets.
datasets = Datasets(normalize=True, standardize=False, set_of_files="tanveer")
X, y = datasets.load(dataset_name)

In [4]:
# Baseline: cross-validate one Stree built from the hand-picked `parameters`.
# `cv` is left at cross_validate's default (the output below has five folds).
clf = Stree(**parameters)
results = cross_validate(clf, X, y, n_jobs=1)

In [5]:
# Per-fold fit time, score time and test score returned by cross_validate.
results

{'fit_time': array([0.00772715, 0.03221297, 0.01418114, 0.06252027, 0.05369782]),
 'score_time': array([0.00048399, 0.00044394, 0.00045371, 0.00051093, 0.00044894]),
 'test_score': array([0.4047619 , 0.61904762, 0.66666667, 0.92682927, 0.58536585])}

In [6]:
# Mean test score over the folds (per-fold scores above range from ~0.40 to ~0.93).
results['test_score'].mean()

0.640534262485482

In [7]:
# Full Stree search space, one dict per kernel family.
# As written the grid enumerates 72 + 216 + 432 = 720 candidate settings per
# dataset (the earlier note on this cell said 864).
C = [0.05, 0.2, 0.55, 7, 55, 1e4]
max_iter = [1e4, 1e5, 1e6]
gamma = [1e-1, 1, 1e1]
max_features = [None, "auto"]
split_criteria = ["impurity", "max_samples"]

# Keys that close every group; unpacked last so key order stays unchanged.
_shared_tail = {
    "max_iter": max_iter,
    "split_criteria": split_criteria,
    "max_features": max_features,
}
param_grid = [
    # No "kernel" key: the estimator's default kernel is searched.
    {"random_state": [1], "C": C, **_shared_tail},
    {"random_state": [1], "kernel": ["rbf"], "C": C, "gamma": gamma, **_shared_tail},
    {
        "random_state": [1],
        "kernel": ["poly"],
        "degree": [3, 5],
        "C": C,
        "gamma": gamma,
        **_shared_tail,
    },
]

In [8]:
# Reduced grid that REPLACES the full one defined in the previous cell:
# poly kernel only, C fixed at 55, degree 5 or 7 -> 2 * 2 * 2 = 8 candidates.
# `split_criteria` and `max_features` are reused from the previous cell, so
# that cell must have been executed first.
param_grid = [
            {
                "random_state": [1],
                "kernel": ["poly"],
                "degree": [5, 7],
                "C": [55],
                "split_criteria": split_criteria,
                "max_features": max_features,
            },
        ]

In [9]:
# Exhaustive search over `param_grid` starting from a default Stree.
# scoring and cv are left at GridSearchCV's defaults; `X`, `y` come from the
# dataset-loading cell above.
clf = Stree()
model = GridSearchCV(clf, n_jobs=1, param_grid=param_grid)
model.fit(X, y)

GridSearchCV(estimator=Stree(), n_jobs=1,
             param_grid=[{'C': [55], 'degree': [5, 7], 'kernel': ['poly'],
                          'max_features': [None, 'auto'], 'random_state': [1],
                          'split_criteria': ['impurity', 'max_samples']}])

# Parameter setting of the best-ranked candidate found by the search.
print(model.best_params_)

In [11]:
# Mean cross-validated test score of that best candidate.
print(model.best_score_)

0.6448315911730547


In [10]:
# NOTE(review): `clf` is the Stree() template handed to GridSearchCV, which
# fits clones of it, so this does not show the tuned model (the captured
# output is blank). `model.best_estimator_` is presumably what was intended --
# confirm. This cell's execution count (10) also precedes the cell above (11).
print(clf)




In [1]:
from stree import Stree
from typing import Union, Optional, List
from abc import ABC
from sklearn.ensemble import (
    AdaBoostClassifier,  # type: ignore
    BaggingClassifier,  # type: ignore
)
from sklearn.ensemble import BaseEnsemble  # type: ignore
from sklearn.base import BaseEstimator  # type: ignore
from sklearn.svm import LinearSVC  # type: ignore
from sklearn.tree import DecisionTreeClassifier  # type: ignore
from odte import Odte


class ModelBase(ABC):
    """Common accessors for a classifier bundled with its search grid.

    Subclasses are expected to assign ``_clf``, ``_model_name`` and
    ``_param_grid``; this base only stores the random seed.
    """

    def __init__(self, random_state: Optional[int]):
        self._random_state = random_state

    def get_model_name(self) -> str:
        """Return the short name assigned by the subclass."""
        return self._model_name

    def get_model(self) -> Union[BaseEnsemble, BaseEstimator]:
        """Return the wrapped estimator instance."""
        return self._clf

    def get_parameters(self) -> dict:
        """Return the parameter grid assigned by the subclass."""
        return self._param_grid

    def modified_parameters(self, optimum_parameters) -> dict:
        """Keep only the entries that differ from the estimator's defaults.

        Defaults are read from a freshly constructed instance of the wrapped
        estimator's class, not from the wrapped instance itself. Keys unknown
        to the defaults are always kept; ``base_estimator`` is always dropped
        (useful for ensembles).
        """
        skipped = ["base_estimator"]
        defaults = type(self._clf)().get_params()
        return {
            name: value
            for name, value in optimum_parameters.items()
            if (name not in defaults or defaults[name] != value)
            and name not in skipped
        }


class ModelStree(ModelBase):
    """Stree classifier together with its three-group search grid.

    The groups cover: no explicit kernel (the estimator's default), the
    ``rbf`` kernel with ``gamma``, and the ``poly`` kernel with ``gamma`` and
    ``degree``. Every group pins ``random_state`` to the seed given here.
    """

    def __init__(self, random_state: Optional[int] = None) -> None:
        self._clf = Stree()
        super().__init__(random_state)
        self._model_name = "stree"
        penalties = [0.05, 0.2, 0.55, 7, 55, 1e4]
        kernel_scales = [1e-1, 1, 1e1]
        # Keys that close every group; unpacked last so key order is the same
        # as if each group had been spelled out in full.
        shared_tail = {
            "max_iter": [1e4, 1e5, 1e6],
            "split_criteria": ["impurity", "max_samples"],
            "max_features": [None, "auto"],
        }
        self._param_grid = [
            {
                "random_state": [self._random_state],
                "C": penalties,
                **shared_tail,
            },
            {
                "random_state": [self._random_state],
                "kernel": ["rbf"],
                "C": penalties,
                "gamma": kernel_scales,
                **shared_tail,
            },
            {
                "random_state": [self._random_state],
                "kernel": ["poly"],
                "degree": [3, 5],
                "C": penalties,
                "gamma": kernel_scales,
                **shared_tail,
            },
        ]


class ModelSVC(ModelBase):
    """LinearSVC together with its search grid over ``C`` and ``max_iter``."""

    def __init__(self, random_state: Optional[int] = None) -> None:
        super().__init__(random_state)
        self._clf = LinearSVC()
        self._model_name = "svc"
        # LinearSVC documents max_iter as an int; float literals such as 1e4
        # are rejected by scikit-learn's parameter validation in recent
        # releases, so the candidates are spelled as (numerically equal) ints.
        max_iter = [10000, 100000, 1000000]
        self._param_grid = [
            {
                "random_state": [self._random_state],
                "C": [1, 55, 1e4],
                "max_iter": max_iter,
            },
        ]


class ModelDecisionTree(ModelBase):
    """DecisionTreeClassifier together with its ``max_features`` search grid."""

    def __init__(self, random_state: Optional[int] = None) -> None:
        super().__init__(random_state)
        self._clf = DecisionTreeClassifier()
        self._model_name = "dtree"
        self._param_grid = [
            {
                "random_state": [self._random_state],
                # "sqrt" replaces the former "auto": for classification trees
                # the two select the same number of features, and "auto" is
                # no longer accepted by recent scikit-learn releases.
                "max_features": [None, "log2", "sqrt"],
            },
        ]


class Ensemble(ModelBase):
    """Base for ensemble wrappers that search over a wrapped base model.

    ``base_model`` is used as an object exposing ``get_parameters()``.
    ``self._parameters`` (the ensemble-level grid) is NOT assigned here; a
    subclass must set it before ``merge_parameters`` is called.
    """

    def __init__(
        self,
        random_state: Optional[int] = 0,
        base_model: Union[BaseEnsemble, BaseEstimator] = None,
    ) -> None:
        super().__init__(random_state)
        self._base_model = base_model

    def merge_parameters(self, params: dict) -> dict:
        """Return the ensemble grid extended with ``params``.

        Every key of ``params`` is prefixed with ``base_estimator__`` so that
        it addresses the nested base estimator. ``self._parameters`` itself is
        left untouched (a shallow copy is extended).
        """
        merged = self._parameters.copy()
        merged.update(
            {f"base_estimator__{name}": setting for name, setting in params.items()}
        )
        return merged

    def get_parameters(self) -> List[dict]:
        """Return one merged grid per parameter group of the base model."""
        return [
            self.merge_parameters(group)
            for group in self._base_model.get_parameters()
        ]


class ModelAdaBoost(Ensemble):
    """AdaBoostClassifier built on top of another model wrapper.

    Parameters
    ----------
    random_state:
        Seed passed both to the base-model wrapper and to AdaBoostClassifier.
    base_model:
        A wrapper *class* (not an instance); it is called as
        ``base_model(random_state=random_state)`` and must provide
        ``get_model()`` and ``get_parameters()``. Defaults to ``ModelStree``.

    ``get_parameters()`` is inherited from ``Ensemble`` and returns one grid
    per base-model parameter group, each extended with the ensemble-level
    settings stored in ``self._parameters``.
    """

    def __init__(
        self, random_state: int, base_model: BaseEstimator = ModelStree
    ):
        # Build base_model
        super().__init__(
            random_state, base_model=base_model(random_state=random_state)
        )
        # Ensemble-level grid. Assigned at construction time so that
        # merge_parameters() also works on a fresh instance, i.e. without a
        # prior call to get_parameters().
        self._parameters = {"n_estimators": [50], "algorithm": ["SAMME"]}
        # NOTE(review): newer scikit-learn releases rename `base_estimator`
        # to `estimator`; confirm against the pinned scikit-learn version.
        self._clf = AdaBoostClassifier(
            base_estimator=self._base_model.get_model(),
            random_state=random_state,
        )
        self._model_name = f"Adaboost_{self._base_model.__class__.__name__}"

In [2]:
# AdaBoost wrapper over the default base model (ModelStree), seeded with 1.
b = ModelAdaBoost(1)

In [3]:
# One grid per base-model group, with base keys prefixed by "base_estimator__".
b.get_parameters()

[{'n_estimators': [50],
  'algorithm': ['SAMME'],
  'base_estimator__random_state': [1],
  'base_estimator__C': [0.05, 0.2, 0.55, 7, 55, 10000.0],
  'base_estimator__max_iter': [10000.0, 100000.0, 1000000.0],
  'base_estimator__split_criteria': ['impurity', 'max_samples'],
  'base_estimator__max_features': [None, 'auto']},
 {'n_estimators': [50],
  'algorithm': ['SAMME'],
  'base_estimator__random_state': [1],
  'base_estimator__kernel': ['rbf'],
  'base_estimator__C': [0.05, 0.2, 0.55, 7, 55, 10000.0],
  'base_estimator__gamma': [0.1, 1, 10.0],
  'base_estimator__max_iter': [10000.0, 100000.0, 1000000.0],
  'base_estimator__split_criteria': ['impurity', 'max_samples'],
  'base_estimator__max_features': [None, 'auto']},
 {'n_estimators': [50],
  'algorithm': ['SAMME'],
  'base_estimator__random_state': [1],
  'base_estimator__kernel': ['poly'],
  'base_estimator__degree': [3, 5],
  'base_estimator__C': [0.05, 0.2, 0.55, 7, 55, 10000.0],
  'base_estimator__gamma': [0.1, 1, 10.0],
  'bas

In [4]:
# Show how a concrete (non-grid) Stree parameter set is merged: each key gets
# the "base_estimator__" prefix and is appended to the ensemble-level settings.
parameters = {'C': 7, 'degree': 7, 'gamma': 0.1, 'kernel': 'poly', 'max_features': 'auto', 'max_iter': 10000.0, 'random_state': 1, 'split_criteria': 'impurity'}
print(b.merge_parameters(parameters))

{'n_estimators': [50], 'algorithm': ['SAMME'], 'base_estimator__C': 7, 'base_estimator__degree': 7, 'base_estimator__gamma': 0.1, 'base_estimator__kernel': 'poly', 'base_estimator__max_features': 'auto', 'base_estimator__max_iter': 10000.0, 'base_estimator__random_state': 1, 'base_estimator__split_criteria': 'impurity'}


In [5]:
# Name reported by the wrapped base-model wrapper (read through the private
# `_base_model` attribute, for inspection only).
b._base_model.get_model_name()

'stree'