In [None]:
import datetime, time
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_validate
from sklearn import tree
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier
from stree import Stree
from odte import Odte

# Single seed passed to the train/test splits and to the estimators built in
# the cells below.
random_state = 1

In [None]:
# Wine dataset: load it as an (X, y) pair and hold out 20% of it for testing.
from sklearn.datasets import load_wine

X, y = load_wine(return_X_y=True)
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=random_state,
)

In [None]:
# Classifiers compared on the wine data: a single Stree plus three ensembles
# (Odte, AdaBoost, Bagging) that all receive that Stree as their base estimator.
n_estimators = 20

base_stree = Stree(random_state=random_state, max_depth=5)
base_stree.set_params(splitter="best", kernel="linear", max_features="auto")

clf = {
    "stree": base_stree,
    "odte": Odte(
        n_jobs=-1,
        estimator=base_stree,
        random_state=random_state,
        n_estimators=n_estimators,
        max_features=.8,
    ),
    "adaboost": AdaBoostClassifier(
        estimator=base_stree,
        n_estimators=n_estimators,
        random_state=random_state,
        algorithm="SAMME",
    ),
    # Seeded like the other estimators so the bootstrap sampling (and therefore
    # the reported score) is the same on every run.
    "bagging": BaggingClassifier(
        estimator=base_stree,
        n_estimators=n_estimators,
        random_state=random_state,
    ),
}

In [None]:
# Fit every classifier on the training split, score it on the held-out split
# and report the score (as a percentage) with the time taken.
banner = "*" * 30
print(banner, "Results for wine", banner)
for name, model in clf.items():
    print(f"Training {name}...")
    # perf_counter is a monotonic, high-resolution clock intended for measuring
    # intervals; time.time() follows the wall clock and can jump if it is adjusted.
    start = time.perf_counter()
    model.fit(Xtrain, ytrain)
    score = model.score(Xtest, ytest)
    # The elapsed time covers both fitting and scoring.
    elapsed = time.perf_counter() - start
    print(f"Score: {score * 100:.3f} in {elapsed:.2f} seconds")

In [None]:
# Iris dataset: load it as an (X, y) pair and hold out 20% of it for testing.
# This rebinds X, y and the four split variables used by the cells that follow.
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=random_state,
)

In [None]:
# Classifiers compared on the iris data: a single Stree, an Odte ensemble, and
# AdaBoost / Bagging ensembles built on that Stree.
n_estimators = 10

base_stree = Stree(random_state=random_state, max_depth=3)

clf = {
    "stree": base_stree,
    # NOTE(review): unlike the wine cell, no `estimator` is passed here, so Odte
    # falls back to its own default base estimator rather than the max_depth=3
    # Stree above — confirm this is intended.
    "odte": Odte(
        n_jobs=-1,
        random_state=random_state,
        n_estimators=n_estimators,
        max_features=1.0,
    ),
    "adaboost": AdaBoostClassifier(
        estimator=base_stree,
        n_estimators=n_estimators,
        random_state=random_state,
        algorithm="SAMME",
    ),
    # Seeded like the other estimators so the bootstrap sampling (and therefore
    # the reported score) is the same on every run.
    "bagging": BaggingClassifier(
        estimator=base_stree,
        n_estimators=n_estimators,
        random_state=random_state,
    ),
}

In [None]:
# Fit every classifier on the training split, score it on the held-out split
# and report the score (as a percentage) with the time taken.
banner = "*" * 30
print(banner, "Results for iris", banner)
for name, model in clf.items():
    print(f"Training {name}...")
    # perf_counter is a monotonic, high-resolution clock intended for measuring
    # intervals; time.time() follows the wall clock and can jump if it is adjusted.
    start = time.perf_counter()
    model.fit(Xtrain, ytrain)
    score = model.score(Xtest, ytest)
    # The elapsed time covers both fitting and scoring.
    elapsed = time.perf_counter() - start
    print(f"Score: {score * 100:.3f} in {elapsed:.2f} seconds")

In [None]:
# Cross-validate the Odte ensemble on the full X, y and summarise the test
# scores as "mean +- std".
cross = cross_validate(estimator=clf["odte"], X=X, y=y, n_jobs=-1, return_train_score=True)
print(cross)
# Scale both statistics by 100 so the mean and its spread are in the same unit
# (percent); scaling only the mean makes the deviation look 100x smaller.
mean_pct = np.mean(cross['test_score']) * 100
std_pct = np.std(cross['test_score']) * 100
print(f"{mean_pct:.3f} +- {std_pct:.3f}")

In [None]:
# Cross-validate the AdaBoost ensemble on the full X, y and summarise the test
# scores as "mean +- std".
cross = cross_validate(estimator=clf["adaboost"], X=X, y=y, n_jobs=-1, return_train_score=True)
print(cross)
# Scale both statistics by 100 so the mean and its spread are in the same unit
# (percent); scaling only the mean makes the deviation look 100x smaller.
mean_pct = np.mean(cross['test_score']) * 100
std_pct = np.std(cross['test_score']) * 100
print(f"{mean_pct:.3f} +- {std_pct:.3f}")

In [None]:
from sklearn.utils.estimator_checks import check_estimator

# Run scikit-learn's estimator checks against a default Odte one at a time.
# Each check is printed with its running number before it is called, so when
# one raises, the last printed line identifies it.
# NOTE(review): recent scikit-learn releases deprecate `generate_only` in favour
# of `estimator_checks_generator` — confirm against the installed version.
checks = check_estimator(Odte(), generate_only=True)
for c, (estimator, check) in enumerate(checks, start=1):
    print(c, check)
    check(estimator)