In [1]:
import datetime, time
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_validate
from sklearn import tree
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier
from stree import Stree
from odte import Odte

random_state = 1

In [12]:
from sklearn.datasets import load_wine
X, y = load_wine(return_X_y=True)
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=.2, random_state=random_state)

In [13]:
n_estimators = 20
clf = {}
clf["stree"] = Stree(random_state=random_state, max_depth=5)
clf["stree"].set_params(**dict(splitter="best", kernel="linear", max_features="auto"))
clf["odte"] = Odte(n_jobs=-1, base_estimator=clf["stree"], random_state=random_state, n_estimators=n_estimators, max_features=.8)
clf["adaboost"] = AdaBoostClassifier(base_estimator=clf["stree"], n_estimators=n_estimators, random_state=random_state, algorithm="SAMME")
clf["bagging"] = BaggingClassifier(base_estimator=clf["stree"], n_estimators=n_estimators)

In [14]:
print("*"*30,"Results for wine", "*"*30)
for clf_type, item in clf.items():
    print(f"Training {clf_type}...")
    now = time.time()
    item.fit(Xtrain, ytrain)
    print(f"Score: {item.score(Xtest, ytest) * 100:.3f} in {time.time()-now:.2f} seconds")

****************************** Results for wine ******************************
Training stree...
Score: 94.444 in 0.18 seconds
Training odte...
Score: 100.000 in 1.33 seconds
Training adaboost...
Score: 94.444 in 0.62 seconds
Training bagging...
Score: 100.000 in 2.88 seconds


In [5]:
from sklearn.datasets import load_iris
X, y = load_iris(return_X_y=True)
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=.2, random_state=random_state)

In [6]:
n_estimators = 10
clf = {}
clf["stree"] = Stree(random_state=random_state, max_depth=3)
clf["odte"] = Odte(n_jobs=-1, random_state=random_state, n_estimators=n_estimators, max_features=1.0)
clf["adaboost"] = AdaBoostClassifier(base_estimator=clf["stree"], n_estimators=n_estimators, random_state=random_state, algorithm="SAMME")
clf["bagging"] = BaggingClassifier(base_estimator=clf["stree"], n_estimators=n_estimators)

In [7]:
print("*"*30,"Results for iris", "*"*30)
for clf_type, item in clf.items():
    print(f"Training {clf_type}...")
    now = time.time()
    item.fit(Xtrain, ytrain)
    print(f"Score: {item.score(Xtest, ytest) * 100:.3f} in {time.time()-now:.2f} seconds")

****************************** Results for iris ******************************
Training stree...
Score: 100.000 in 0.02 seconds
Training odte...
Score: 100.000 in 0.12 seconds
Training adaboost...
Score: 83.333 in 0.01 seconds
Training bagging...
Score: 100.000 in 0.13 seconds


In [8]:
cross = cross_validate(estimator=clf["odte"], X=X, y=y, n_jobs=-1, return_train_score=True)
print(cross)
print(f"{np.mean(cross['test_score'])*100:.3f} +- {np.std(cross['test_score']):.3f}")

{'fit_time': array([0.22121811, 0.21985221, 0.19185114, 0.19187999, 0.20067477]), 'score_time': array([0.01268458, 0.01461887, 0.01160598, 0.01308703, 0.01070738]), 'test_score': array([1.        , 1.        , 1.        , 0.93333333, 1.        ]), 'train_score': array([0.98333333, 0.96666667, 0.99166667, 0.99166667, 0.975     ])}
98.667 +- 0.027


In [9]:
cross = cross_validate(estimator=clf["adaboost"], X=X, y=y, n_jobs=-1, return_train_score=True)
print(cross)
print(f"{np.mean(cross['test_score'])*100:.3f} +- {np.std(cross['test_score']):.3f}")

{'fit_time': array([0.02130818, 0.02036786, 0.02829814, 0.02326989, 0.03807497]), 'score_time': array([0.00140715, 0.00173712, 0.00199389, 0.00132608, 0.00199199]), 'test_score': array([1.        , 0.93333333, 0.93333333, 0.93333333, 0.96666667]), 'train_score': array([0.93333333, 0.96666667, 0.96666667, 0.96666667, 0.95      ])}
95.333 +- 0.027


In [10]:
from sklearn.utils.estimator_checks import check_estimator
# Make checks one by one
c = 0
checks = check_estimator(Odte(), generate_only=True)
for check in checks:
    c += 1
    print(c, check[1])
    check[1](check[0])

1 functools.partial(<function check_no_attributes_set_in_init at 0x12b593290>, 'Odte')
2 functools.partial(<function check_estimators_dtypes at 0x12b58d3b0>, 'Odte')
3 functools.partial(<function check_fit_score_takes_y at 0x12b58d290>, 'Odte')
4 functools.partial(<function check_sample_weights_pandas_series at 0x12b586b90>, 'Odte')
5 functools.partial(<function check_sample_weights_not_an_array at 0x12b586cb0>, 'Odte')
6 functools.partial(<function check_sample_weights_list at 0x12b586dd0>, 'Odte')
7 functools.partial(<function check_sample_weights_shape at 0x12b586ef0>, 'Odte')
8 functools.partial(<function check_sample_weights_invariance at 0x12b58a050>, 'Odte')
9 functools.partial(<function check_estimators_fit_returns_self at 0x12b5913b0>, 'Odte')
10 functools.partial(<function check_estimators_fit_returns_self at 0x12b5913b0>, 'Odte', readonly_memmap=True)
11 functools.partial(<function check_complex_data at 0x12b58a200>, 'Odte')
12 functools.partial(<function check_dtype_object 