In [1]:
import datetime, time
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_validate
from sklearn import tree
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier
from stree import Stree
from odte import Odte

random_state = 1

In [2]:
from sklearn.datasets import load_wine
X, y = load_wine(return_X_y=True)
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=.2, random_state=random_state)

In [3]:
n_estimators = 20
clf = {}
clf["stree"] = Stree(random_state=random_state, max_depth=5)
clf["stree"].set_params(**dict(splitter="best", kernel="linear", max_features="auto"))
clf["odte"] = Odte(base_estimator=clf["stree"], random_state=random_state, n_estimators=n_estimators, max_features=.8)
clf["adaboost"] = AdaBoostClassifier(base_estimator=clf["stree"], n_estimators=n_estimators, random_state=random_state, algorithm="SAMME")
clf["bagging"] = BaggingClassifier(base_estimator=clf["stree"], n_estimators=n_estimators)

In [4]:
print("*"*30,"Results for wine", "*"*30)
for clf_type, item in clf.items():
    print(f"Training {clf_type}...")
    now = time.time()
    item.fit(Xtrain, ytrain)
    print(f"Score: {item.score(Xtest, ytest) * 100:.3f} in {time.time()-now:.2f} seconds")

****************************** Results for wine ******************************
Training stree...
Score: 94.444 in 0.17 seconds
Training odte...
Score: 97.222 in 2.70 seconds
Training adaboost...
Score: 94.444 in 0.60 seconds
Training bagging...
Score: 100.000 in 2.55 seconds


In [5]:
from sklearn.datasets import load_iris
X, y = load_iris(return_X_y=True)
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=.2, random_state=random_state)

In [6]:
n_estimators = 10
clf = {}
clf["stree"] = Stree(random_state=random_state, max_depth=3)
clf["odte"] = Odte(random_state=random_state, n_estimators=n_estimators, max_features=1.0)
clf["adaboost"] = AdaBoostClassifier(base_estimator=clf["stree"], n_estimators=n_estimators, random_state=random_state, algorithm="SAMME")
clf["bagging"] = BaggingClassifier(base_estimator=clf["stree"], n_estimators=n_estimators)

In [7]:
print("*"*30,"Results for iris", "*"*30)
for clf_type, item in clf.items():
    print(f"Training {clf_type}...")
    now = time.time()
    item.fit(Xtrain, ytrain)
    print(f"Score: {item.score(Xtest, ytest) * 100:.3f} in {time.time()-now:.2f} seconds")

****************************** Results for iris ******************************
Training stree...
Score: 100.000 in 0.02 seconds
Training odte...
Score: 93.333 in 0.12 seconds
Training adaboost...
Score: 83.333 in 0.01 seconds
Training bagging...
Score: 100.000 in 0.11 seconds


In [8]:
cross = cross_validate(estimator=clf["odte"], X=X, y=y, n_jobs=-1, return_train_score=True)
print(cross)
print(f"{np.mean(cross['test_score'])*100:.3f} +- {np.std(cross['test_score']):.3f}")

{'fit_time': array([0.15752316, 0.18354201, 0.14742589, 0.13827896, 0.14534211]), 'score_time': array([0.00940681, 0.01064587, 0.01085019, 0.00925183, 0.00878191]), 'test_score': array([0.8       , 0.93333333, 0.93333333, 0.93333333, 0.96666667]), 'train_score': array([0.875     , 0.95      , 0.98333333, 0.98333333, 0.95833333])}
91.333 +- 0.058


In [9]:
cross = cross_validate(estimator=clf["adaboost"], X=X, y=y, n_jobs=-1, return_train_score=True)
print(cross)
print(f"{np.mean(cross['test_score'])*100:.3f} +- {np.std(cross['test_score']):.3f}")

{'fit_time': array([0.01752877, 0.03304005, 0.03542018, 0.03398919, 0.03945518]), 'score_time': array([0.00135112, 0.00164104, 0.00159597, 0.0018959 , 0.00189495]), 'test_score': array([1.        , 0.93333333, 0.93333333, 0.93333333, 0.96666667]), 'train_score': array([0.93333333, 0.96666667, 0.96666667, 0.96666667, 0.95      ])}
95.333 +- 0.027
