In [1]:
import cProfile
import io
import pstats
from pathlib import Path

import treeple
from sklearn.model_selection import train_test_split
from treeple import ObliqueRandomForestClassifier
from treeple.datasets import make_trunk_classification

In [2]:
# Baseline hyperparameters for the oblique forest; individual profiling runs
# may override some of them when they call profiling_fit.
MAX_DEPTH = 10
N_ESTIMATORS = 10
RANDOM_SEED = 42
N_JOBS = -1
BOOTSTRAP = True
MAX_FEATURE = 3000
FEATURE_COMBINATIONS = 1000.0

# Keyword arguments for ObliqueRandomForestClassifier, written as a single
# literal so the whole configuration can be read at a glance.
params_treeple = {
    "n_estimators": int(N_ESTIMATORS),
    "criterion": "entropy",
    "max_depth": MAX_DEPTH,
    "min_samples_split": 2,
    "min_samples_leaf": 1,
    "min_weight_fraction_leaf": 0.0,
    "max_features": MAX_FEATURE,
    "max_leaf_nodes": 30,
    "min_impurity_decrease": 0.0,
    "bootstrap": BOOTSTRAP,
    "oob_score": False,
    "n_jobs": N_JOBS,
    "random_state": RANDOM_SEED,
    "verbose": 0,
    "warm_start": False,
    "class_weight": None,
    "max_samples": None,
    "feature_combinations": FEATURE_COMBINATIONS,
}


In [3]:
# Simulate 1000 samples with 1000 features. The generator is seeded with the
# notebook-wide RANDOM_SEED so a kernel restart reproduces the same data.
X, y = make_trunk_classification(n_samples=1000, n_dim=1000, seed=RANDOM_SEED)

yesssss


In [4]:
# Build a forest from the shared hyperparameters in params_treeple.
# NOTE(review): no later cell visible here references this instance;
# profiling_fit constructs its own model for every run.
treeple_model = ObliqueRandomForestClassifier(**params_treeple)

In [5]:

def profiling_fit(
    n_estimators,
    n_dim,
    n_samples,
    max_features,
    feature_combinations,
    max_depth,
    n_jobs,
    max_leaf_nodes,
    treeple_params=None,
    seed=None,
):
    """Profile one ``ObliqueRandomForestClassifier.fit`` call on simulated data.

    Data is generated with ``make_trunk_classification``, split 80/20 with
    ``train_test_split`` (``random_state=42``), and only the training portion
    is fitted under ``cProfile``.

    Parameters
    ----------
    n_estimators, max_features, feature_combinations, max_depth, n_jobs, max_leaf_nodes
        Passed to ``ObliqueRandomForestClassifier`` under the same keyword
        names. They always take precedence over entries in ``treeple_params``.
    n_dim, n_samples
        Passed to ``make_trunk_classification`` to size the simulated data.
    treeple_params : dict, optional
        Baseline keyword arguments for the classifier. The mapping is copied,
        never mutated. ``None`` means only the explicit arguments are used.
    seed : optional
        Forwarded to ``make_trunk_classification``. The default ``None``
        keeps the earlier behaviour of drawing unseeded data.

    Returns
    -------
    str
        The ``pstats`` text report for the fit, sorted by cumulative time.
    """
    X, y = make_trunk_classification(n_samples=n_samples, n_dim=n_dim, seed=seed)
    # Only the training portion is profiled; the held-out part is discarded.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)

    # Start from the caller's baseline (if any) and let the explicit arguments win.
    params = {} if treeple_params is None else dict(treeple_params)
    params.update(
        n_estimators=n_estimators,
        max_depth=max_depth,
        max_features=max_features,
        feature_combinations=feature_combinations,
        n_jobs=n_jobs,
        max_leaf_nodes=max_leaf_nodes,
    )
    model = ObliqueRandomForestClassifier(**params)

    # The context manager disables the profiler even when fit() raises, so a
    # failed run cannot leave profiling switched on for the rest of the kernel.
    # NOTE(review): cProfile presumably records only the calling thread, so with
    # n_jobs != 1 the tree-building work may appear only as joblib time - confirm.
    with cProfile.Profile() as profiler:
        model.fit(X_train, y_train)

    buffer = io.StringIO()
    pstats.Stats(profiler, stream=buffer).sort_stats("cumtime").print_stats()
    return buffer.getvalue()


In [6]:
# Profile a single tree on 2000 simulated samples with 1000 features.
single_tree_1000d = dict(
    n_estimators=1,
    n_dim=1000,
    n_samples=2000,
    max_features=1000,
    feature_combinations=1000.0,
    max_depth=10,
    n_jobs=-1,
    max_leaf_nodes=30,
)
report = profiling_fit(treeple_params=params_treeple, **single_tree_1000d)
print(report)

yesssss
         6822 function calls (6763 primitive calls) in 4.262 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    4.262    4.262 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/sklearn/base.py:1313(wrapper)
        1    0.000    0.000    4.261    4.261 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/treeple/_lib/sklearn/ensemble/_forest.py:413(fit)
        1    0.000    0.000    4.259    4.259 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/treeple/_lib/sklearn/ensemble/_forest.py:641(_construct_trees)
        1    0.000    0.000    4.259    4.259 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/sklearn/utils/parallel.py:54(__call__)
        1    0.000    0.000    4.259    4.259 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/joblib/parallel.py:1902(__call__)
        3    0.000    0.000    4.259    1.420 /

In [10]:
# Profile a single tree on 1600 simulated samples with 2048 features.
# NOTE(review): this rebinds `report`, which the file-writing cell at the end
# of the notebook also reads; in a top-to-bottom run that cell would save this
# report rather than the 1000-feature one - confirm which is intended.
single_tree_2048d = dict(
    n_estimators=1,
    n_dim=2048,
    n_samples=1600,
    max_features=2048,
    feature_combinations=64.0,
    max_depth=10,
    n_jobs=-1,
    max_leaf_nodes=30,
)
report = profiling_fit(treeple_params=params_treeple, **single_tree_2048d)
print(report)

yesssss
         5829 function calls (5788 primitive calls) in 2.942 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    2.942    2.942 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/sklearn/base.py:1313(wrapper)
        1    0.000    0.000    2.942    2.942 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/treeple/_lib/sklearn/ensemble/_forest.py:413(fit)
        1    0.000    0.000    2.940    2.940 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/treeple/_lib/sklearn/ensemble/_forest.py:641(_construct_trees)
        1    0.000    0.000    2.940    2.940 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/sklearn/utils/parallel.py:54(__call__)
        1    0.000    0.000    2.940    2.940 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/joblib/parallel.py:1902(__call__)
        3    0.000    0.000    2.940    0.980 /

In [None]:
# Profile a 200-tree forest on 1600 simulated samples with 2048 features.
forest200_2048d = dict(
    n_estimators=200,
    n_dim=2048,
    n_samples=1600,
    max_features=2048,
    feature_combinations=64.0,
    max_depth=10,
    n_jobs=-1,
    max_leaf_nodes=30,
)
report200 = profiling_fit(treeple_params=params_treeple, **forest200_2048d)
print(report200)

yesssss
         368821 function calls (362779 primitive calls) in 101.226 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000  101.226  101.226 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/sklearn/base.py:1313(wrapper)
        1    0.000    0.000  101.226  101.226 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/treeple/_lib/sklearn/ensemble/_forest.py:413(fit)
        1    0.000    0.000  101.221  101.221 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/treeple/_lib/sklearn/ensemble/_forest.py:641(_construct_trees)
        1    0.000    0.000  101.139  101.139 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/sklearn/utils/parallel.py:54(__call__)
        1    0.000    0.000  101.139  101.139 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/joblib/parallel.py:1902(__call__)
      202    0.000    0.000  101.139    0

In [9]:
# Save the profiling text held in `report`. The output directory is created
# first so a fresh checkout does not fail with FileNotFoundError, and the
# encoding is fixed so the file does not depend on the platform default.
report_path = Path("experiments/result/cython_profile_reports/profile_report.txt")
report_path.parent.mkdir(parents=True, exist_ok=True)
with report_path.open("w", encoding="utf-8") as file:
    file.write(report)