# Setup: imports, shared hyperparameters, and sample data

In [1]:
import cProfile
import io
import pstats
from pathlib import Path

import treeple
from sklearn.model_selection import train_test_split
from treeple import ObliqueRandomForestClassifier
from treeple.datasets import make_trunk_classification

In [2]:
# Shared hyperparameters, reused by the profiling runs in this notebook.
MAX_DEPTH = 10
N_ESTIMATORS = 10
RANDOM_SEED = 42
N_JOBS = -1
BOOTSTRAP = True
MAX_FEATURE = 3000
FEATURE_COMBINATIONS = 1000.0

# Baseline keyword arguments for the forest estimator. Individual profiling
# calls override a subset of these entries.
params_treeple = {
    "n_estimators": int(N_ESTIMATORS),
    "criterion": "entropy",
    "max_depth": MAX_DEPTH,
    "min_samples_split": 2,
    "min_samples_leaf": 1,
    "min_weight_fraction_leaf": 0.0,
    "max_features": MAX_FEATURE,
    "max_leaf_nodes": 30,
    "min_impurity_decrease": 0.0,
    "bootstrap": BOOTSTRAP,
    "oob_score": False,
    "n_jobs": N_JOBS,
    "random_state": RANDOM_SEED,
    "verbose": 0,
    "warm_start": False,
    "class_weight": None,
    "max_samples": None,
    "feature_combinations": FEATURE_COMBINATIONS,
}


In [3]:
# Generate a sample trunk-classification dataset. Seeded with the shared
# RANDOM_SEED so that Restart Kernel -> Run All reproduces the same X and y.
X, y = make_trunk_classification(n_samples=1000, n_dim=1000, seed=RANDOM_SEED)

yesssss


# Profiling helper function

In [7]:

def profiling_fit(
    n_estimators,
    n_dim,
    n_samples,
    max_features,
    feature_combinations,
    max_depth,
    n_jobs,
    max_leaf_nodes,
    treeple_params=None,
    random_state=42,
    sort_by="tottime",
):
    """Profile ``ObliqueRandomForestClassifier.fit`` on synthetic trunk data.

    A dataset is generated with ``make_trunk_classification``, split 80/20
    with ``train_test_split``, and only the ``fit`` call on the training
    portion is run under ``cProfile``.

    Parameters
    ----------
    n_estimators, max_depth, max_features, feature_combinations, n_jobs, max_leaf_nodes
        Forwarded to ``ObliqueRandomForestClassifier``. These always take
        precedence over same-named entries in ``treeple_params``.
    n_dim, n_samples
        Forwarded to ``make_trunk_classification``.
    treeple_params : dict, optional
        Extra estimator keyword arguments. The mapping is copied, never
        mutated.
    random_state : int, default=42
        Seed used for both the data generation and the train/test split so
        repeated runs profile the same workload.
    sort_by : str, default="tottime"
        Sort key handed to ``pstats.Stats.sort_stats`` (e.g. ``"cumulative"``).

    Returns
    -------
    str
        The text written by ``pstats.Stats.print_stats``.

    Notes
    -----
    NOTE(review): cProfile records the thread it is enabled in; with
    ``n_jobs != 1`` the time spent in joblib workers may show up only as
    waiting time in the report -- confirm before comparing runs.
    """
    X, y = make_trunk_classification(n_samples=n_samples, n_dim=n_dim, seed=random_state)
    # Only the training split is needed for profiling ``fit``.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=0.2, random_state=random_state
    )

    # Start from a copy of the caller's base parameters (if any) and let the
    # explicit arguments override them.
    params = dict(treeple_params) if treeple_params is not None else {}
    params.update(
        {
            "n_estimators": n_estimators,
            "max_depth": max_depth,
            "max_features": max_features,
            "feature_combinations": feature_combinations,
            "n_jobs": n_jobs,
            "max_leaf_nodes": max_leaf_nodes,
        }
    )

    model = ObliqueRandomForestClassifier(**params)

    # The context manager disables the profiler even if ``fit`` raises.
    with cProfile.Profile() as profiler:
        model.fit(X_train, y_train)

    buffer = io.StringIO()
    pstats.Stats(profiler, stream=buffer).sort_stats(sort_by).print_stats()
    return buffer.getvalue()


# Profiling results

In [6]:
# Single-tree run: 2000 samples x 1000 features, all cores (n_jobs=-1),
# feature_combinations=1000.0.
report = profiling_fit(
    treeple_params=params_treeple,
    # data shape
    n_samples=2000,
    n_dim=1000,
    # forest size and tree limits
    n_estimators=1,
    max_depth=10,
    max_leaf_nodes=30,
    # split search
    max_features=1000,
    feature_combinations=1000.0,
    # parallelism
    n_jobs=-1,
)
print(report)

yesssss
start profiling
         6676 function calls (6617 primitive calls) in 3.913 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    3.913    3.913 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/sklearn/base.py:1313(wrapper)
        1    0.000    0.000    3.912    3.912 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/treeple/_lib/sklearn/ensemble/_forest.py:413(fit)
        1    0.000    0.000    3.911    3.911 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/treeple/_lib/sklearn/ensemble/_forest.py:641(_construct_trees)
        1    0.000    0.000    3.911    3.911 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/sklearn/utils/parallel.py:54(__call__)
        1    0.000    0.000    3.911    3.911 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/joblib/parallel.py:1902(__call__)
        3    0.000    0.000    

In [10]:
# Single-tree run: 1600 samples x 2048 features, all cores (n_jobs=-1),
# feature_combinations=64.0. Rebinds `report`.
report = profiling_fit(
    treeple_params=params_treeple,
    # data shape
    n_samples=1600,
    n_dim=2048,
    # forest size and tree limits
    n_estimators=1,
    max_depth=10,
    max_leaf_nodes=30,
    # split search
    max_features=2048,
    feature_combinations=64.0,
    # parallelism
    n_jobs=-1,
)
print(report)

yesssss
         5829 function calls (5788 primitive calls) in 2.942 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    2.942    2.942 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/sklearn/base.py:1313(wrapper)
        1    0.000    0.000    2.942    2.942 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/treeple/_lib/sklearn/ensemble/_forest.py:413(fit)
        1    0.000    0.000    2.940    2.940 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/treeple/_lib/sklearn/ensemble/_forest.py:641(_construct_trees)
        1    0.000    0.000    2.940    2.940 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/sklearn/utils/parallel.py:54(__call__)
        1    0.000    0.000    2.940    2.940 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/joblib/parallel.py:1902(__call__)
        3    0.000    0.000    2.940    0.980 /

In [None]:
# 200-tree run: 1600 samples x 2048 features, all cores (n_jobs=-1),
# feature_combinations=64.0.
report200 = profiling_fit(
    treeple_params=params_treeple,
    # data shape
    n_samples=1600,
    n_dim=2048,
    # forest size and tree limits
    n_estimators=200,
    max_depth=10,
    max_leaf_nodes=30,
    # split search
    max_features=2048,
    feature_combinations=64.0,
    # parallelism
    n_jobs=-1,
)
print(report200)

yesssss
         368821 function calls (362779 primitive calls) in 101.226 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000  101.226  101.226 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/sklearn/base.py:1313(wrapper)
        1    0.000    0.000  101.226  101.226 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/treeple/_lib/sklearn/ensemble/_forest.py:413(fit)
        1    0.000    0.000  101.221  101.221 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/treeple/_lib/sklearn/ensemble/_forest.py:641(_construct_trees)
        1    0.000    0.000  101.139  101.139 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/sklearn/utils/parallel.py:54(__call__)
        1    0.000    0.000  101.139  101.139 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/joblib/parallel.py:1902(__call__)
      202    0.000    0.000  101.139    0

In [9]:
# Persist the profiling text held in `report`. The output directory is created
# first so this cell also works when the folder does not exist yet.
# NOTE(review): `report` is assigned by more than one earlier cell, so the file
# holds whichever of those ran last -- confirm that is the intended run.
report_path = Path("experiments/result/cython_profile_reports/profile_report.txt")
report_path.parent.mkdir(parents=True, exist_ok=True)
with report_path.open("w", encoding="utf-8") as file:
    file.write(report)

In [16]:
# Single-job run (n_jobs=1): 10 trees, 1600 samples x 2048 features,
# feature_combinations=64.0.
report_njobs1 = profiling_fit(
    treeple_params=params_treeple,
    # data shape
    n_samples=1600,
    n_dim=2048,
    # forest size and tree limits
    n_estimators=10,
    max_depth=10,
    max_leaf_nodes=30,
    # split search
    max_features=2048,
    feature_combinations=64.0,
    # parallelism
    n_jobs=1,
)
print(report_njobs1)

yesssss
         23913 function calls (23609 primitive calls) in 29.726 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   29.726   29.726 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/sklearn/base.py:1313(wrapper)
        1    0.000    0.000   29.725   29.725 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/treeple/_lib/sklearn/ensemble/_forest.py:413(fit)
        1    0.000    0.000   29.724   29.724 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/treeple/_lib/sklearn/ensemble/_forest.py:641(_construct_trees)
        1    0.000    0.000   29.720   29.720 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/sklearn/utils/parallel.py:54(__call__)
        1    0.000    0.000   29.720   29.720 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/joblib/parallel.py:1902(__call__)
       12    0.000    0.000   29.720    2.47

In [4]:
# Single-job run (n_jobs=1): 10 trees, 1600 samples x 2048 features,
# feature_combinations=64.0. Rebinds `report_njobs1`, which an earlier cell
# with the same arguments also assigns.
report_njobs1 = profiling_fit(
    treeple_params=params_treeple,
    # data shape
    n_samples=1600,
    n_dim=2048,
    # forest size and tree limits
    n_estimators=10,
    max_depth=10,
    max_leaf_nodes=30,
    # split search
    max_features=2048,
    feature_combinations=64.0,
    # parallelism
    n_jobs=1,
)
print(report_njobs1)

yesssss
         60395 function calls (60040 primitive calls) in 28.713 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   28.713   28.713 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/sklearn/base.py:1313(wrapper)
        1    0.000    0.000   28.713   28.713 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/treeple/_lib/sklearn/ensemble/_forest.py:413(fit)
        1    0.000    0.000   28.711   28.711 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/treeple/_lib/sklearn/ensemble/_forest.py:641(_construct_trees)
        1    0.000    0.000   28.708   28.708 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/sklearn/utils/parallel.py:54(__call__)
        1    0.000    0.000   28.708   28.708 /home/clark/anaconda3/envs/treeple1/lib/python3.10/site-packages/joblib/parallel.py:1902(__call__)
       12    0.000    0.000   28.708    2.39

In [5]:
# Single-job run (n_jobs=1): 10 trees, 1600 samples x 2048 features,
# feature_combinations=64.0. Rebinds `report_njobs1` again.
# NOTE(review): the saved output of this cell begins with per-tree timing
# lines that nothing in this notebook prints -- presumably emitted by the
# installed treeple build; confirm.
report_njobs1 = profiling_fit(
    treeple_params=params_treeple,
    # data shape
    n_samples=1600,
    n_dim=2048,
    # forest size and tree limits
    n_estimators=10,
    max_depth=10,
    max_leaf_nodes=30,
    # split search
    max_features=2048,
    feature_combinations=64.0,
    # parallelism
    n_jobs=1,
)
print(report_njobs1)

yesssss
------------------------------------------------------------
Time to initialize Trees 7.981599992490374e-05
Time taken for builder.build 2.6661334179989353
_build_tree total time spent:  2.6663413679998484
------------------------------------------------------------
Time to initialize Trees 4.5812997996108606e-05
Time taken for builder.build 2.656670637999923
_build_tree total time spent:  2.6568380829994567
------------------------------------------------------------
Time to initialize Trees 4.694499875768088e-05
Time taken for builder.build 3.1698126530027366
_build_tree total time spent:  3.16997572100081
------------------------------------------------------------
Time to initialize Trees 4.512300074566156e-05
Time taken for builder.build 3.0429017189999286
_build_tree total time spent:  3.043063785000413
------------------------------------------------------------
Time to initialize Trees 4.55230001534801e-05
Time taken for builder.build 2.7408138639984827
_build_tree tota

In [6]:
# Single-job run (n_jobs=1) with feature_combinations lowered to 2.0:
# 10 trees, 1600 samples x 2048 features. Rebinds `report_njobs1`.
report_njobs1 = profiling_fit(
    treeple_params=params_treeple,
    # data shape
    n_samples=1600,
    n_dim=2048,
    # forest size and tree limits
    n_estimators=10,
    max_depth=10,
    max_leaf_nodes=30,
    # split search
    max_features=2048,
    feature_combinations=2.0,
    # parallelism
    n_jobs=1,
)
print(report_njobs1)

yesssss
------------------------------------------------------------
Time to initialize Trees 7.870599984016735e-05
Time taken for builder.build 1.3997408140003245
_build_tree total time spent:  1.3999389830005384
------------------------------------------------------------
Time to initialize Trees 4.9231999582843855e-05
Time taken for builder.build 1.2578911950004112
_build_tree total time spent:  1.2582191250003234
------------------------------------------------------------
Time to initialize Trees 4.387199987831991e-05
Time taken for builder.build 1.265102391000255
_build_tree total time spent:  1.2652426730001025
------------------------------------------------------------
Time to initialize Trees 4.311100019549485e-05
Time taken for builder.build 1.2914144510014012
_build_tree total time spent:  1.291562537999198
------------------------------------------------------------
Time to initialize Trees 4.562499998428393e-05
Time taken for builder.build 1.2874892899999395
_build_tree t

In [8]:
# Single-job run (n_jobs=1) with feature_combinations=2.0: 10 trees,
# 1600 samples x 2048 features. Rebinds `report_njobs1`, which an earlier
# cell with the same arguments also assigns.
report_njobs1 = profiling_fit(
    treeple_params=params_treeple,
    # data shape
    n_samples=1600,
    n_dim=2048,
    # forest size and tree limits
    n_estimators=10,
    max_depth=10,
    max_leaf_nodes=30,
    # split search
    max_features=2048,
    feature_combinations=2.0,
    # parallelism
    n_jobs=1,
)
print(report_njobs1)

yesssss
------------------------------------------------------------
Time to initialize Trees 5.113500083098188e-05
Time taken for builder.build 1.3682690839996212
_build_tree total time spent:  1.368441155000255
------------------------------------------------------------
Time to initialize Trees 4.528499994194135e-05
Time taken for builder.build 1.310755552000046
_build_tree total time spent:  1.310910520998732
------------------------------------------------------------
Time to initialize Trees 4.522400013229344e-05
Time taken for builder.build 1.3628098019999015
_build_tree total time spent:  1.3629629780007235
------------------------------------------------------------
Time to initialize Trees 4.794000051333569e-05
Time taken for builder.build 1.325344942000811
_build_tree total time spent:  1.325505881999561
------------------------------------------------------------
Time to initialize Trees 4.5614999180543236e-05
Time taken for builder.build 1.2991861640002753
_build_tree tota