In [20]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
path = str(Path.cwd().parent)
print(path)
sys.path.insert(1, path)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
c:\Users\Joaquín Amat\Documents\GitHub\skforecast


In [21]:
import numpy as np
from sklearn.preprocessing import KBinsDiscretizer
from timeit import repeat
import numpy as np
from skforecast.preprocessing import QuantileBinner

In [22]:
X = np.random.normal(10, 10, 10000)
X_reshaped = X.reshape(-1, 1)

In [23]:
binner = KBinsDiscretizer(n_bins=10, encode='ordinal', strategy='quantile', dtype=np.float64)
times_fit = repeat("binner.fit_transform(X_reshaped)", repeat=100, number=10, globals=globals())
times_transform = repeat("binner.transform(X_reshaped[0].reshape(1, -1))", repeat=1000, number=10, globals=globals())
print(f"Sklearn KBinsDiscretizer {binner.get_params()}")
print(f"    Fit      : {1000 * np.mean(times_fit):.6f} μs +- {1000 * np.std(times_fit):.6f}")
print(f"    Transform: {1000 * np.mean(times_transform):.6f} μs +- {1000 * np.std(times_transform):.6f}")
print("")

binner = QuantileBinner(n_bins=10, method="linear", dtype=np.float64)
times_fit = repeat("binner.fit_transform(X)", repeat=100, number=10, globals=globals())
times_transform = repeat("binner.transform(X[0])", repeat=1000, number=10, globals=globals())
print(f"QuantileBinner {binner.get_params()}")
print(f"    Fit      : {1000 * np.mean(times_fit):.6f} μs +- {1000 * np.std(times_fit):.6f}")
print(f"    Transform: {1000 * np.mean(times_transform):.6f} μs +- {1000 * np.std(times_transform):.6f}")
print("")

binner = QuantileBinner(n_bins=10, method="closest_observation", dtype=np.uint8)
times_fit = repeat("binner.fit_transform(X)", repeat=100, number=10, globals=globals())
times_transform = repeat("binner.transform(X[0])", repeat=1000, number=10, globals=globals())
print(f"QuantileBinner {binner.get_params()}")
print(f"    Fit      : {1000 * np.mean(times_fit):.6f} μs +- {1000 * np.std(times_fit):.6f}")
print(f"    Transform: {1000 * np.mean(times_transform):.6f} μs +- {1000 * np.std(times_transform):.6f}")

Sklearn KBinsDiscretizer {'dtype': <class 'numpy.float64'>, 'encode': 'ordinal', 'n_bins': 10, 'random_state': None, 'strategy': 'quantile', 'subsample': 200000}
    Fit      : 11.770960 μs +- 4.991791
    Transform: 0.762327 μs +- 0.376949

QuantileBinner {'n_bins': 10, 'method': 'linear', 'subsample': 200000, 'dtype': <class 'numpy.float64'>}
    Fit      : 9.236000 μs +- 2.183521
    Transform: 0.135653 μs +- 0.126712

QuantileBinner {'n_bins': 10, 'method': 'closest_observation', 'subsample': 200000, 'dtype': <class 'numpy.uint8'>}
    Fit      : 7.307493 μs +- 1.689545
    Transform: 0.118028 μs +- 0.081507


In [24]:
from sklearn.model_selection import ParameterGrid

params = {
    "n_bins": [2, 10, 20],
    "method": ["linear"],
    "subsample": [200000],
}

parm_grid = ParameterGrid(params)

X = np.random.normal(10, 10, 10000)

for param in parm_grid:
    print(param)
    binner_1 = KBinsDiscretizer(
        n_bins=param["n_bins"],
        encode="ordinal",
        strategy="quantile",
        dtype=np.float64,
        random_state=789654,
    )
    binner_2 = QuantileBinner(
        n_bins=param["n_bins"],
        method=param["method"],
        subsample=param["subsample"],
        dtype=np.float64,
        random_state=789654,
    )

    binner_1.fit(X.reshape(-1, 1))
    binner_2.fit(X)

    transformed_1 = binner_1.transform(X.reshape(-1, 1)).flatten()
    transformed_2 = binner_2.transform(X)

    np.testing.assert_array_almost_equal(binner_1.bin_edges_[0], binner_2.bin_edges_)
    np.testing.assert_array_almost_equal(transformed_1, transformed_2)

{'method': 'linear', 'n_bins': 2, 'subsample': 200000}
{'method': 'linear', 'n_bins': 10, 'subsample': 200000}
{'method': 'linear', 'n_bins': 20, 'subsample': 200000}
