In [4]:
%pip install econml
%pip install scikit-learn
%pip install numpy


import numpy as np
from sklearn.ensemble import RandomForestRegressor
from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from typing import Optional

def ll_regression_forest(X, Y, enable_ll_split=False, ll_split_weight_penalty=False,
                         ll_split_lambda=0.1, ll_split_variables=None, ll_split_cutoff=None,
                         num_trees=2000, clusters=None, equalize_cluster_weights=False,
                         sample_fraction=0.5, mtry=None, min_node_size=5, honesty=True,
                         honesty_fraction=0.5, honesty_prune_leaves=True, alpha=0.05,
                         imbalance_penalty=0, ci_group_size=2, tune_parameters="none",
                         tune_num_trees=50, tune_num_reps=100, tune_num_draws=1000,
                         num_threads=None, seed=None):
    """Fit a random forest regressor and, optionally, a global ridge fit.

    The forest is a scikit-learn ``RandomForestRegressor``. When
    ``enable_ll_split`` is true and ``ll_split_cutoff`` is positive, a ridge
    regression of ``Y`` on ``[1, X]`` (intercept left unpenalized) is solved
    as well and returned as ``overall_beta``.

    Args:
        X: 2-D feature matrix of shape (n_samples, n_features).
        Y: Response values, one per row of ``X``.
        enable_ll_split: Whether to compute the global ridge coefficients.
        ll_split_lambda: Ridge penalty applied to the non-intercept
            coefficients of the global fit.
        ll_split_cutoff: The global fit is only computed when this is
            positive. Defaults to ``int(sqrt(n_samples))``.
        num_trees: Number of trees (``n_estimators``).
        mtry: Features considered per split (``max_features``). Defaults to
            ``min(int(sqrt(n_features) + 20), n_features)``.
        min_node_size: Minimum samples per leaf (``min_samples_leaf``).
        num_threads: Forwarded unchanged as ``n_jobs``.
        seed: Forwarded as ``random_state``. When not ``None`` it also seeds
            NumPy's global RNG.
        honesty, honesty_fraction, honesty_prune_leaves, alpha,
        imbalance_penalty, ci_group_size: Not used for fitting; only echoed
            back in the returned ``params`` dictionary.
        ll_split_weight_penalty, ll_split_variables, clusters,
        equalize_cluster_weights, sample_fraction, tune_parameters,
        tune_num_trees, tune_num_reps, tune_num_draws: Accepted for
            signature compatibility; currently ignored.

    Returns:
        dict with keys ``"forest"`` (the fitted estimator), ``"overall_beta"``
        (ridge coefficients or ``None``) and ``"params"`` (the settings used).

    Raises:
        ValueError: If ``X`` is not 2-D or ``Y`` has a different number of
            rows than ``X``.
    """
    if np.ndim(X) != 2:
        raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
    n_samples, n_features = np.shape(X)
    if len(Y) != n_samples:
        raise ValueError(
            f"X and Y must have the same number of rows; got {n_samples} and {len(Y)}"
        )

    # Only touch the global RNG when a seed was requested: seeding with None
    # would re-randomize the global state and discard a seed set by the caller.
    if seed is not None:
        np.random.seed(seed)

    if mtry is None:
        mtry = min(int(np.sqrt(n_features) + 20), n_features)

    if ll_split_cutoff is None:
        ll_split_cutoff = int(np.sqrt(n_samples))

    # oob_score=True makes scikit-learn store `oob_prediction_` on the fitted
    # estimator, so out-of-bag predictions can be read from it afterwards.
    rf = RandomForestRegressor(
        n_estimators=num_trees,
        max_features=mtry,
        min_samples_leaf=min_node_size,
        oob_score=True,
        n_jobs=num_threads,
        random_state=seed,
    )
    rf.fit(X, Y)

    overall_beta = None
    if enable_ll_split and ll_split_cutoff > 0:
        # Ridge normal equations: (D'D + lambda * J) beta = D'Y, where J is the
        # identity with a zero in the intercept position.
        D = np.hstack([np.ones((n_samples, 1)), X])
        J = np.eye(n_features + 1)
        J[0, 0] = 0
        # Solve the linear system directly instead of forming an explicit
        # inverse; this is cheaper and numerically more stable.
        overall_beta = np.linalg.solve(D.T @ D + ll_split_lambda * J, D.T @ Y)

    return {
        "forest": rf,
        "overall_beta": overall_beta,
        "params": {
            "num_trees": num_trees,
            "mtry": mtry,
            "min_node_size": min_node_size,
            "honesty": honesty,
            "honesty_fraction": honesty_fraction,
            "honesty_prune_leaves": honesty_prune_leaves,
            "alpha": alpha,
            "imbalance_penalty": imbalance_penalty,
            "ci_group_size": ci_group_size,
            "num_threads": num_threads,
            "seed": seed
        }
    }

def predict_ll_regression_forest(forest_obj, newdata=None, linear_correction_variables=None,
                                 ll_lambda=None, ll_weight_penalty=False, num_threads=None,
                                 estimate_variance=False):
    """Return predictions from a forest stored under ``forest_obj["forest"]``.

    Args:
        forest_obj: Mapping whose ``"forest"`` entry is a fitted estimator
            exposing ``predict`` (and ``oob_prediction_`` for the
            out-of-bag path).
        newdata: Feature matrix to predict on. When ``None``, the
            estimator's stored out-of-bag predictions are returned instead.
        linear_correction_variables, ll_lambda, ll_weight_penalty,
        num_threads, estimate_variance: Accepted for signature
            compatibility; currently ignored.

    Returns:
        ``rf.predict(newdata)`` when ``newdata`` is given, otherwise
        ``rf.oob_prediction_``.

    Raises:
        AttributeError: If ``newdata`` is ``None`` and the estimator has no
            ``oob_prediction_`` attribute. scikit-learn only sets that
            attribute when the forest is fitted with ``oob_score=True``.
    """
    rf = forest_obj["forest"]

    if newdata is not None:
        return rf.predict(newdata)

    try:
        return rf.oob_prediction_
    except AttributeError as err:
        # Replace the bare "no attribute" failure with an actionable message
        # while keeping the same exception type.
        raise AttributeError(
            "Out-of-bag predictions are unavailable: the forest was not fitted "
            "with oob_score=True. Pass `newdata` or refit with oob_score=True."
        ) from err

# Demo: fit the forest on synthetic Gaussian data and print the in-sample predictions.
n, p = 50, 10
X = np.random.standard_normal((n, p))
# The response is the first feature scaled by independent standard-normal noise.
Y = np.multiply(X[:, 0], np.random.standard_normal(n))

forest_obj = ll_regression_forest(X=X, Y=Y)
predictions = predict_ll_regression_forest(forest_obj, newdata=X)
print(predictions)

Collecting econml
  Using cached econml-0.15.0-cp312-cp312-win_amd64.whl.metadata (37 kB)
Collecting sparse (from econml)
  Using cached sparse-0.15.2-py2.py3-none-any.whl.metadata (4.5 kB)
Collecting statsmodels>=0.10 (from econml)
  Using cached statsmodels-0.14.2-cp312-cp312-win_amd64.whl.metadata (9.5 kB)
Collecting shap<0.44.0,>=0.38.1 (from econml)
  Using cached shap-0.43.0.tar.gz (389 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Installing backend dependencies: started
  Installing backend dependencies: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting lightgbm (from econml)
  Using cached lightgbm-4.3.0-py3-none-win_amd64.whl.metadata (19 kB)
Collecting slicer==0.0.7 (from shap<0.44.0,>=0.38.1->econml)

  error: subprocess-exited-with-error
  
  × Building wheel for shap (pyproject.toml) did not run successfully.
  │ exit code: 1
  ╰─> [171 lines of output]
      The nvcc binary could not be located in your $PATH. Either add it to your path, or set $CUDAHOME to enable CUDA.
      Exception occurred during setup, Error building cuda module: TypeError('cannot unpack non-iterable NoneType object')
      running bdist_wheel
      running build
      running build_py
      creating build
      creating build\lib.win-amd64-cpython-312
      creating build\lib.win-amd64-cpython-312\shap
      copying shap\datasets.py -> build\lib.win-amd64-cpython-312\shap
      copying shap\links.py -> build\lib.win-amd64-cpython-312\shap
      copying shap\_explanation.py -> build\lib.win-amd64-cpython-312\shap
      copying shap\_serializable.py -> build\lib.win-amd64-cpython-312\shap
      copying shap\__init__.py -> build\lib.win-amd64-cpython-312\shap
      creating build\lib.win-amd64-cpython-312\shap

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


ModuleNotFoundError: No module named 'econml'