In [380]:

import pandas as pd
import numpy as np
import sys
import traceback
import seaborn as sns
import matplotlib.pyplot as plt
from typing import Literal, Final, List, TypeVar, Dict, Tuple, Any, Optional, Iterable, Union
import sklearn as skl
import dataclasses

In [381]:
# Name of the dataset to work on; it is interpolated into the "<dataset>_full.csv" path read further down.
dataset: Literal["ihdp","jobs"] = "ihdp"

In [382]:
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so it also hides warnings that may matter
# (deprecations, convergence or data-conversion warnings).
warnings.filterwarnings("ignore")

#print(sns.plotting_context())

# Global seaborn look: dark grid background with the "paper" scaling context.
sns.set_theme(style="darkgrid",context="paper")

import matplotlib.style
# Apply the matplotlib "seaborn-darkgrid" style only when the installed matplotlib lists it as available.
if "seaborn-darkgrid" in matplotlib.style.available:
    matplotlib.style.use("seaborn-darkgrid")

# IPython magic: render figures inline in the notebook output.
%matplotlib inline

In [383]:
_seed: Final[int] = 42
"Using the meaning of life, the universe, and everything as the seed for RNG"


def rng() -> np.random.Generator:
    """
    Build a fresh ``numpy.random.Generator`` seeded with ``_seed``.

    A new generator is constructed on every call, so each returned
    generator starts from the same state.
    :return: a newly created generator seeded with ``_seed``
    """
    fresh_generator: np.random.Generator = np.random.default_rng(_seed)
    return fresh_generator


def rng_state() -> np.random.RandomState:
    """
    Build a fresh legacy ``numpy.random.RandomState`` seeded with ``_seed``.

    A new object is constructed on every call, so each returned
    RandomState starts from the same state.
    :return: a newly created RandomState seeded with ``_seed``
    """
    fresh_state: np.random.RandomState = np.random.RandomState(_seed)
    return fresh_state

""

''

# IHDP dataset processing


In [384]:
def turn_01_columns_into_int(
        dataframe_to_edit: pd.DataFrame,
) -> pd.DataFrame:
    """
    Downcast every strictly-binary column of a dataframe to ``uint8``, in place.

    A column counts as binary when all of its values are 0 or 1; a column
    containing any other value (including a missing value) is left untouched.

    The labels are also recorded on the dataframe's ``attrs``:
    ``attrs['01']`` is a tuple of the labels that were converted and
    ``attrs['not_01']`` is a tuple of every other label, both in column order.

    THE ARGUMENT ITSELF IS MODIFIED - no copy is taken.
    :param dataframe_to_edit: the dataframe to convert in place
    :return: the very same dataframe object that was passed in

    >>> import pandas as pd
    >>> print(pd.__version__)
    1.4.1
    >>> before: pd.DataFrame = pd.DataFrame.from_dict(data={"int01":[0,1,1,0],"flt01":[0.0, 1.0, 0.0, 1.0], "intNo": [-1,0,1,2], "fltNo":[-1.0, 0.0, 1.0, 2.0], "intNan": [0,1,None,0], "fltNan":[0.0,1.0,None,0.0]})
    >>> before_types = before.dtypes.values
    >>> after: pd.DataFrame = turn_01_columns_into_int(before.copy())
    >>> after_types = after.dtypes.values
    >>> print(after_types[0])
    uint8
    >>> print(after_types[1])
    uint8
    >>> print(f"{before_types[2] == after_types[2]} {before_types[3] == after_types[3]} {before_types[4] == after_types[4]} {before_types[5] == after_types[5]}")
    True True True True
    >>> print(f"{after.attrs['01']}")
    ('int01', 'flt01')
    >>> print(f"{after.attrs['not_01']} ")
    ('intNo', 'fltNo', 'intNan', 'fltNan')
    """
    binary_labels: List[str] = []
    other_labels: List[str] = []

    for label in dataframe_to_edit.columns:
        column = dataframe_to_edit[label]
        # A missing value is not a member of [0, 1], so such columns fall through here too.
        if not column.isin([0, 1]).all():
            other_labels.append(label)
            continue
        dataframe_to_edit[label] = column.astype(np.uint8)
        binary_labels.append(label)

    dataframe_to_edit.attrs.update(
        {"01": tuple(binary_labels), "not_01": tuple(other_labels)}
    )
    return dataframe_to_edit

In [385]:

# Read "<dataset>_full.csv" (path relative to the working directory) and pass it through
# turn_01_columns_into_int, which converts the 0/1-only columns to uint8 and records the
# column labels in the frame's .attrs.
ihdp_full: pd.DataFrame = turn_01_columns_into_int(
    pd.read_csv(f"{dataset}_full.csv")
)
"The full IHDP dataset (with supplementary t0 and t1 info) as a dataframe"

# Last expression of the cell: display the first rows.
ihdp_full.head()

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,...,x21,x22,x23,x24,t,yf,ycf,ite,t0,t1
0,1.397395,0.996346,-1.105624,-0.879606,0.308569,-1.023402,1,0,0,0,...,0,0,0,1,1,4.771232,-0.298509,4.657928,-0.298509,4.771232
1,0.269033,0.196818,0.383828,0.161703,-0.629189,1.460832,1,0,1,0,...,0,0,0,0,0,2.956273,5.78377,3.428604,2.956273,5.78377
2,1.051537,1.795874,-1.105624,0.161703,-0.629189,0.963985,1,0,1,1,...,0,0,0,1,0,4.164164,7.055789,3.658195,4.164164,7.055789
3,0.662446,0.196818,-0.733261,-0.879606,0.371086,-0.692171,1,0,0,0,...,0,0,0,0,1,6.172307,1.379697,4.585505,1.379697,6.172307
4,0.856992,1.795874,0.011465,-0.879606,0.558638,0.301522,0,1,1,0,...,0,0,0,0,1,7.834469,2.747986,4.265591,2.747986,7.834469


In [386]:
# Print the column dtypes and non-null counts of the loaded frame.
ihdp_full.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 747 entries, 0 to 746
Data columns (total 31 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   x0      747 non-null    float64
 1   x1      747 non-null    float64
 2   x2      747 non-null    float64
 3   x3      747 non-null    float64
 4   x4      747 non-null    float64
 5   x5      747 non-null    float64
 6   x6      747 non-null    uint8  
 7   x7      747 non-null    uint8  
 8   x8      747 non-null    uint8  
 9   x9      747 non-null    uint8  
 10  x10     747 non-null    uint8  
 11  x11     747 non-null    uint8  
 12  x12     747 non-null    uint8  
 13  x13     747 non-null    uint8  
 14  x14     747 non-null    uint8  
 15  x15     747 non-null    uint8  
 16  x16     747 non-null    uint8  
 17  x17     747 non-null    uint8  
 18  x18     747 non-null    uint8  
 19  x19     747 non-null    uint8  
 20  x20     747 non-null    uint8  
 21  x21     747 non-null    uint8  
 22  x2

In [387]:
# Factual view of the data: every column of ihdp_full except "ycf", "ite", "t0" and "t1".
ihdp_factuals: pd.DataFrame = ihdp_full.loc[:, ~ihdp_full.columns.isin(
    ["ycf","ite","t0","t1"]
)]
"A version of the IHDP dataset containing ONLY the factual data"

# NOTE(review): the assignment below is commented out, so `ihdp_factuals_no_y` is never
# defined and the string that follows it is an orphaned description.
#ihdp_factuals_no_y: pd.DataFrame = ihdp_factuals.loc[:, ihdp_factuals.columns != "yf"]
"IHDP dataset with the factual Y omitted"

# Single-column frame holding only "yf"; selecting with a boolean column mask keeps it a
# DataFrame rather than a Series.
ihdp_factuals_y: pd.DataFrame = ihdp_factuals.loc[:, ihdp_factuals.columns == "yf"]
"Only the Y data from the IHDP dataset"

""

''

In [388]:

from sklearn.model_selection import StratifiedKFold, train_test_split

#ihdp_learn_validation_skf: StratifiedKFold = StratifiedKFold(n_splits=10, shuffle=True, random_state=_seed)
#"Using this to remove 10% of the treated/untreated factuals from ihdp for use as part of the validation dataset later on"

#ihdp_learn_indices, ihdp_validation_indices = [i for i in ihdp_learn_validation_skf.split(ihdp_factuals, ihdp_factuals["t"])][0]

# Hold out 10% of the rows as a validation set. The split is shuffled, stratified on the
# "t" column, and driven by a freshly seeded RandomState from rng_state().
ihdp_learn_full_df, ihdp_validation_full_df = train_test_split(
    ihdp_full,
    test_size=0.1,
    random_state=rng_state(),
    shuffle=True,
    stratify=ihdp_full["t"]
)

# Learning subset with the "ycf", "ite", "t0" and "t1" columns removed.
ihdp_learn_df: pd.DataFrame = ihdp_learn_full_df.loc[:, ~ihdp_learn_full_df.columns.isin(
    ["ycf","ite","t0","t1"]
)]
"The dataframe that is the subset of the IHDP factual data which will be used for learning feature importances etc"

# Inputs: every remaining column except "yf" (so "t" is still included).
ihdp_learn_df_x: pd.DataFrame = ihdp_learn_df.loc[:, ihdp_learn_df.columns != "yf"]
"X/T info for the dataframe that is the subset of the IHDP factual data which will be used for learning feature importances etc"
# Targets: a single-column DataFrame holding "yf" (not a Series).
ihdp_learn_df_y: pd.DataFrame = ihdp_learn_df.loc[:, ihdp_learn_df.columns == "yf"]
"Y info for the dataframe that is the subset of the IHDP factual data which will be used for learning feature importances etc"

# The "ite" column of the learning rows, kept as a single-column DataFrame.
ihdp_learn_ite: pd.DataFrame = ihdp_learn_full_df.loc[:,ihdp_learn_full_df.columns=="ite"]

# Same three views for the held-out validation rows.
ihdp_validation_factual_df: pd.DataFrame = ihdp_validation_full_df.loc[:, ~ihdp_validation_full_df.columns.isin(
    ["ycf","ite","t0","t1"]
)]
ihdp_validation_factual_df_x: pd.DataFrame = ihdp_validation_factual_df.loc[:, ihdp_validation_factual_df.columns != "yf"]
ihdp_validation_factual_df_y: pd.DataFrame = ihdp_validation_factual_df.loc[:, ihdp_validation_factual_df.columns == "yf"]

""

''

In [389]:
from sklearn.model_selection import KFold
from sklearn.model_selection._validation import NotFittedError
from sklearn.base import RegressorMixin, TransformerMixin
from sklearn.linear_model import ARDRegression
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.preprocessing import QuantileTransformer
from sklearn.impute import KNNImputer
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn import set_config
from sklearn.metrics import r2_score, make_scorer
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV, GridSearchCV
from sklearn.preprocessing import PolynomialFeatures
from math import inf

# Ask scikit-learn to use its "diagram" display for estimators.
set_config(display="diagram")

# Type variable standing for any type bound to scikit-learn's RegressorMixin.
R = TypeVar('R', bound=RegressorMixin)

In [390]:
def np_data_and_targets(
        df: pd.DataFrame,
        targetname: str = "yf"
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Split a dataframe into a feature matrix and a target vector, both as numpy arrays.

    :param df: the dataframe holding the feature columns and the target column
    :param targetname: label of the column that holds the targets
    :return: ``(features, targets)`` - every column other than ``targetname`` as one
        array, and the ``targetname`` column on its own as another
    """
    # Boolean mask over the column labels: True for everything that is not the target.
    is_feature_column = df.columns != targetname

    feature_matrix: np.ndarray = df.loc[:, is_feature_column].to_numpy()
    target_vector: np.ndarray = df.loc[:, targetname].to_numpy()

    return feature_matrix, target_vector

In [391]:


def halving_grid_searcher(
        regressor: R,
        param_grid: Dict[str, List[Any]],
        train_data: np.ndarray,
        train_targets: np.ndarray,
        k_folds: Union[KFold, Iterable[Tuple[np.ndarray, np.ndarray]]] = KFold(n_splits=5, shuffle=False),
        class_weights: Optional[np.ndarray] = None,
        resource: str = "n_samples"
) -> HalvingGridSearchCV:
    """
    Runs a successive-halving grid search for ``regressor`` and returns the fitted search.

    The regressor is wrapped in a pipeline of
    ``QuantileTransformer`` ("scaler") -> ``KNNImputer`` ("imputer") -> ``regressor`` ("regressor"),
    so the keys of ``param_grid`` must be pipeline parameter names
    (for example ``"regressor__min_samples_split"``).

    :param regressor: the estimator placed at the end of the pipeline
    :param param_grid: pipeline parameter names mapped to the candidate values to try
    :param train_data: the training inputs
    :param train_targets: the training targets (one row per sample)
    :param k_folds: a cross-validator, or an iterable of (train indices, test indices) pairs.
        The default ``KFold`` object is created once when the function is defined; it is not
        shuffled, so sharing it between calls is harmless.
    :param class_weights: optional per-sample weights. When given they are forwarded to the
        ``fit`` of the "regressor" step as ``sample_weight``.
    :param resource: what successive halving increases between iterations: ``"n_samples"``,
        or the name of a numeric pipeline parameter (which must then not be in ``param_grid``)
    :return: the fitted ``HalvingGridSearchCV`` (refit on the best candidate)
    """

    pipe: Pipeline = Pipeline([
        ("scaler", QuantileTransformer(output_distribution="normal")),
        ("imputer",KNNImputer(add_indicator=False, weights="distance")),
        ("regressor",regressor)
    ])

    # Elimination factor, shared by `factor` and the `min_resources` computation below.
    halving_factor: int = 4

    # Upper bound of the halving resource: the number of training samples, or otherwise the
    # value the pipeline currently holds for the chosen parameter.
    # shape[0] (rather than .size) so that an (n, 1) or (n, k) target still counts n samples.
    n_max_resources: int = (
        train_targets.shape[0]
        if resource == "n_samples"
        else pipe.get_params(deep=True)[resource]
    )

    h_grid_search: HalvingGridSearchCV = HalvingGridSearchCV(
        estimator=pipe,
        param_grid=param_grid,
        factor=halving_factor,
        cv=k_folds,
        scoring=make_scorer(r2_score),
        refit=True,
        verbose=1,
        n_jobs=-1,
        aggressive_elimination=True,
        # I wanted to make this error score negative infinity, however, doing so caused a lot of
        # particularly unsightly warning messages to appear.
        # So this is an incredibly low finite number which should be rather hard to reach.
        # And if this score (or an even lower one) somehow is reached legitimately, chances are
        # that the legitimate score being lower than the error score will be the least of
        # one's concerns.
        error_score=-1000000000000,
        resource=resource,
        max_resources=n_max_resources,
        # Never let the starting budget round down to zero.
        min_resources=max(1, n_max_resources // halving_factor)
    )

    # A Pipeline only accepts fit parameters in "<step>__<parameter>" form, so the weights
    # have to be addressed to the "regressor" step; a bare `sample_weight` is rejected.
    fit_params: Dict[str, Any] = {}
    if class_weights is not None:
        fit_params["regressor__sample_weight"] = class_weights

    h_grid_search.fit(train_data, train_targets, **fit_params)

    return h_grid_search



In [392]:
def nested_halving_grid_searcher(
        regressor: R,
        param_grid: Dict[str, List[Any]],
        learn_data: np.ndarray,
        learn_targets: np.ndarray,
        kfold_splits: int = 6,
        learn_classes: Optional[np.ndarray] = None,
        using_class_weights: bool = False,
        nested_rng_generator: Optional[np.random.RandomState] = None,
        resource: str = "n_samples"
) -> Dict[HalvingGridSearchCV, float]:
    """
    Nested cross-validation around ``halving_grid_searcher``.

    The learning data is split into ``kfold_splits`` shuffled outer folds. For every outer
    fold a halving grid search is run on the outer-training part (using an un-shuffled inner
    cross-validation) and the refitted best pipeline is then scored on the outer-test part.

    :param regressor: the estimator to tune (handed to ``halving_grid_searcher``)
    :param param_grid: pipeline parameter names mapped to the candidate values to try
    :param learn_data: the inputs of the learning set
    :param learn_targets: the targets of the learning set
    :param kfold_splits: number of outer folds; the inner cross-validation uses one fewer
        (but never fewer than two, the minimum a k-fold splitter accepts)
    :param learn_classes: optional class label per sample. When given, both the outer and the
        inner folds are stratified on it.
    :param using_class_weights: whether to derive sample weights from ``learn_classes``;
        ignored (treated as False) when ``learn_classes`` is None
    :param nested_rng_generator: random state for shuffling the outer folds; a freshly seeded
        one from ``rng_state()`` is used when omitted
    :param resource: the successive-halving resource (handed to ``halving_grid_searcher``)
    :return: a dict mapping each outer fold's fitted search to its score on that fold's
        held-out data. Folds that raised ``NotFittedError`` are reported on stderr and left out.
    """

    if nested_rng_generator is None:
        nested_rng_generator = rng_state()

    h_grid_search_dicts: Dict[HalvingGridSearchCV, float] = {}

    # The inner cross-validation needs at least two folds.
    child_splits: int = max(2, kfold_splits - 1)
    child_kf: Union[KFold, Iterable[Tuple[np.ndarray, np.ndarray]]] = KFold(n_splits=child_splits, shuffle=False)

    the_splits: Iterable[Tuple[np.ndarray, np.ndarray]]

    if learn_classes is None:
        # Nothing to derive weights from.
        using_class_weights = False

        the_splits = KFold(
            n_splits=kfold_splits,
            shuffle=True,
            random_state=nested_rng_generator
        ).split(learn_data, learn_targets)

    else:
        # Stratified splitting only looks at the labels, so a placeholder stands in for X.
        the_splits = StratifiedKFold(
            n_splits=kfold_splits,
            shuffle=True,
            random_state=nested_rng_generator
        ).split(np.zeros_like(learn_classes), learn_classes)

    for i, (train_indices, test_indices) in enumerate(the_splits, 1):
        print(f"-- {i}/{kfold_splits} start --")
        try:

            if learn_classes is not None:
                # Stratified inner folds, materialised as a list of index pairs that are
                # relative to this outer fold's training subset.
                child_kf = list(
                    StratifiedKFold(
                        n_splits=child_splits,
                        shuffle=False
                    ).split(
                        X=np.zeros_like(train_indices),
                        y=learn_classes[train_indices]
                    )
                )

            train_weights: Optional[np.ndarray] = None
            if using_class_weights:
                # NOTE(review): each sample's weight is its class label divided by the sum of
                # the labels, so samples labelled 0 get weight 0 - confirm this is intended.
                train_classes: np.ndarray = np.take(learn_classes, train_indices)
                train_weights = train_classes / np.sum(train_classes)

            current_search: HalvingGridSearchCV = halving_grid_searcher(
                regressor,
                param_grid,
                learn_data[train_indices],
                learn_targets[train_indices],
                child_kf,
                class_weights=train_weights,
                resource=resource
            )

            current_score: float = current_search.score(
                learn_data[test_indices],
                learn_targets[test_indices]
            )

            h_grid_search_dicts[current_search] = current_score

            print(f"-- {i}/{kfold_splits} done. Score: {current_score} --")

        except NotFittedError as e:
            # Report the failed fold on stderr and carry on with the remaining folds.
            print("oh no! there was a not fitted error!", file=sys.stderr)
            print(e, file=sys.stderr)
            print(traceback.format_exc(), file=sys.stderr)

    return h_grid_search_dicts

In [393]:
from sklearn.ensemble import RandomForestRegressor


In [394]:
# Quick baseline: quantile-transform the inputs and fit a default random forest on the learning set.
# Both steps are seeded with the notebook-wide seed so that re-running the cell reproduces the model.
# Alternative learners that were tried in this slot: ARDRegression(),
# AdaBoostRegressor(base_estimator=ARDRegression()) and LinearRegression().
fpipeline = Pipeline(
    steps=[
        ("scaler",QuantileTransformer(random_state=_seed)),
        ("learner",RandomForestRegressor(random_state=_seed))
    ]
)
learner = fpipeline["learner"]
# The target frame has a single column; flatten it so the forest receives a 1-D target.
fpipeline.fit(ihdp_learn_df_x.to_numpy(), ihdp_learn_df_y.to_numpy().ravel())

fpipeline

In [395]:
# Score the baseline pipeline on the held-out validation rows (the value returned by Pipeline.score).
fpipeline.score(ihdp_validation_factual_df_x.to_numpy(), ihdp_validation_factual_df_y.to_numpy())


0.7616532820515798

In [None]:

# Nested halving grid search for a random forest, stratified on the treatment flag "t".
random_forest_searched_dict: Dict[
    HalvingGridSearchCV, float
] = nested_halving_grid_searcher(
    # Seeded with the notebook-wide seed so that re-running the cell grows the same forests.
    RandomForestRegressor(criterion="squared_error", random_state=_seed),
    {
        # "regressor__n_estimators" is not searched here: it is the halving resource (see below).
        "regressor__min_samples_split": [2,4,6,8],
        "regressor__min_impurity_decrease": [0, *np.geomspace(0.00001,0.2,6)[1:]],
        "regressor__max_features": [None,"sqrt","log2",1,2],
        "regressor__ccp_alpha": [0, *np.geomspace(0.00001,0.2,6)[1:]]
    },
    ihdp_learn_df_x.values,
    # The target frame has a single column; flatten it so the regressors receive a 1-D target.
    ihdp_learn_df_y.values.ravel(),
    learn_classes = ihdp_learn_df_x["t"].to_numpy(),
    kfold_splits=6,
    resource="regressor__n_estimators"
)

# The search whose refitted pipeline scored highest on its own outer test fold.
rf_searched: HalvingGridSearchCV = max(
    random_forest_searched_dict,
    key=random_forest_searched_dict.get
)

best_rf: Tuple[HalvingGridSearchCV, float] = (
    rf_searched,
    random_forest_searched_dict[rf_searched]
)

print(best_rf)

print(best_rf[1])


rf_searched.best_estimator_


-- 1/6 start --
n_iterations: 5
n_required_iterations: 5
n_possible_iterations: 2
min_resources_: 25
max_resources_: 100
aggressive_elimination: True
factor: 4
----------
iter: 0
n_candidates: 720
n_resources: 25
Fitting 5 folds for each of 720 candidates, totalling 3600 fits
----------
iter: 1
n_candidates: 180
n_resources: 25
Fitting 5 folds for each of 180 candidates, totalling 900 fits
----------
iter: 2
n_candidates: 45
n_resources: 25
Fitting 5 folds for each of 45 candidates, totalling 225 fits
----------
iter: 3
n_candidates: 12
n_resources: 25
Fitting 5 folds for each of 12 candidates, totalling 60 fits
----------
iter: 4
n_candidates: 3
n_resources: 100
Fitting 5 folds for each of 3 candidates, totalling 15 fits
-- 1/6 done. Score: 0.7342262697236708 --
-- 2/6 start --
n_iterations: 5
n_required_iterations: 5
n_possible_iterations: 2
min_resources_: 25
max_resources_: 100
aggressive_elimination: True
factor: 4
----------
iter: 0
n_candidates: 720
n_resources: 25
Fitting 5 fol

In [None]:
# Candidate values for the ARD hyper-parameter grid. The iteration count is not listed
# because "regressor__n_iter" is used as the halving resource in the search below.
#ard_iter: List[int] = [200,300,400]
ard_tol: List[float] = [1e-2, 1e-3, 1e-4]
ard_alpha_lambda: List[float] = [1e-5, 1e-6, 1e-7]
ard_thresh_lambda: List[float] = [1e3, 1e4, 1e5]

# Nested halving grid search for ARD regression, stratified on the treatment flag "t".
ard_searched_dict: Dict[HalvingGridSearchCV, float] = nested_halving_grid_searcher(
    regressor=ARDRegression(),
    param_grid={
        "regressor__tol": ard_tol,
        "regressor__alpha_1" : ard_alpha_lambda,
        "regressor__alpha_2" : ard_alpha_lambda,
        "regressor__lambda_1" : ard_alpha_lambda,
        "regressor__lambda_2" : ard_alpha_lambda,
        "regressor__threshold_lambda": ard_thresh_lambda,
    },
    learn_data=ihdp_learn_df_x.to_numpy(),
    learn_targets=ihdp_learn_df_y.to_numpy(),
    kfold_splits=6,
    learn_classes=ihdp_learn_df_x["t"].to_numpy(),
    resource="regressor__n_iter",
)

# The search whose refitted pipeline scored highest on its own outer test fold.
ard_searched: HalvingGridSearchCV = max(ard_searched_dict, key=ard_searched_dict.get)

best_ard: Tuple[HalvingGridSearchCV, float] = (ard_searched, ard_searched_dict[ard_searched])

print(best_ard)

print(best_ard[1])

ard_searched.best_estimator_


In [None]:


# Candidate values for the AdaBoost searches. `adaboost_estimators` is currently not part
# of any grid, because "regressor__n_estimators" is used as the halving resource instead.
adaboost_estimators: List[int] = [40, 50, 60]
adaboost_learn_rate: List[float] = [0.8, 0.9, 1.0, 1.1, 1.2]
adaboost_loss: List[str]= ["linear", "square", "exponential"]

# Nested halving grid search for AdaBoost whose base estimator is a pipeline rebuilt from
# the steps of the best random-forest pipeline found earlier.
rf_ada_searched_dict: Dict[HalvingGridSearchCV, float] = nested_halving_grid_searcher(
    regressor=AdaBoostRegressor(
        random_state=_seed,
        base_estimator=Pipeline(
            steps=list(rf_searched.best_estimator_.named_steps.items())
        ),
    ),
    param_grid={
        # Not searched at the moment: the base estimator itself (one rebuilt pipeline per
        # entry of random_forest_searched_dict) and "regressor__n_estimators".
        "regressor__learning_rate": adaboost_learn_rate,
        "regressor__loss": adaboost_loss,
    },
    learn_data=ihdp_learn_df_x.to_numpy(),
    learn_targets=ihdp_learn_df_y.to_numpy(),
    kfold_splits=6,
    learn_classes=ihdp_learn_df_x["t"].to_numpy(),
    resource="regressor__n_estimators",
)

# The search whose refitted pipeline scored highest on its own outer test fold.
rf_ada_searched: HalvingGridSearchCV = max(rf_ada_searched_dict, key=rf_ada_searched_dict.get)

best_rf_ada: Tuple[HalvingGridSearchCV, float] = (rf_ada_searched, rf_ada_searched_dict[rf_ada_searched])

print(best_rf_ada)

print(best_rf_ada[1])

rf_ada_searched.best_estimator_


In [None]:
# Nested halving grid search for AdaBoost whose base estimator is a pipeline rebuilt from
# the steps of the best ARD pipeline found earlier.
ard_ada_searched_dict: Dict[HalvingGridSearchCV, float] = nested_halving_grid_searcher(
    regressor=AdaBoostRegressor(
        random_state=_seed,
        base_estimator=Pipeline(
            steps=list(ard_searched.best_estimator_.named_steps.items())
        ),
    ),
    param_grid={
        # Not searched at the moment: the base estimator itself (one rebuilt pipeline per
        # entry of ard_searched_dict) and "regressor__n_estimators".
        "regressor__learning_rate": adaboost_learn_rate,
        "regressor__loss": adaboost_loss,
    },
    learn_data=ihdp_learn_df_x.to_numpy(),
    learn_targets=ihdp_learn_df_y.to_numpy(),
    kfold_splits=6,
    learn_classes=ihdp_learn_df_x["t"].to_numpy(),
    resource="regressor__n_estimators",
)

# The search whose refitted pipeline scored highest on its own outer test fold.
ard_ada_searched: HalvingGridSearchCV = max(ard_ada_searched_dict, key=ard_ada_searched_dict.get)

best_ard_ada: Tuple[HalvingGridSearchCV, float] = (ard_ada_searched, ard_ada_searched_dict[ard_ada_searched])

print(best_ard_ada)

print(best_ard_ada[1])

ard_ada_searched.best_estimator_

In [None]:
@dataclasses.dataclass(init=True, eq=True, repr=True, frozen=True)
class RegressorInfoDataclass:
    """
    Immutable record of how well one fitted regressor predicts the outcomes under t=0 and
    t=1, and the difference between the two (the "ite" column).

    Only ``<`` is defined for ordering: instances are ranked by ``ite_r2``, with ties broken
    by the combined ``t0_r2``/``t1_r2`` (see ``__lt__``).
    """

    # The fitted pipeline that produced the predictions.
    regressor: Pipeline

    # Copy of the input frame with the predicted "t0", "t1" and "ite" columns appended.
    # Excluded from the generated __eq__/__hash__: a DataFrame is unhashable and comparing
    # two frames with == does not yield a single bool, so including it would make
    # instances unusable in sets/dict keys and make == raise.
    t0_t1_ite_predictions: pd.DataFrame = dataclasses.field(compare=False)

    # R^2 of the predicted "t0", "t1" and "ite" columns against the true ones.
    t0_r2: float
    t1_r2: float
    ite_r2: float

    def __lt__(self, other: "RegressorInfoDataclass") -> bool:
        """
        Whether this record is worse than ``other``.

        :param other: the record to compare against
        :return: True when ``ite_r2`` is lower than ``other``'s, or when the two are equal
            and the combined t0/t1 score is worse
        """
        if self.ite_r2 < other.ite_r2:
            return True
        if self.ite_r2 == other.ite_r2:
            # basically the r2 scores for t1 and t0 are shifted down by 2 so that they
            # have an upper limit of -1, then the products of the
            # shifted r2 scores are found.
            # higher product = worse r2 scores: counted as 'less than'
            # returns true if this object's combined r2 is worse than other.
            return (
               (self.t0_r2-2) * (self.t1_r2-2)
            ) > (
                (other.t0_r2-2) * (other.t1_r2-2)
            )
        return False


    @classmethod
    def make(
            cls,
            the_regressor: Pipeline,
            the_x_data: pd.DataFrame,
            true_t0_t1_ite: pd.DataFrame
    ) -> "RegressorInfoDataclass":
        """
        Predicts both potential outcomes for every row and scores them against the truth.

        The "t" column of a copy of ``the_x_data`` is set to 0 for every row to predict
        "t0", then to 1 to predict "t1"; "ite" is the predicted "t1" minus the predicted "t0".
        ``the_x_data`` itself is not modified.

        :param the_regressor: a fitted pipeline; it is only asked to ``predict``
        :param the_x_data: the input frame. Its columns are handed to ``predict`` as-is
            (via ``to_numpy``), so they must be exactly the columns, in the same order,
            that the regressor was fitted on - including "t".
        :param true_t0_t1_ite: frame with the true "t0", "t1" and "ite" columns, row-aligned
            with ``the_x_data``
        :return: a new record holding the regressor, the predictions and the three R^2 scores
        """

        x_with_predictions: pd.DataFrame = the_x_data.copy()

        x_t_data: pd.DataFrame = the_x_data.copy()

        # Everyone untreated.
        x_t_data["t"] = 0

        x_with_predictions["t0"] = the_regressor.predict(x_t_data.to_numpy())

        t0_r2_score: float = r2_score(true_t0_t1_ite["t0"], x_with_predictions["t0"])

        # Everyone treated.
        x_t_data["t"] = 1

        x_with_predictions["t1"] = the_regressor.predict(x_t_data.to_numpy())

        t1_r2_score: float = r2_score(true_t0_t1_ite["t1"], x_with_predictions["t1"])

        x_with_predictions["ite"] = x_with_predictions["t1"] - x_with_predictions["t0"]

        ite_r2_score: float = r2_score(true_t0_t1_ite["ite"], x_with_predictions["ite"])

        # `cls` rather than the hard-coded class name, so subclasses build their own type.
        return cls(
            regressor=the_regressor,
            t0_t1_ite_predictions=x_with_predictions,
            t0_r2  = t0_r2_score,
            t1_r2  = t1_r2_score,
            ite_r2 = ite_r2_score
        )



In [None]:
# Evaluate the best pipeline of each of the four searches on the whole dataset.
# The pipelines were fitted on the columns of ihdp_learn_df_x (everything except "yf"),
# so "yf" is dropped from the factual frame before it is handed to predict().
all_best_classifiers: List[RegressorInfoDataclass] = [
    RegressorInfoDataclass.make(
        reg,
        ihdp_factuals.loc[:, ihdp_factuals.columns != "yf"],
        ihdp_full
    ) for reg in [
        rf_searched.best_estimator_,
        ard_searched.best_estimator_,
        rf_ada_searched.best_estimator_,
        ard_ada_searched.best_estimator_
    ]
]


