In [1]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import random
from sklearn.neural_network import MLPRegressor
import tqdm
import warnings

warnings.filterwarnings("ignore")

random.seed(42)
np.random.seed(42)

## data


def dgp_bivariatenormal(N=10000, cor = 0.9):
    m = lambda x: x[:, 0] ** 2

    x = np.random.multivariate_normal(
        mean=[0, 0], cov=[[1, cor], [cor, 1]], size=N
    )
    data = pd.DataFrame(x, columns=["x1", "x2"])
    data["y"] = m(data[["x1", "x2"]].values) + np.random.normal(0, 0.2, N)
    X = data[["x1", "x2"]]
    y = data["y"]

    return X, y


## pfi


def pfi(model, X, y, feature):
    baseline_score = mean_squared_error(y, model.predict(X))
    X_tilde = X.copy()
    X_tilde[feature] = np.random.permutation(X_tilde[feature])
    score = mean_squared_error(y, model.predict(X_tilde))
    return score - baseline_score


def pfis(model, X, y):
    return {feature: pfi(model, X, y, feature) for feature in X.columns}


def pfiss(model, X_train, y_train, X_test, y_test, repetitions=100):
    scores = pd.DataFrame([], columns=X.columns)
    for ii in tqdm.tqdm(range(repetitions)):
        model.fit(X_train, y_train)
        pfi_scores = pfis(model, X_test, y_test)
        scores = pd.concat([scores, pd.DataFrame(pfi_scores, index=[ii])])
    scores_agg = scores.mean(axis=0).to_frame().T
    scores_agg.index = ["mean"]
    scores_agg.loc["std"] = scores.std(axis=0)
    scores_agg.loc["min"] = scores.min(axis=0)
    scores_agg.loc["max"] = scores.max(axis=0)
    return scores_agg


In [16]:
from sklearn.linear_model import (
    LinearRegression,
    Ridge,
    Lasso,
)
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import (
    RandomForestRegressor,
    GradientBoostingRegressor,
    ExtraTreesRegressor,
    AdaBoostRegressor,
    BaggingRegressor,
    HistGradientBoostingRegressor,
)
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.gaussian_process import GaussianProcessRegressor

models = {
    "LinearRegression": LinearRegression(),
    "Lasso": Lasso(),
    "DecisionTree": DecisionTreeRegressor(random_state=42),
    "RandomForest": RandomForestRegressor(random_state=42),
    "ExtraTrees": ExtraTreesRegressor(random_state=42),
    "GradientBoosting": GradientBoostingRegressor(random_state=42),
    "HistGradientBoosting": HistGradientBoostingRegressor(random_state=42),
    "AdaBoost": AdaBoostRegressor(random_state=42),
    "Bagging": BaggingRegressor(random_state=42),
    "MLPRegressor": MLPRegressor(random_state=42, max_iter=1000),
    "SVR": SVR(),
    "KNN": KNeighborsRegressor(),
}
for cor in [0, 0.3, 0.6, 0.9]:
    X, y = dgp_bivariatenormal(N=10000, cor=cor)
    print(f"CORRELATION={cor}")
    print(X.corr())
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    for name, model in models.items():
        model.fit(X_train, y_train)
        mse = mean_squared_error(y_test, model.predict(X_test))
        pfi_scores = pfiss(model, X_train, y_train, X_test, y_test, repetitions=100)
        print(f"{name}: MSE={mse:.4f}, PFI={pfi_scores}")

CORRELATION=0
         x1       x2
x1  1.00000  0.00084
x2  0.00084  1.00000


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 100/100 [00:00<00:00, 132.42it/s]


LinearRegression: MSE=1.9673, PFI=            x1        x2
mean -0.000013 -0.000476
std   0.000312  0.001135
min  -0.001077 -0.003783
max   0.000583  0.001952


100%|██████████| 100/100 [00:00<00:00, 153.19it/s]


Lasso: MSE=1.9665, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:05<00:00, 18.10it/s]


DecisionTree: MSE=0.0841, PFI=            x1        x2
mean  3.863779  0.000343
std   0.091372  0.004024
min   3.638379 -0.009624
max   4.051613  0.013544


100%|██████████| 100/100 [05:19<00:00,  3.19s/it]


RandomForest: MSE=0.0466, PFI=            x1        x2
mean  3.854633 -0.000341
std   0.091441  0.000837
min   3.633923 -0.002283
max   4.090532  0.001623


100%|██████████| 100/100 [02:04<00:00,  1.25s/it]


ExtraTrees: MSE=0.0498, PFI=            x1        x2
mean  3.854723  0.000142
std   0.082563  0.000820
min   3.614130 -0.001703
max   4.094536  0.002964


100%|██████████| 100/100 [01:28<00:00,  1.13it/s]


GradientBoosting: MSE=0.0391, PFI=            x1        x2
mean  3.853638  0.000115
std   0.079878  0.000383
min   3.612521 -0.000380
max   4.019048  0.001431


100%|██████████| 100/100 [00:52<00:00,  1.89it/s]


HistGradientBoosting: MSE=0.0576, PFI=            x1        x2
mean  3.930768 -0.000604
std   0.096621  0.001151
min   3.659158 -0.002893
max   4.168251  0.002485


100%|██████████| 100/100 [00:37<00:00,  2.69it/s]


AdaBoost: MSE=0.4198, PFI=            x1   x2
mean  2.764103  0.0
std   0.061096  0.0
min   2.567479  0.0
max   2.921966  0.0


100%|██████████| 100/100 [00:33<00:00,  2.97it/s]


Bagging: MSE=0.0509, PFI=            x1        x2
mean  3.876550 -0.000455
std   0.090376  0.000879
min   3.642744 -0.002564
max   4.095895  0.001928


100%|██████████| 100/100 [02:34<00:00,  1.55s/it]


MLPRegressor: MSE=0.0385, PFI=            x1        x2
mean  3.854908  0.000086
std   0.088635  0.000170
min   3.643052 -0.000313
max   4.059193  0.000560


100%|██████████| 100/100 [06:01<00:00,  3.62s/it]


SVR: MSE=0.0417, PFI=            x1        x2
mean  3.813108  0.000522
std   0.081620  0.002328
min   3.609502 -0.002971
max   4.052613  0.012927


100%|██████████| 100/100 [00:02<00:00, 44.60it/s]


KNN: MSE=0.0556, PFI=            x1        x2
mean  3.737497  0.001169
std   0.086642  0.004294
min   3.479219 -0.005660
max   3.881928  0.016878
CORRELATION=0.3
          x1        x2
x1  1.000000  0.295301
x2  0.295301  1.000000


100%|██████████| 100/100 [00:00<00:00, 157.59it/s]


LinearRegression: MSE=2.2244, PFI=            x1        x2
mean -0.011590  0.003191
std   0.002911  0.001421
min  -0.018645 -0.000705
max  -0.005373  0.006349


100%|██████████| 100/100 [00:00<00:00, 164.47it/s]


Lasso: MSE=2.2127, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:05<00:00, 17.56it/s]


DecisionTree: MSE=0.0776, PFI=            x1        x2
mean  4.344746  0.003817
std   0.089893  0.002094
min   4.086614 -0.001214
max   4.582445  0.008395


100%|██████████| 100/100 [05:32<00:00,  3.32s/it]


RandomForest: MSE=0.0460, PFI=            x1        x2
mean  4.324058  0.001007
std   0.099189  0.000789
min   3.821794 -0.000962
max   4.547465  0.002960


100%|██████████| 100/100 [02:03<00:00,  1.23s/it]


ExtraTrees: MSE=0.0504, PFI=            x1        x2
mean  4.334732  0.000356
std   0.103385  0.000898
min   4.079095 -0.001578
max   4.602666  0.002116


100%|██████████| 100/100 [01:28<00:00,  1.13it/s]


GradientBoosting: MSE=0.0398, PFI=            x1        x2
mean  4.313196  0.000135
std   0.096239  0.000114
min   4.037590 -0.000109
max   4.518155  0.000506


100%|██████████| 100/100 [00:53<00:00,  1.88it/s]


HistGradientBoosting: MSE=0.0609, PFI=            x1        x2
mean  4.292695 -0.000610
std   0.102774  0.000878
min   4.009607 -0.002224
max   4.524656  0.002734


100%|██████████| 100/100 [00:37<00:00,  2.68it/s]


AdaBoost: MSE=0.4163, PFI=            x1   x2
mean  3.151895  0.0
std   0.074039  0.0
min   2.985979  0.0
max   3.326088  0.0


100%|██████████| 100/100 [00:35<00:00,  2.79it/s]


Bagging: MSE=0.0501, PFI=            x1        x2
mean  4.331977  0.000208
std   0.106231  0.000771
min   4.086622 -0.001352
max   4.567673  0.002368


100%|██████████| 100/100 [02:51<00:00,  1.71s/it]


MLPRegressor: MSE=0.0395, PFI=            x1        x2
mean  4.302958  0.000690
std   0.098288  0.000284
min   4.074472  0.000113
max   4.575419  0.001397


100%|██████████| 100/100 [05:59<00:00,  3.59s/it]


SVR: MSE=0.0430, PFI=            x1        x2
mean  4.201229  0.009998
std   0.087768  0.005590
min   3.991888  0.001557
max   4.374788  0.024876


100%|██████████| 100/100 [00:02<00:00, 44.76it/s]


KNN: MSE=0.0534, PFI=            x1        x2
mean  4.127592  0.021486
std   0.092369  0.008355
min   3.909421  0.004711
max   4.339016  0.049502
CORRELATION=0.6
          x1        x2
x1  1.000000  0.588319
x2  0.588319  1.000000


100%|██████████| 100/100 [00:00<00:00, 160.83it/s]


LinearRegression: MSE=1.9887, PFI=            x1        x2
mean  0.004048  0.000232
std   0.002399  0.000403
min  -0.002792 -0.001396
max   0.009459  0.001315


100%|██████████| 100/100 [00:00<00:00, 165.69it/s]


Lasso: MSE=1.9917, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:05<00:00, 18.79it/s]


DecisionTree: MSE=0.0861, PFI=            x1        x2
mean  3.891849 -0.001705
std   0.082967  0.002863
min   3.697787 -0.007040
max   4.112897  0.005437


100%|██████████| 100/100 [05:34<00:00,  3.35s/it]


RandomForest: MSE=0.0508, PFI=            x1        x2
mean  3.871157 -0.000122
std   0.098279  0.000743
min   3.545633 -0.002623
max   4.135522  0.001571


100%|██████████| 100/100 [02:03<00:00,  1.23s/it]


ExtraTrees: MSE=0.0545, PFI=            x1        x2
mean  3.866437 -0.000148
std   0.083277  0.000987
min   3.658295 -0.002472
max   4.073157  0.002609


100%|██████████| 100/100 [01:29<00:00,  1.12it/s]


GradientBoosting: MSE=0.0430, PFI=            x1        x2
mean  3.872998 -0.000076
std   0.080662  0.000128
min   3.616274 -0.000350
max   4.065046  0.000286


100%|██████████| 100/100 [00:53<00:00,  1.88it/s]


HistGradientBoosting: MSE=0.0766, PFI=            x1        x2
mean  3.740978  0.003184
std   0.078224  0.001575
min   3.524931 -0.002481
max   3.888964  0.006610


100%|██████████| 100/100 [00:37<00:00,  2.67it/s]


AdaBoost: MSE=0.4387, PFI=            x1   x2
mean  2.782195  0.0
std   0.059212  0.0
min   2.637178  0.0
max   2.960180  0.0


100%|██████████| 100/100 [00:33<00:00,  2.99it/s]


Bagging: MSE=0.0539, PFI=            x1        x2
mean  3.859004  0.000300
std   0.084908  0.000976
min   3.663146 -0.002556
max   4.038548  0.003072


100%|██████████| 100/100 [02:23<00:00,  1.43s/it]


MLPRegressor: MSE=0.0435, PFI=            x1        x2
mean  3.813805  0.003647
std   0.081486  0.001181
min   3.613025  0.001231
max   3.998697  0.007231


100%|██████████| 100/100 [05:51<00:00,  3.51s/it]


SVR: MSE=0.0545, PFI=            x1        x2
mean  3.598161  0.055593
std   0.092392  0.016646
min   3.393994  0.018172
max   3.889210  0.099977


100%|██████████| 100/100 [00:02<00:00, 44.29it/s]


KNN: MSE=0.0602, PFI=            x1        x2
mean  3.486306  0.110535
std   0.080031  0.020866
min   3.280350  0.065832
max   3.646570  0.168640
CORRELATION=0.9
         x1       x2
x1  1.00000  0.90226
x2  0.90226  1.00000


100%|██████████| 100/100 [00:00<00:00, 154.44it/s]


LinearRegression: MSE=1.9223, PFI=            x1        x2
mean  0.000563  0.000839
std   0.002440  0.000582
min  -0.005361 -0.000715
max   0.005277  0.002241


100%|██████████| 100/100 [00:00<00:00, 158.75it/s]


Lasso: MSE=1.9210, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:05<00:00, 18.69it/s]


DecisionTree: MSE=0.0794, PFI=            x1        x2
mean  3.766616  0.005630
std   0.087437  0.001772
min   3.526393  0.000640
max   3.968657  0.010624


100%|██████████| 100/100 [05:26<00:00,  3.27s/it]


RandomForest: MSE=0.0485, PFI=            x1        x2
mean  3.716279  0.001095
std   0.096054  0.000628
min   3.500141 -0.000698
max   3.909234  0.002512


100%|██████████| 100/100 [01:57<00:00,  1.17s/it]


ExtraTrees: MSE=0.0508, PFI=            x1        x2
mean  3.317336  0.066476
std   0.072974  0.005807
min   3.097103  0.048674
max   3.471952  0.084536


100%|██████████| 100/100 [01:28<00:00,  1.14it/s]


GradientBoosting: MSE=0.0422, PFI=            x1        x2
mean  3.696067  0.000536
std   0.084199  0.000229
min   3.519706 -0.000117
max   3.870361  0.001258


100%|██████████| 100/100 [00:53<00:00,  1.85it/s]


HistGradientBoosting: MSE=0.0533, PFI=            x1        x2
mean  3.686006 -0.000650
std   0.077665  0.000488
min   3.470947 -0.001694
max   3.891828  0.000598


100%|██████████| 100/100 [00:36<00:00,  2.70it/s]


AdaBoost: MSE=0.5033, PFI=            x1   x2
mean  2.503713  0.0
std   0.064827  0.0
min   2.306841  0.0
max   2.647428  0.0


100%|██████████| 100/100 [00:33<00:00,  2.99it/s]


Bagging: MSE=0.0514, PFI=            x1        x2
mean  3.738787  0.001931
std   0.089859  0.000927
min   3.510884 -0.000559
max   3.998434  0.004372


100%|██████████| 100/100 [02:06<00:00,  1.26s/it]


MLPRegressor: MSE=0.0413, PFI=            x1        x2
mean  3.502292  0.019697
std   0.086638  0.002050
min   3.266302  0.014664
max   3.695055  0.026765


100%|██████████| 100/100 [05:49<00:00,  3.49s/it]


SVR: MSE=0.0413, PFI=            x1        x2
mean  2.901527  0.209362
std   0.076345  0.012636
min   2.701342  0.185711
max   3.056989  0.246902


100%|██████████| 100/100 [00:02<00:00, 43.45it/s]

KNN: MSE=0.0501, PFI=            x1        x2
mean  2.910489  0.534244
std   0.052216  0.038457
min   2.788326  0.458514
max   3.039082  0.643433





In [17]:
from sklearn.linear_model import (
    LinearRegression,
    Ridge,
    Lasso,
)
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import (
    RandomForestRegressor,
    GradientBoostingRegressor,
    ExtraTreesRegressor,
    AdaBoostRegressor,
    BaggingRegressor,
    HistGradientBoostingRegressor,
)
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.gaussian_process import GaussianProcessRegressor

models = {
    "LinearRegression": LinearRegression(),
    "Lasso": Lasso(),
    "DecisionTree": DecisionTreeRegressor(random_state=42),
    "RandomForest": RandomForestRegressor(random_state=42),
    "ExtraTrees": ExtraTreesRegressor(random_state=42),
    "GradientBoosting": GradientBoostingRegressor(random_state=42),
    "HistGradientBoosting": HistGradientBoostingRegressor(random_state=42),
    "AdaBoost": AdaBoostRegressor(random_state=42),
    "Bagging": BaggingRegressor(random_state=42),
    "MLPRegressor": MLPRegressor(random_state=42, max_iter=1000),
    "SVR": SVR(),
    "KNN": KNeighborsRegressor(),
}
for cor in [0, 0.3, 0.6, 0.9]:
    X, y = dgp_bivariatenormal(N=1000, cor=cor)
    print(f"CORRELATION={cor}")
    print(X.corr())
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    for name, model in models.items():
        model.fit(X_train, y_train)
        mse = mean_squared_error(y_test, model.predict(X_test))
        pfi_scores = pfiss(model, X_train, y_train, X_test, y_test, repetitions=100)
        print(f"{name}: MSE={mse:.4f}, PFI={pfi_scores}")

CORRELATION=0
          x1        x2
x1  1.000000  0.031091
x2  0.031091  1.000000


100%|██████████| 100/100 [00:00<00:00, 167.57it/s]


LinearRegression: MSE=1.9893, PFI=            x1        x2
mean -0.014089 -0.015788
std   0.005850  0.006276
min  -0.025946 -0.027154
max  -0.001309 -0.000311


100%|██████████| 100/100 [00:00<00:00, 164.59it/s]


Lasso: MSE=1.9567, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:00<00:00, 107.56it/s]


DecisionTree: MSE=0.0861, PFI=            x1        x2
mean  3.884663  0.003767
std   0.277064  0.005526
min   3.104267 -0.009207
max   4.378028  0.016591


100%|██████████| 100/100 [00:31<00:00,  3.14it/s]


RandomForest: MSE=0.0596, PFI=            x1        x2
mean  3.859791 -0.001532
std   0.269398  0.002715
min   3.212411 -0.007676
max   4.392601  0.005034


100%|██████████| 100/100 [00:17<00:00,  5.78it/s]


ExtraTrees: MSE=0.0618, PFI=            x1        x2
mean  3.899472 -0.003437
std   0.311407  0.002440
min   3.063279 -0.008733
max   4.630729  0.003955


100%|██████████| 100/100 [00:12<00:00,  7.85it/s]


GradientBoosting: MSE=0.0482, PFI=            x1        x2
mean  3.895849  0.002847
std   0.286048  0.001306
min   3.091033 -0.000283
max   4.486560  0.005527


100%|██████████| 100/100 [00:48<00:00,  2.07it/s]


HistGradientBoosting: MSE=0.1519, PFI=            x1        x2
mean  3.409924  0.001215
std   0.235027  0.015100
min   2.780866 -0.032218
max   3.856709  0.028756


100%|██████████| 100/100 [00:10<00:00,  9.86it/s]


AdaBoost: MSE=0.1740, PFI=            x1        x2
mean  3.216939  0.000894
std   0.252970  0.001548
min   2.187867 -0.000949
max   3.666714  0.007669


100%|██████████| 100/100 [00:04<00:00, 22.45it/s]


Bagging: MSE=0.0797, PFI=            x1        x2
mean  3.800250 -0.005033
std   0.337463  0.009430
min   2.668851 -0.024884
max   4.339455  0.019592


100%|██████████| 100/100 [00:46<00:00,  2.14it/s]


MLPRegressor: MSE=0.0564, PFI=            x1        x2
mean  3.738111 -0.001552
std   0.277721  0.002237
min   2.619581 -0.005682
max   4.285212  0.004806


100%|██████████| 100/100 [00:05<00:00, 19.44it/s]


SVR: MSE=0.1005, PFI=            x1        x2
mean  3.581477 -0.007146
std   0.227123  0.023903
min   3.064860 -0.035857
max   4.229636  0.096135


100%|██████████| 100/100 [00:00<00:00, 122.20it/s]


KNN: MSE=0.1374, PFI=            x1        x2
mean  3.346836  0.001785
std   0.255787  0.032268
min   2.669703 -0.048188
max   3.995456  0.130948
CORRELATION=0.3
          x1        x2
x1  1.000000  0.350065
x2  0.350065  1.000000


100%|██████████| 100/100 [00:00<00:00, 174.85it/s]


LinearRegression: MSE=1.9488, PFI=            x1        x2
mean -0.052836  0.042279
std   0.018071  0.021863
min  -0.089730 -0.002743
max  -0.017362  0.101307


100%|██████████| 100/100 [00:00<00:00, 169.74it/s]


Lasso: MSE=1.9080, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:00<00:00, 110.32it/s]


DecisionTree: MSE=0.0746, PFI=            x1        x2
mean  3.673758  0.009564
std   0.268391  0.005673
min   2.897556 -0.000926
max   4.376965  0.031078


100%|██████████| 100/100 [00:31<00:00,  3.19it/s]


RandomForest: MSE=0.0513, PFI=            x1        x2
mean  3.707388  0.001533
std   0.282562  0.001964
min   2.880791 -0.003335
max   4.313338  0.007783


100%|██████████| 100/100 [00:16<00:00,  5.97it/s]


ExtraTrees: MSE=0.0500, PFI=            x1        x2
mean  3.802778  0.001651
std   0.302491  0.002398
min   2.871454 -0.002886
max   4.491046  0.010264


100%|██████████| 100/100 [00:12<00:00,  8.18it/s]


GradientBoosting: MSE=0.0491, PFI=            x1        x2
mean  3.607169  0.001115
std   0.265492  0.001238
min   2.993361 -0.001477
max   4.206072  0.004189


100%|██████████| 100/100 [00:46<00:00,  2.14it/s]


HistGradientBoosting: MSE=0.1759, PFI=            x1        x2
mean  3.526689 -0.009862
std   0.258264  0.014161
min   2.885147 -0.050013
max   4.047708  0.021765


100%|██████████| 100/100 [00:09<00:00, 10.17it/s]


AdaBoost: MSE=0.1772, PFI=            x1   x2
mean  3.079852  0.0
std   0.223057  0.0
min   2.404450  0.0
max   3.552139  0.0


100%|██████████| 100/100 [00:04<00:00, 22.11it/s]


Bagging: MSE=0.0528, PFI=            x1        x2
mean  3.780437  0.002147
std   0.262786  0.002411
min   3.023887 -0.002897
max   4.420651  0.010574


100%|██████████| 100/100 [00:52<00:00,  1.89it/s]


MLPRegressor: MSE=0.0476, PFI=            x1        x2
mean  3.603779  0.003251
std   0.246980  0.004205
min   2.871742 -0.003881
max   4.098662  0.018817


100%|██████████| 100/100 [00:04<00:00, 20.50it/s]


SVR: MSE=0.1085, PFI=            x1        x2
mean  3.192926  0.035619
std   0.218799  0.045343
min   2.749811 -0.030364
max   3.782612  0.142553


100%|██████████| 100/100 [00:00<00:00, 119.16it/s]


KNN: MSE=0.2142, PFI=            x1        x2
mean  3.041099  0.017097
std   0.217530  0.074495
min   2.292966 -0.113888
max   3.464127  0.197802
CORRELATION=0.6
          x1        x2
x1  1.000000  0.580217
x2  0.580217  1.000000


100%|██████████| 100/100 [00:00<00:00, 167.14it/s]


LinearRegression: MSE=2.2224, PFI=            x1        x2
mean -0.039652 -0.007535
std   0.019585  0.003274
min  -0.097704 -0.013362
max   0.016021  0.000599


100%|██████████| 100/100 [00:00<00:00, 162.62it/s]


Lasso: MSE=2.1696, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:00<00:00, 106.66it/s]


DecisionTree: MSE=0.1023, PFI=            x1        x2
mean  4.217182 -0.007216
std   0.297460  0.005400
min   3.542750 -0.020093
max   4.966244  0.007322


100%|██████████| 100/100 [00:31<00:00,  3.14it/s]


RandomForest: MSE=0.0559, PFI=            x1        x2
mean  4.288521  0.001521
std   0.302953  0.002549
min   3.146040 -0.003904
max   4.957046  0.007232


100%|██████████| 100/100 [00:17<00:00,  5.83it/s]


ExtraTrees: MSE=0.0568, PFI=            x1        x2
mean  4.188020  0.001139
std   0.276174  0.003040
min   3.600842 -0.005358
max   4.689824  0.007548


100%|██████████| 100/100 [00:12<00:00,  7.80it/s]


GradientBoosting: MSE=0.0505, PFI=            x1        x2
mean  4.128936  0.001948
std   0.333352  0.001396
min   3.294192 -0.001540
max   4.791555  0.005268


100%|██████████| 100/100 [00:47<00:00,  2.09it/s]


HistGradientBoosting: MSE=0.1486, PFI=            x1        x2
mean  4.128470 -0.012288
std   0.290778  0.011056
min   3.330899 -0.038643
max   4.878138  0.016614


100%|██████████| 100/100 [00:09<00:00, 10.36it/s]


AdaBoost: MSE=0.2206, PFI=            x1   x2
mean  3.550180  0.0
std   0.277345  0.0
min   2.696556  0.0
max   4.115929  0.0


100%|██████████| 100/100 [00:04<00:00, 22.86it/s]


Bagging: MSE=0.0603, PFI=            x1        x2
mean  4.217087  0.000165
std   0.279467  0.002871
min   3.389277 -0.006259
max   4.795815  0.006639


100%|██████████| 100/100 [00:48<00:00,  2.06it/s]


MLPRegressor: MSE=0.0520, PFI=            x1        x2
mean  4.151631  0.005795
std   0.311187  0.005776
min   3.303274 -0.004087
max   5.027326  0.025788


100%|██████████| 100/100 [00:04<00:00, 20.28it/s]


SVR: MSE=0.0786, PFI=            x1        x2
mean  3.650873  0.081016
std   0.266415  0.044673
min   3.047549 -0.007650
max   4.342066  0.212960


100%|██████████| 100/100 [00:00<00:00, 124.83it/s]


KNN: MSE=0.1565, PFI=            x1        x2
mean  3.388538  0.192830
std   0.260955  0.081172
min   2.811593  0.014955
max   4.062671  0.362694
CORRELATION=0.9
          x1        x2
x1  1.000000  0.905969
x2  0.905969  1.000000


100%|██████████| 100/100 [00:00<00:00, 176.37it/s]


LinearRegression: MSE=1.7747, PFI=            x1        x2
mean -0.001275 -0.002642
std   0.002278  0.010416
min  -0.007401 -0.025774
max   0.003644  0.021758


100%|██████████| 100/100 [00:00<00:00, 168.94it/s]


Lasso: MSE=1.7673, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:00<00:00, 111.36it/s]


DecisionTree: MSE=0.1070, PFI=            x1        x2
mean  3.529031 -0.006197
std   0.274525  0.005002
min   2.665093 -0.016246
max   4.221473  0.005041


100%|██████████| 100/100 [00:30<00:00,  3.27it/s]


RandomForest: MSE=0.0606, PFI=            x1        x2
mean  3.476925  0.005034
std   0.271251  0.002017
min   2.515354 -0.001065
max   4.033506  0.009607


100%|██████████| 100/100 [00:16<00:00,  6.05it/s]


ExtraTrees: MSE=0.0615, PFI=            x1        x2
mean  2.983012  0.071394
std   0.210524  0.021381
min   2.506586  0.033416
max   3.473641  0.139982


100%|██████████| 100/100 [00:12<00:00,  8.25it/s]


GradientBoosting: MSE=0.0549, PFI=            x1        x2
mean  3.472591 -0.001133
std   0.279755  0.001309
min   2.665446 -0.004192
max   3.990375  0.001804


100%|██████████| 100/100 [00:45<00:00,  2.17it/s]


HistGradientBoosting: MSE=0.1738, PFI=            x1        x2
mean  2.871322  0.017415
std   0.237392  0.007025
min   2.135303 -0.010818
max   3.345053  0.028121


100%|██████████| 100/100 [00:07<00:00, 13.69it/s]


AdaBoost: MSE=0.2172, PFI=            x1   x2
mean  2.996241  0.0
std   0.231967  0.0
min   2.335421  0.0
max   3.479361  0.0


100%|██████████| 100/100 [00:04<00:00, 22.66it/s]


Bagging: MSE=0.0720, PFI=            x1        x2
mean  3.395642  0.013247
std   0.244340  0.002408
min   2.671434  0.007579
max   3.954234  0.021047


100%|██████████| 100/100 [00:48<00:00,  2.05it/s]


MLPRegressor: MSE=0.0618, PFI=            x1        x2
mean  3.111387  0.071770
std   0.224338  0.021022
min   2.367988  0.023501
max   3.624928  0.130512


100%|██████████| 100/100 [00:04<00:00, 20.43it/s]


SVR: MSE=0.0995, PFI=            x1        x2
mean  2.172855  0.273005
std   0.162560  0.069685
min   1.640093  0.110503
max   2.632694  0.427104


100%|██████████| 100/100 [00:00<00:00, 121.05it/s]

KNN: MSE=0.0854, PFI=            x1        x2
mean  2.444552  0.685293
std   0.187168  0.142153
min   1.933683  0.398073
max   2.863029  1.048845





# Changing the sample size

In [2]:
from sklearn.linear_model import (
    LinearRegression,
    Ridge,
    Lasso,
)
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import (
    RandomForestRegressor,
    GradientBoostingRegressor,
    ExtraTreesRegressor,
    AdaBoostRegressor,
    BaggingRegressor,
    HistGradientBoostingRegressor,
)
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.gaussian_process import GaussianProcessRegressor

models = {
    "LinearRegression": LinearRegression(),
    "Lasso": Lasso(),
    "DecisionTree": DecisionTreeRegressor(random_state=42),
    "RandomForest": RandomForestRegressor(random_state=42),
    "ExtraTrees": ExtraTreesRegressor(random_state=42),
    "GradientBoosting": GradientBoostingRegressor(random_state=42),
    "HistGradientBoosting": HistGradientBoostingRegressor(random_state=42),
    "AdaBoost": AdaBoostRegressor(random_state=42),
    "Bagging": BaggingRegressor(random_state=42),
    "MLPRegressor": MLPRegressor(random_state=42, max_iter=1000),
    "SVR": SVR(),
    "KNN": KNeighborsRegressor(),
}

# Fixed correlation
cor = 0.6

# Different sample sizes
for N in [20, 50, 100, 500, 1000]:
    X, y = dgp_bivariatenormal(N=N, cor=cor)
    print(f"SAMPLE SIZE N={N}, CORRELATION={cor}")
    print(X.corr())

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    for name, model in models.items():
        model.fit(X_train, y_train)
        mse = mean_squared_error(y_test, model.predict(X_test))
        pfi_scores = pfiss(model, X_train, y_train, X_test, y_test, repetitions=100)
        print(f"{name}: MSE={mse:.4f}, PFI={pfi_scores}")


SAMPLE SIZE N=20, CORRELATION=0.6
          x1        x2
x1  1.000000  0.489578
x2  0.489578  1.000000


100%|██████████| 100/100 [00:00<00:00, 177.28it/s]


LinearRegression: MSE=0.6288, PFI=            x1        x2
mean  0.125821  0.055614
std   0.083729  0.129125
min  -0.004025 -0.156530
max   0.230307  0.251996


100%|██████████| 100/100 [00:00<00:00, 171.21it/s]


Lasso: MSE=0.5442, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:00<00:00, 163.68it/s]


DecisionTree: MSE=2.5142, PFI=            x1        x2
mean  0.738084 -0.387949
std   0.420684  0.870763
min   0.000000 -1.672711
max   1.198438  0.498296


100%|██████████| 100/100 [00:10<00:00,  9.27it/s]


RandomForest: MSE=0.2346, PFI=            x1        x2
mean  0.790612 -0.063978
std   0.538148  0.083897
min   0.000000 -0.195403
max   1.366836  0.047633


100%|██████████| 100/100 [00:07<00:00, 13.30it/s]


ExtraTrees: MSE=0.1793, PFI=            x1        x2
mean  0.474153 -0.019198
std   0.291958  0.017320
min   0.000000 -0.050558
max   0.877162  0.007614


100%|██████████| 100/100 [00:04<00:00, 20.98it/s]


GradientBoosting: MSE=0.1095, PFI=            x1        x2
mean  1.161770  0.006843
std   0.585836  0.025178
min  -0.000120 -0.029179
max   1.520405  0.043339


100%|██████████| 100/100 [00:06<00:00, 16.48it/s]


HistGradientBoosting: MSE=0.5442, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:07<00:00, 13.79it/s]


AdaBoost: MSE=0.1282, PFI=            x1        x2
mean  0.996693  0.065292
std   0.895914  0.144753
min  -0.015586 -0.028021
max   2.286625  0.399783


100%|██████████| 100/100 [00:02<00:00, 43.19it/s]


Bagging: MSE=0.5441, PFI=            x1        x2
mean  0.912874 -0.152816
std   0.541725  0.136689
min   0.000000 -0.348391
max   1.348433  0.051410


100%|██████████| 100/100 [00:12<00:00,  8.14it/s]


MLPRegressor: MSE=0.1520, PFI=            x1        x2
mean  0.752145  0.003638
std   0.539305  0.101640
min  -0.009778 -0.115491
max   1.515049  0.168408


100%|██████████| 100/100 [00:00<00:00, 162.04it/s]


SVR: MSE=0.1774, PFI=            x1        x2
mean  0.685817 -0.057902
std   0.430079  0.065141
min  -0.001381 -0.169103
max   1.200474  0.051273


100%|██████████| 100/100 [00:00<00:00, 135.80it/s]


KNN: MSE=0.2214, PFI=            x1        x2
mean  0.452173  0.022797
std   0.265502  0.155323
min   0.000000 -0.132476
max   0.695589  0.328692
SAMPLE SIZE N=50, CORRELATION=0.6
          x1        x2
x1  1.000000  0.545777
x2  0.545777  1.000000


100%|██████████| 100/100 [00:00<00:00, 164.06it/s]


LinearRegression: MSE=1.2439, PFI=            x1        x2
mean  0.023107  0.058265
std   0.009005  0.056937
min  -0.003452 -0.057389
max   0.046187  0.192337


100%|██████████| 100/100 [00:00<00:00, 159.99it/s]


Lasso: MSE=1.2264, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:00<00:00, 161.19it/s]


DecisionTree: MSE=0.0883, PFI=            x1        x2
mean  1.145178  0.014714
std   0.378918  0.022430
min   0.322581 -0.012745
max   1.808248  0.088532


100%|██████████| 100/100 [00:11<00:00,  8.89it/s]


RandomForest: MSE=0.0985, PFI=            x1        x2
mean  0.728722  0.135141
std   0.298159  0.080827
min   0.026412 -0.002823
max   1.360803  0.239621


100%|██████████| 100/100 [00:07<00:00, 13.11it/s]


ExtraTrees: MSE=0.0869, PFI=            x1        x2
mean  1.090387  0.005267
std   0.373190  0.021768
min   0.125562 -0.032520
max   1.694345  0.067441


100%|██████████| 100/100 [00:04<00:00, 20.35it/s]


GradientBoosting: MSE=0.1088, PFI=            x1        x2
mean  1.142575 -0.033680
std   0.349374  0.017927
min   0.388808 -0.076164
max   1.786019  0.019161


100%|██████████| 100/100 [00:06<00:00, 14.48it/s]


HistGradientBoosting: MSE=1.1013, PFI=            x1        x2
mean -0.004820  0.029071
std   0.017836  0.020767
min  -0.042520 -0.009463
max   0.013153  0.059050


100%|██████████| 100/100 [00:07<00:00, 14.00it/s]


AdaBoost: MSE=0.0960, PFI=            x1        x2
mean  0.811070  0.022230
std   0.295187  0.010903
min  -0.010385 -0.009044
max   1.248684  0.043695


100%|██████████| 100/100 [00:02<00:00, 41.58it/s]


Bagging: MSE=0.2760, PFI=            x1        x2
mean  0.540829  0.299236
std   0.253040  0.183567
min  -0.097547 -0.018290
max   0.933273  0.545519


100%|██████████| 100/100 [00:16<00:00,  6.03it/s]


MLPRegressor: MSE=0.0997, PFI=            x1        x2
mean  1.279726 -0.023419
std   0.510318  0.023606
min  -0.036655 -0.068762
max   2.129805  0.056210


100%|██████████| 100/100 [00:00<00:00, 170.82it/s]


SVR: MSE=0.0570, PFI=            x1        x2
mean  0.926198  0.035598
std   0.345499  0.018708
min  -0.005569 -0.008910
max   1.436984  0.078021


100%|██████████| 100/100 [00:00<00:00, 149.23it/s]


KNN: MSE=0.1261, PFI=            x1        x2
mean  0.760277  0.064507
std   0.260873  0.049143
min   0.041351 -0.013029
max   1.171146  0.233163
SAMPLE SIZE N=100, CORRELATION=0.6
         x1       x2
x1  1.00000  0.62045
x2  0.62045  1.00000


100%|██████████| 100/100 [00:00<00:00, 177.20it/s]


LinearRegression: MSE=2.2887, PFI=            x1        x2
mean  0.031534  0.148883
std   0.018082  0.113811
min  -0.020521 -0.120473
max   0.067941  0.393406


100%|██████████| 100/100 [00:00<00:00, 174.10it/s]


Lasso: MSE=2.4676, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:00<00:00, 167.06it/s]


DecisionTree: MSE=0.1536, PFI=            x1        x2
mean  4.344046 -0.026824
std   1.004586  0.021360
min   1.072138 -0.077282
max   5.796282  0.027913


100%|██████████| 100/100 [00:11<00:00,  8.66it/s]


RandomForest: MSE=0.1594, PFI=            x1        x2
mean  3.635217  0.016430
std   0.932084  0.014553
min   1.117745 -0.023971
max   4.931145  0.047664


100%|██████████| 100/100 [00:08<00:00, 12.13it/s]


ExtraTrees: MSE=0.0691, PFI=            x1        x2
mean  3.881806  0.057618
std   0.940420  0.027483
min   1.195376 -0.006689
max   5.431712  0.120130


100%|██████████| 100/100 [00:05<00:00, 18.29it/s]


GradientBoosting: MSE=0.0943, PFI=            x1        x2
mean  4.028842  0.011227
std   1.072231  0.012662
min   1.080295 -0.018180
max   5.461738  0.037439


100%|██████████| 100/100 [00:09<00:00, 11.03it/s]


HistGradientBoosting: MSE=1.4753, PFI=            x1        x2
mean  1.349166  0.334781
std   0.385835  0.232774
min   0.433575 -0.227434
max   2.290426  0.776800


100%|██████████| 100/100 [00:01<00:00, 64.12it/s]


AdaBoost: MSE=0.4406, PFI=            x1       x2
mean  3.556931 -0.12877
std   1.022308  0.05508
min   0.758089 -0.15753
max   4.848033  0.00000


100%|██████████| 100/100 [00:02<00:00, 41.55it/s]


Bagging: MSE=0.1628, PFI=            x1        x2
mean  4.412673 -0.015703
std   0.975377  0.017069
min   1.094945 -0.045901
max   5.948753  0.025289


100%|██████████| 100/100 [00:16<00:00,  6.04it/s]


MLPRegressor: MSE=0.1122, PFI=            x1        x2
mean  3.856570  0.126407
std   0.921449  0.081180
min   1.119480 -0.014865
max   5.471730  0.274652


100%|██████████| 100/100 [00:00<00:00, 156.39it/s]


SVR: MSE=0.2598, PFI=            x1        x2
mean  2.719318  0.480702
std   0.446401  0.228190
min   1.585798  0.043448
max   3.671328  0.833599


100%|██████████| 100/100 [00:00<00:00, 146.16it/s]


KNN: MSE=0.3899, PFI=            x1        x2
mean  2.713678  0.567583
std   0.719155  0.365618
min   0.636045 -0.030514
max   4.028196  1.822266
SAMPLE SIZE N=500, CORRELATION=0.6
          x1        x2
x1  1.000000  0.634254
x2  0.634254  1.000000


100%|██████████| 100/100 [00:00<00:00, 168.21it/s]


LinearRegression: MSE=2.7791, PFI=            x1        x2
mean -0.107474 -0.050719
std   0.032532  0.026014
min  -0.182426 -0.108980
max   0.005665  0.020148


100%|██████████| 100/100 [00:00<00:00, 166.76it/s]


Lasso: MSE=2.6143, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:00<00:00, 137.64it/s]


DecisionTree: MSE=0.0976, PFI=            x1        x2
mean  4.581843  0.005351
std   0.461918  0.006500
min   3.366497 -0.007205
max   5.428254  0.023922


100%|██████████| 100/100 [00:19<00:00,  5.09it/s]


RandomForest: MSE=0.1376, PFI=            x1        x2
mean  4.341604  0.000242
std   0.510971  0.004098
min   2.766668 -0.012321
max   5.100132  0.009969


100%|██████████| 100/100 [00:12<00:00,  8.28it/s]


ExtraTrees: MSE=0.1041, PFI=            x1        x2
mean  4.536850 -0.004694
std   0.527153  0.007964
min   2.300772 -0.020281
max   5.303880  0.021417


100%|██████████| 100/100 [00:08<00:00, 12.02it/s]


GradientBoosting: MSE=0.0881, PFI=            x1        x2
mean  4.607039 -0.003881
std   0.475553  0.004026
min   3.238230 -0.011493
max   5.421728  0.008385


100%|██████████| 100/100 [00:27<00:00,  3.68it/s]


HistGradientBoosting: MSE=0.7843, PFI=            x1        x2
mean  2.921180  0.054568
std   0.357930  0.028239
min   1.960757 -0.028250
max   3.713603  0.116255


100%|██████████| 100/100 [00:08<00:00, 11.88it/s]


AdaBoost: MSE=0.1834, PFI=            x1        x2
mean  4.253149 -0.010487
std   0.439890  0.004253
min   2.834794 -0.015208
max   4.928362  0.001366


100%|██████████| 100/100 [00:03<00:00, 31.13it/s]


Bagging: MSE=0.1248, PFI=            x1        x2
mean  4.462069 -0.010548
std   0.403899  0.005057
min   3.124371 -0.022073
max   5.254719  0.007149


100%|██████████| 100/100 [00:33<00:00,  3.01it/s]


MLPRegressor: MSE=0.1834, PFI=            x1        x2
mean  3.948333  0.035878
std   0.517024  0.026388
min   1.835075 -0.015605
max   4.691010  0.114815


100%|██████████| 100/100 [00:01<00:00, 56.80it/s]


SVR: MSE=0.4147, PFI=            x1        x2
mean  3.207173  0.181271
std   0.367434  0.122699
min   2.186394 -0.057223
max   3.849168  0.468742


100%|██████████| 100/100 [00:00<00:00, 115.48it/s]


KNN: MSE=0.4528, PFI=            x1        x2
mean  3.325726  0.208760
std   0.313218  0.160287
min   2.478541 -0.096711
max   4.106219  0.661589
SAMPLE SIZE N=1000, CORRELATION=0.6
          x1        x2
x1  1.000000  0.587901
x2  0.587901  1.000000


100%|██████████| 100/100 [00:00<00:00, 172.01it/s]


LinearRegression: MSE=1.4890, PFI=            x1        x2
mean -0.008648  0.012873
std   0.002739  0.009248
min  -0.013630 -0.011538
max  -0.001801  0.030346


100%|██████████| 100/100 [00:00<00:00, 170.12it/s]


Lasso: MSE=1.4891, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:00<00:00, 109.93it/s]


DecisionTree: MSE=0.0871, PFI=            x1        x2
mean  2.851289  0.001775
std   0.208143  0.005523
min   2.331619 -0.010591
max   3.262448  0.015362


100%|██████████| 100/100 [00:31<00:00,  3.18it/s]


RandomForest: MSE=0.0599, PFI=            x1        x2
mean  2.794266 -0.000575
std   0.202240  0.002172
min   2.272272 -0.005514
max   3.158072  0.006296


100%|██████████| 100/100 [00:17<00:00,  5.75it/s]


ExtraTrees: MSE=0.0569, PFI=            x1        x2
mean  2.826097  0.004170
std   0.199522  0.002973
min   2.262975 -0.003873
max   3.319881  0.011249


100%|██████████| 100/100 [00:12<00:00,  8.08it/s]


GradientBoosting: MSE=0.0530, PFI=            x1        x2
mean  2.790649  0.000843
std   0.197131  0.001142
min   2.122048 -0.001991
max   3.154833  0.003666


100%|██████████| 100/100 [00:48<00:00,  2.04it/s]


HistGradientBoosting: MSE=0.1419, PFI=            x1        x2
mean  2.548685 -0.005517
std   0.168332  0.006427
min   2.123485 -0.021544
max   2.885872  0.012128


100%|██████████| 100/100 [00:10<00:00,  9.63it/s]


AdaBoost: MSE=0.2347, PFI=            x1        x2
mean  2.183858  0.000412
std   0.156912  0.000196
min   1.606567  0.000000
max   2.518722  0.000589


100%|██████████| 100/100 [00:04<00:00, 22.21it/s]


Bagging: MSE=0.0628, PFI=            x1        x2
mean  2.813756  0.000604
std   0.207767  0.002663
min   2.100347 -0.006172
max   3.268059  0.008493


100%|██████████| 100/100 [00:54<00:00,  1.85it/s]


MLPRegressor: MSE=0.0613, PFI=            x1        x2
mean  2.678460  0.006458
std   0.174959  0.004887
min   2.178767 -0.003036
max   3.081866  0.019081


100%|██████████| 100/100 [00:04<00:00, 20.03it/s]


SVR: MSE=0.0902, PFI=            x1        x2
mean  2.391671  0.048634
std   0.170527  0.034575
min   1.993122 -0.021076
max   2.854917  0.124132


100%|██████████| 100/100 [00:00<00:00, 117.69it/s]

KNN: MSE=0.1277, PFI=            x1        x2
mean  2.283211  0.136444
std   0.178833  0.063604
min   1.667553  0.004293
max   2.780742  0.277674





In [3]:
from sklearn.linear_model import (
    LinearRegression,
    Ridge,
    Lasso,
)
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import (
    RandomForestRegressor,
    GradientBoostingRegressor,
    ExtraTreesRegressor,
    AdaBoostRegressor,
    BaggingRegressor,
    HistGradientBoostingRegressor,
)
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.gaussian_process import GaussianProcessRegressor

models = {
    "LinearRegression": LinearRegression(),
    "Lasso": Lasso(),
    "DecisionTree": DecisionTreeRegressor(random_state=42),
    "RandomForest": RandomForestRegressor(random_state=42),
    "ExtraTrees": ExtraTreesRegressor(random_state=42),
    "GradientBoosting": GradientBoostingRegressor(random_state=42),
    "HistGradientBoosting": HistGradientBoostingRegressor(random_state=42),
    "AdaBoost": AdaBoostRegressor(random_state=42),
    "Bagging": BaggingRegressor(random_state=42),
    "MLPRegressor": MLPRegressor(random_state=42, max_iter=1000),
    "SVR": SVR(),
    "KNN": KNeighborsRegressor(),
}

# Fixed correlation
cor = 0.6

# Different sample sizes
for N in [20, 50, 100, 500, 1000, 10000]:
    X, y = dgp_bivariatenormal(N=N, cor=cor)
    print(f"SAMPLE SIZE N={N}, CORRELATION={cor}")
    print(X.corr())

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    for name, model in models.items():
        model.fit(X_train, y_train)
        mse = mean_squared_error(y_test, model.predict(X_test))
        pfi_scores = pfiss(model, X_train, y_train, X_test, y_test, repetitions=100)
        print(f"{name}: MSE={mse:.4f}, PFI={pfi_scores}")


SAMPLE SIZE N=20, CORRELATION=0.6
          x1        x2
x1  1.000000  0.430252
x2  0.430252  1.000000


100%|██████████| 100/100 [00:00<00:00, 177.09it/s]


LinearRegression: MSE=0.5924, PFI=            x1        x2
mean -0.173774  0.022111
std   0.160656  0.018947
min  -0.398282 -0.006414
max   0.118713  0.050395


100%|██████████| 100/100 [00:00<00:00, 173.21it/s]


Lasso: MSE=0.3421, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:00<00:00, 170.64it/s]


DecisionTree: MSE=0.0505, PFI=            x1        x2
mean  0.423958 -0.012336
std   0.275134  0.007986
min   0.000000 -0.027975
max   0.892126  0.001537


100%|██████████| 100/100 [00:10<00:00,  9.71it/s]


RandomForest: MSE=0.0277, PFI=            x1        x2
mean  0.381305  0.004621
std   0.211244  0.009295
min   0.000000 -0.012630
max   0.713125  0.019352


100%|██████████| 100/100 [00:07<00:00, 13.81it/s]


ExtraTrees: MSE=0.0358, PFI=            x1        x2
mean  0.437753 -0.005564
std   0.243565  0.004682
min  -0.004128 -0.012788
max   0.856049  0.001708


100%|██████████| 100/100 [00:04<00:00, 21.65it/s]


GradientBoosting: MSE=0.0388, PFI=            x1        x2
mean  0.470183 -0.003548
std   0.236382  0.004633
min  -0.006192 -0.012345
max   0.896968  0.004455


100%|██████████| 100/100 [00:05<00:00, 17.74it/s]


HistGradientBoosting: MSE=0.3421, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:07<00:00, 13.94it/s]


AdaBoost: MSE=0.0578, PFI=            x1        x2
mean  0.370401 -0.007108
std   0.213063  0.010759
min   0.000000 -0.025167
max   0.719233  0.008829


100%|██████████| 100/100 [00:02<00:00, 44.23it/s]


Bagging: MSE=0.0723, PFI=            x1        x2
mean  0.379807 -0.017278
std   0.248844  0.016056
min  -0.002953 -0.043538
max   0.724940  0.009176


100%|██████████| 100/100 [00:07<00:00, 13.83it/s]


MLPRegressor: MSE=0.0542, PFI=            x1        x2
mean  0.349774 -0.000062
std   0.202692  0.016371
min  -0.014662 -0.026634
max   0.734796  0.026200


100%|██████████| 100/100 [00:00<00:00, 172.42it/s]


SVR: MSE=0.0516, PFI=            x1        x2
mean  0.384419 -0.004683
std   0.210798  0.010085
min  -0.006682 -0.022590
max   0.753557  0.011815


100%|██████████| 100/100 [00:00<00:00, 145.14it/s]


KNN: MSE=0.1229, PFI=            x1        x2
mean  0.110682  0.061015
std   0.082927  0.066606
min  -0.062810 -0.061282
max   0.252827  0.164665
SAMPLE SIZE N=50, CORRELATION=0.6
          x1        x2
x1  1.000000  0.470591
x2  0.470591  1.000000


100%|██████████| 100/100 [00:00<00:00, 177.14it/s]


LinearRegression: MSE=2.6058, PFI=            x1        x2
mean  2.213898 -0.183962
std   0.969890  0.249573
min   0.241172 -0.636559
max   4.010491  0.288325


100%|██████████| 100/100 [00:00<00:00, 172.34it/s]


Lasso: MSE=4.1346, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:00<00:00, 165.69it/s]


DecisionTree: MSE=0.4155, PFI=            x1        x2
mean  5.973394 -0.073134
std   2.107147  0.040329
min   0.233174 -0.119444
max   8.246080  0.021142


100%|██████████| 100/100 [00:10<00:00,  9.20it/s]


RandomForest: MSE=0.6446, PFI=            x1        x2
mean  5.479400 -0.025630
std   1.851726  0.028243
min   0.932754 -0.074633
max   7.516396  0.007864


100%|██████████| 100/100 [00:07<00:00, 13.36it/s]


ExtraTrees: MSE=0.3790, PFI=            x1        x2
mean  5.846879  0.042726
std   1.939466  0.052859
min   1.046858 -0.077152
max   8.259348  0.120619


100%|██████████| 100/100 [00:04<00:00, 20.44it/s]


GradientBoosting: MSE=0.3696, PFI=            x1        x2
mean  5.652396  0.056992
std   2.046796  0.081294
min   0.840487 -0.066781
max   8.263371  0.190159


100%|██████████| 100/100 [00:07<00:00, 14.20it/s]


HistGradientBoosting: MSE=3.6449, PFI=            x1        x2
mean  0.454929  0.109322
std   0.323093  0.074230
min  -0.075156 -0.016300
max   0.944741  0.231737


100%|██████████| 100/100 [00:07<00:00, 13.35it/s]


AdaBoost: MSE=1.0430, PFI=            x1        x2
mean  4.832447 -0.234235
std   1.546636  0.348073
min   0.054632 -0.743890
max   7.360585  0.011921


100%|██████████| 100/100 [00:02<00:00, 41.41it/s]


Bagging: MSE=0.9519, PFI=            x1        x2
mean  4.846190 -0.121481
std   1.604918  0.134481
min   0.160778 -0.348578
max   7.220750  0.001938


100%|██████████| 100/100 [00:12<00:00,  8.09it/s]


MLPRegressor: MSE=0.4431, PFI=            x1        x2
mean  4.948729  0.131529
std   2.041184  0.214506
min   0.699514 -0.113103
max   8.186203  0.614343


100%|██████████| 100/100 [00:00<00:00, 161.22it/s]


SVR: MSE=1.4463, PFI=            x1        x2
mean  3.259981  0.371190
std   1.374302  0.322399
min   0.262804 -0.038680
max   5.296090  1.163433


100%|██████████| 100/100 [00:00<00:00, 140.01it/s]


KNN: MSE=1.2637, PFI=            x1        x2
mean  3.610976  0.158874
std   1.539687  0.169342
min   0.071541 -0.019822
max   5.479887  0.616779
SAMPLE SIZE N=100, CORRELATION=0.6
          x1        x2
x1  1.000000  0.585996
x2  0.585996  1.000000


100%|██████████| 100/100 [00:00<00:00, 165.73it/s]


LinearRegression: MSE=2.5607, PFI=            x1        x2
mean  0.138469  0.000572
std   0.201988  0.001469
min  -0.339645 -0.002640
max   0.648876  0.004440


100%|██████████| 100/100 [00:00<00:00, 164.75it/s]


Lasso: MSE=2.6226, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:00<00:00, 158.30it/s]


DecisionTree: MSE=0.0557, PFI=            x1        x2
mean  4.981272  0.016679
std   1.039482  0.024615
min   2.415055 -0.008611
max   7.425219  0.074486


100%|██████████| 100/100 [00:11<00:00,  8.34it/s]


RandomForest: MSE=0.0570, PFI=            x1        x2
mean  5.029107 -0.000723
std   1.242860  0.003247
min   0.827672 -0.007360
max   6.943477  0.007468


100%|██████████| 100/100 [00:08<00:00, 11.96it/s]


ExtraTrees: MSE=0.0676, PFI=            x1        x2
mean  4.859762 -0.018481
std   0.921836  0.011949
min   2.407123 -0.048156
max   7.323309  0.012615


100%|██████████| 100/100 [00:05<00:00, 18.16it/s]


GradientBoosting: MSE=0.0469, PFI=            x1        x2
mean  4.748631 -0.000261
std   1.154817  0.006593
min   1.707603 -0.018645
max   7.243828  0.018827


100%|██████████| 100/100 [00:08<00:00, 11.28it/s]


HistGradientBoosting: MSE=1.5213, PFI=            x1        x2
mean  1.985703  0.039282
std   0.688691  0.192100
min   0.372997 -0.412030
max   3.452960  0.431898


100%|██████████| 100/100 [00:06<00:00, 15.40it/s]


AdaBoost: MSE=0.0603, PFI=            x1        x2
mean  4.881588  0.001295
std   0.993158  0.002837
min   2.428130 -0.005709
max   7.123359  0.007762


100%|██████████| 100/100 [00:02<00:00, 41.58it/s]


Bagging: MSE=0.0899, PFI=            x1        x2
mean  4.946509  0.004706
std   1.146808  0.006575
min   2.148674 -0.016445
max   7.580395  0.020699


100%|██████████| 100/100 [00:16<00:00,  5.90it/s]


MLPRegressor: MSE=0.2083, PFI=            x1        x2
mean  4.107800  0.051332
std   1.044415  0.075230
min   2.013835 -0.085144
max   7.018229  0.261013


100%|██████████| 100/100 [00:00<00:00, 159.68it/s]


SVR: MSE=0.4678, PFI=            x1        x2
mean  3.360523  0.120596
std   0.812484  0.139534
min   0.738996 -0.126722
max   5.064491  0.531199


100%|██████████| 100/100 [00:00<00:00, 146.29it/s]


KNN: MSE=0.7316, PFI=            x1        x2
mean  2.996866  0.316011
std   0.750322  0.402140
min   1.608170 -0.411682
max   4.725689  1.390863
SAMPLE SIZE N=500, CORRELATION=0.6
          x1        x2
x1  1.000000  0.567528
x2  0.567528  1.000000


100%|██████████| 100/100 [00:00<00:00, 170.32it/s]


LinearRegression: MSE=2.4245, PFI=            x1        x2
mean -0.001337  0.041688
std   0.006716  0.044170
min  -0.019018 -0.052868
max   0.014457  0.152380


100%|██████████| 100/100 [00:00<00:00, 167.11it/s]


Lasso: MSE=2.4409, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:00<00:00, 139.79it/s]


DecisionTree: MSE=0.0836, PFI=            x1        x2
mean  4.881383 -0.006680
std   0.578336  0.007985
min   3.544995 -0.025960
max   6.393179  0.013720


100%|██████████| 100/100 [00:19<00:00,  5.13it/s]


RandomForest: MSE=0.0464, PFI=            x1        x2
mean  4.674613 -0.001952
std   0.478418  0.002360
min   3.122525 -0.007825
max   5.748551  0.004209


100%|██████████| 100/100 [00:11<00:00,  8.39it/s]


ExtraTrees: MSE=0.0498, PFI=            x1        x2
mean  4.677409 -0.000049
std   0.487062  0.003680
min   3.388478 -0.010107
max   5.843975  0.008786


100%|██████████| 100/100 [00:08<00:00, 11.96it/s]


GradientBoosting: MSE=0.0423, PFI=            x1        x2
mean  4.720475  0.002021
std   0.450979  0.002136
min   3.748511 -0.002718
max   5.966132  0.008760


100%|██████████| 100/100 [00:27<00:00,  3.60it/s]


HistGradientBoosting: MSE=0.2395, PFI=            x1        x2
mean  3.885120  0.023523
std   0.383866  0.018038
min   2.575186 -0.032082
max   4.643765  0.059741


100%|██████████| 100/100 [00:08<00:00, 11.87it/s]


AdaBoost: MSE=0.1177, PFI=            x1        x2
mean  4.329576 -0.002101
std   0.406630  0.001050
min   3.322396 -0.003721
max   5.143742  0.000022


100%|██████████| 100/100 [00:03<00:00, 30.43it/s]


Bagging: MSE=0.0502, PFI=            x1        x2
mean  4.633917 -0.001368
std   0.453569  0.003137
min   3.663157 -0.009681
max   5.840901  0.006663


100%|██████████| 100/100 [00:38<00:00,  2.61it/s]


MLPRegressor: MSE=0.0556, PFI=            x1        x2
mean  4.243460  0.029023
std   0.425220  0.013330
min   3.133367  0.005544
max   5.118861  0.067142


100%|██████████| 100/100 [00:01<00:00, 55.74it/s]


SVR: MSE=0.0676, PFI=            x1        x2
mean  3.928877  0.140169
std   0.401833  0.059360
min   2.856944  0.010466
max   4.927918  0.324389


100%|██████████| 100/100 [00:00<00:00, 133.50it/s]


KNN: MSE=0.1514, PFI=            x1        x2
mean  3.662580  0.319838
std   0.340425  0.118423
min   2.731596  0.109176
max   4.595262  0.581725
SAMPLE SIZE N=1000, CORRELATION=0.6
          x1        x2
x1  1.000000  0.613725
x2  0.613725  1.000000


100%|██████████| 100/100 [00:00<00:00, 172.30it/s]


LinearRegression: MSE=3.6593, PFI=            x1        x2
mean  0.012438  0.002490
std   0.056146  0.016585
min  -0.140043 -0.042780
max   0.130505  0.044253


100%|██████████| 100/100 [00:00<00:00, 169.45it/s]


Lasso: MSE=3.6016, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:00<00:00, 107.50it/s]


DecisionTree: MSE=0.2055, PFI=            x1        x2
mean  6.106173  0.008357
std   0.444021  0.008039
min   4.975292 -0.022456
max   7.004140  0.032830


100%|██████████| 100/100 [00:32<00:00,  3.09it/s]


RandomForest: MSE=0.2397, PFI=            x1        x2
mean  5.797679  0.002324
std   0.403276  0.003630
min   4.629866 -0.007473
max   6.738849  0.022151


100%|██████████| 100/100 [00:16<00:00,  5.94it/s]


ExtraTrees: MSE=0.1804, PFI=            x1        x2
mean  6.072654 -0.006799
std   0.467681  0.006570
min   4.708983 -0.030617
max   6.992365  0.016118


100%|██████████| 100/100 [00:12<00:00,  8.06it/s]


GradientBoosting: MSE=0.1933, PFI=            x1        x2
mean  6.078571 -0.011277
std   0.471957  0.004264
min   4.240662 -0.029617
max   7.099962  0.003355


100%|██████████| 100/100 [00:48<00:00,  2.05it/s]


HistGradientBoosting: MSE=0.7666, PFI=            x1        x2
mean  4.493571  0.088591
std   0.390366  0.029760
min   3.630958 -0.006907
max   5.259736  0.125170


100%|██████████| 100/100 [00:05<00:00, 17.76it/s]


AdaBoost: MSE=0.4270, PFI=            x1        x2
mean  5.164776 -0.000534
std   0.440880  0.010619
min   3.933108 -0.074458
max   5.940010  0.003554


100%|██████████| 100/100 [00:04<00:00, 22.04it/s]


Bagging: MSE=0.2823, PFI=            x1        x2
mean  5.765820 -0.009649
std   0.458335  0.008545
min   4.448755 -0.082845
max   6.624735  0.007757


100%|██████████| 100/100 [00:50<00:00,  1.99it/s]


MLPRegressor: MSE=0.1433, PFI=            x1        x2
mean  5.786965  0.067169
std   0.455666  0.024540
min   4.583997  0.016105
max   6.654629  0.136900


100%|██████████| 100/100 [00:04<00:00, 21.25it/s]


SVR: MSE=0.5876, PFI=            x1        x2
mean  4.360625  0.295274
std   0.290713  0.106195
min   3.681318  0.065198
max   5.021963  0.631641


100%|██████████| 100/100 [00:00<00:00, 121.60it/s]


KNN: MSE=0.5158, PFI=            x1        x2
mean  4.580018  0.461826
std   0.359820  0.121379
min   3.602573  0.165979
max   5.273675  0.760455
SAMPLE SIZE N=10000, CORRELATION=0.6
          x1        x2
x1  1.000000  0.589737
x2  0.589737  1.000000


100%|██████████| 100/100 [00:00<00:00, 157.29it/s]


LinearRegression: MSE=2.2770, PFI=            x1        x2
mean  0.001938  0.000200
std   0.002248  0.000213
min  -0.002722 -0.000295
max   0.007060  0.000747


100%|██████████| 100/100 [00:00<00:00, 167.23it/s]


Lasso: MSE=2.2779, PFI=       x1   x2
mean  0.0  0.0
std   0.0  0.0
min   0.0  0.0
max   0.0  0.0


100%|██████████| 100/100 [00:05<00:00, 17.92it/s]


DecisionTree: MSE=0.0827, PFI=            x1        x2
mean  4.477165 -0.002543
std   0.100264  0.001815
min   4.236423 -0.006610
max   4.753704  0.001874


100%|██████████| 100/100 [05:36<00:00,  3.37s/it]


RandomForest: MSE=0.0478, PFI=            x1        x2
mean  4.454774  0.000214
std   0.098548  0.000652
min   4.152953 -0.001528
max   4.774077  0.001593


100%|██████████| 100/100 [02:08<00:00,  1.28s/it]


ExtraTrees: MSE=0.0525, PFI=            x1        x2
mean  4.454439  0.000431
std   0.097013  0.000958
min   4.151469 -0.002503
max   4.697747  0.002894


100%|██████████| 100/100 [01:29<00:00,  1.12it/s]


GradientBoosting: MSE=0.0413, PFI=            x1        x2
mean  4.483758 -0.000005
std   0.093530  0.000146
min   4.241365 -0.000466
max   4.717610  0.000404


100%|██████████| 100/100 [00:57<00:00,  1.74it/s]


HistGradientBoosting: MSE=0.0683, PFI=            x1        x2
mean  4.277361  0.004396
std   0.092922  0.001220
min   4.024038  0.000039
max   4.493785  0.007654


100%|██████████| 100/100 [00:38<00:00,  2.60it/s]


AdaBoost: MSE=0.4426, PFI=            x1            x2
mean  3.231644 -1.628965e-07
std   0.087133  1.628965e-06
min   2.976464 -1.628965e-05
max   3.443671  0.000000e+00


100%|██████████| 100/100 [00:35<00:00,  2.84it/s]


Bagging: MSE=0.0500, PFI=            x1        x2
mean  4.474123  0.000665
std   0.112942  0.000841
min   4.189285 -0.001172
max   4.706251  0.002443


100%|██████████| 100/100 [02:31<00:00,  1.52s/it]


MLPRegressor: MSE=0.0434, PFI=            x1        x2
mean  4.409951  0.001908
std   0.103958  0.000872
min   4.183505  0.000272
max   4.612784  0.003820


100%|██████████| 100/100 [05:50<00:00,  3.50s/it]


SVR: MSE=0.0511, PFI=            x1        x2
mean  4.091667  0.060159
std   0.087541  0.017846
min   3.875798  0.026659
max   4.315056  0.124823


100%|██████████| 100/100 [00:02<00:00, 43.46it/s]

KNN: MSE=0.0624, PFI=            x1        x2
mean  4.010720  0.129615
std   0.087894  0.022352
min   3.831738  0.065375
max   4.217050  0.182765





In [6]:
import pandas as pd

# Storage list
results = []

# Fixed correlation
cor = 0.6

# Different sample sizes
for N in [20, 50, 100, 1000, 10000]:
    X, y = dgp_bivariatenormal(N=N, cor=cor)
    print(f"SAMPLE SIZE N={N}, CORRELATION={cor}")
    print(X.corr())

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    for name, model in models.items():
        model.fit(X_train, y_train)
        mse = mean_squared_error(y_test, model.predict(X_test))

        # PFI dictionary: {'x1': [...], 'x2': [...]}
        pfi_scores = pfiss(model, X_train, y_train, X_test, y_test, repetitions=100)

        x1_mean = float(pd.Series(pfi_scores["x1"]).mean())
        x1_std  = float(pd.Series(pfi_scores["x1"]).std())
        x2_mean = float(pd.Series(pfi_scores["x2"]).mean())
        x2_std  = float(pd.Series(pfi_scores["x2"]).std())

        results.append({
            "N": N,
            "Model": name,
            "MSE": mse,
            "x1_mean": x1_mean,
            "x1_std": x1_std,
            "x2_mean": x2_mean,
            "x2_std": x2_std
        })

# Save to CSV
df_results = pd.DataFrame(results)
df_results.to_csv("../../results/csv/heatmap_n_results.csv", index=False)

print("Saved results to pfi_results.csv")


SAMPLE SIZE N=20, CORRELATION=0.6
          x1        x2
x1  1.000000  0.838809
x2  0.838809  1.000000


100%|██████████| 100/100 [00:00<00:00, 178.53it/s]
100%|██████████| 100/100 [00:00<00:00, 174.18it/s]
100%|██████████| 100/100 [00:00<00:00, 172.15it/s]
100%|██████████| 100/100 [00:10<00:00,  9.70it/s]
100%|██████████| 100/100 [00:07<00:00, 13.86it/s]
100%|██████████| 100/100 [00:04<00:00, 21.84it/s]
100%|██████████| 100/100 [00:05<00:00, 17.68it/s]
100%|██████████| 100/100 [00:07<00:00, 14.01it/s]
100%|██████████| 100/100 [00:02<00:00, 43.81it/s]
100%|██████████| 100/100 [00:11<00:00,  8.40it/s]
100%|██████████| 100/100 [00:00<00:00, 176.61it/s]
100%|██████████| 100/100 [00:00<00:00, 147.07it/s]


SAMPLE SIZE N=50, CORRELATION=0.6
          x1        x2
x1  1.000000  0.581936
x2  0.581936  1.000000


100%|██████████| 100/100 [00:00<00:00, 164.10it/s]
100%|██████████| 100/100 [00:00<00:00, 179.06it/s]
100%|██████████| 100/100 [00:00<00:00, 170.74it/s]
100%|██████████| 100/100 [00:10<00:00,  9.16it/s]
100%|██████████| 100/100 [00:07<00:00, 13.33it/s]
100%|██████████| 100/100 [00:04<00:00, 20.82it/s]
100%|██████████| 100/100 [00:06<00:00, 14.79it/s]
100%|██████████| 100/100 [00:07<00:00, 14.08it/s]
100%|██████████| 100/100 [00:02<00:00, 43.00it/s]
100%|██████████| 100/100 [00:19<00:00,  5.25it/s]
100%|██████████| 100/100 [00:00<00:00, 168.79it/s]
100%|██████████| 100/100 [00:00<00:00, 147.65it/s]


SAMPLE SIZE N=100, CORRELATION=0.6
          x1        x2
x1  1.000000  0.642189
x2  0.642189  1.000000


100%|██████████| 100/100 [00:00<00:00, 178.03it/s]
100%|██████████| 100/100 [00:00<00:00, 172.14it/s]
100%|██████████| 100/100 [00:00<00:00, 168.17it/s]
100%|██████████| 100/100 [00:11<00:00,  8.47it/s]
100%|██████████| 100/100 [00:08<00:00, 12.41it/s]
100%|██████████| 100/100 [00:05<00:00, 19.21it/s]
100%|██████████| 100/100 [00:08<00:00, 11.30it/s]
100%|██████████| 100/100 [00:03<00:00, 28.81it/s]
100%|██████████| 100/100 [00:02<00:00, 42.29it/s]
100%|██████████| 100/100 [00:26<00:00,  3.81it/s]
100%|██████████| 100/100 [00:00<00:00, 166.41it/s]
100%|██████████| 100/100 [00:00<00:00, 150.08it/s]


SAMPLE SIZE N=1000, CORRELATION=0.6
          x1        x2
x1  1.000000  0.591975
x2  0.591975  1.000000


100%|██████████| 100/100 [00:00<00:00, 176.78it/s]
100%|██████████| 100/100 [00:00<00:00, 173.17it/s]
100%|██████████| 100/100 [00:00<00:00, 110.79it/s]
100%|██████████| 100/100 [00:31<00:00,  3.15it/s]
100%|██████████| 100/100 [00:16<00:00,  5.91it/s]
100%|██████████| 100/100 [00:12<00:00,  8.08it/s]
100%|██████████| 100/100 [00:47<00:00,  2.09it/s]
100%|██████████| 100/100 [00:06<00:00, 14.42it/s]
100%|██████████| 100/100 [00:04<00:00, 22.44it/s]
100%|██████████| 100/100 [00:49<00:00,  2.00it/s]
100%|██████████| 100/100 [00:04<00:00, 20.40it/s]
100%|██████████| 100/100 [00:00<00:00, 126.57it/s]


SAMPLE SIZE N=10000, CORRELATION=0.6
          x1        x2
x1  1.000000  0.599724
x2  0.599724  1.000000


100%|██████████| 100/100 [00:00<00:00, 166.55it/s]
100%|██████████| 100/100 [00:00<00:00, 172.66it/s]
100%|██████████| 100/100 [00:05<00:00, 18.86it/s]
100%|██████████| 100/100 [05:30<00:00,  3.31s/it]
100%|██████████| 100/100 [02:06<00:00,  1.27s/it]
100%|██████████| 100/100 [01:28<00:00,  1.13it/s]
100%|██████████| 100/100 [00:55<00:00,  1.81it/s]
100%|██████████| 100/100 [00:36<00:00,  2.71it/s]
100%|██████████| 100/100 [00:34<00:00,  2.92it/s]
100%|██████████| 100/100 [02:01<00:00,  1.21s/it]
100%|██████████| 100/100 [06:10<00:00,  3.71s/it]
100%|██████████| 100/100 [00:02<00:00, 43.35it/s]

Saved results to pfi_results.csv



