In [1]:
import os
# Pin the common numeric threading backends to a single thread each.
# This runs before the numeric imports below; presumably the backends read
# these variables when they are first loaded, so keep it above those imports.
os.environ.update(
    dict.fromkeys(
        (
            "OMP_NUM_THREADS",
            "OPENBLAS_NUM_THREADS",
            "MKL_NUM_THREADS",
            "VECLIB_NUM_THREADS",
            "NUMEXPR_NUM_THREADS",
        ),
        "1",
    )
)

from tqdm import tqdm

import numpy as np
from scipy.stats import norm

import numpy as np
import source.pipelineprocesser as plp

import matplotlib.pyplot as plt

In [30]:
def option1():
    """Assemble the ``option1`` pipeline and return it.

    Calls made on ``plp``, in order:

    1. ``make_dataset`` followed by ``mean_value_imputation``;
    2. ``soft_ipod`` (argument 0.02) followed by ``remove_outliers``;
    3. ``marginal_screening`` (argument 5) followed by ``extract_features``;
    4. ``stepwise_feature_selection`` (argument 3) and ``lasso``
       (argument 0.08) on the same inputs, merged with ``union``.

    The merged selection is handed to ``plp.make_pipeline(output=...)`` and
    that result is returned.

    A further ``extract_features`` + ``stepwise_feature_selection(…, 5)``
    stage after the union was prototyped here and is currently disabled.

    NOTE(review): the numeric arguments are presumably feature counts and
    penalty strengths -- confirm against ``source.pipelineprocesser``.
    """
    features, target = plp.make_dataset()
    target = plp.mean_value_imputation(features, target)

    outliers = plp.soft_ipod(features, target, 0.02)
    features, target = plp.remove_outliers(features, target, outliers)

    screened = plp.marginal_screening(features, target, 5)
    features = plp.extract_features(features, screened)

    # Argument evaluation is left to right, so stepwise is still registered
    # before lasso.
    selected = plp.union(
        plp.stepwise_feature_selection(features, target, 3),
        plp.lasso(features, target, 0.08),
    )
    return plp.make_pipeline(output=selected)

def option_parallel():
    """Assemble the ``option_parallel`` pipeline and return it.

    Calls made on ``plp``, in order:

    1. ``make_dataset`` followed by ``mean_value_imputation``;
    2. ``soft_ipod`` (argument 0.02) followed by ``remove_outliers``;
    3. ``marginal_screening`` (argument 15) followed by ``extract_features``;
    4. ``stepwise_feature_selection`` (argument 10) and ``lasso``
       (argument 0.04) on the same inputs, combined with ``intersection``,
       followed by ``extract_features``;
    5. ``stepwise_feature_selection`` (argument 3) and ``lasso``
       (argument 0.08) on the same inputs, combined with ``union``.

    Returns the result of ``plp.make_pipeline(output=...)`` for the final
    union.

    NOTE(review): the numeric arguments are presumably feature counts and
    penalty strengths -- confirm against ``source.pipelineprocesser``.
    """
    features, target = plp.make_dataset()
    target = plp.mean_value_imputation(features, target)

    outliers = plp.soft_ipod(features, target, 0.02)
    features, target = plp.remove_outliers(features, target, outliers)

    screened = plp.marginal_screening(features, target, 15)
    features = plp.extract_features(features, screened)

    # First pair of selectors: keep only what both of them pick.
    agreed = plp.intersection(
        plp.stepwise_feature_selection(features, target, 10),
        plp.lasso(features, target, 0.04),
    )
    features = plp.extract_features(features, agreed)

    # Second pair of selectors: keep what either of them picks.
    selected = plp.union(
        plp.stepwise_feature_selection(features, target, 3),
        plp.lasso(features, target, 0.08),
    )
    return plp.make_pipeline(output=selected)

def option_serial():
    """Assemble the ``option_serial`` pipeline and return it.

    Calls made on ``plp``, in order:

    1. ``make_dataset`` followed by ``mean_value_imputation``;
    2. ``soft_ipod`` (argument 0.02) followed by ``remove_outliers``;
    3. three selectors applied one after another, each followed by
       ``extract_features``: ``marginal_screening`` (15), ``lasso`` (0.04),
       ``stepwise_feature_selection`` (6);
    4. ``stepwise_feature_selection`` (argument 3) and ``lasso``
       (argument 0.08) on the same inputs, combined with ``union``.

    Returns the result of ``plp.make_pipeline(output=...)`` for the final
    union.

    NOTE(review): the numeric arguments are presumably feature counts and
    penalty strengths -- confirm against ``source.pipelineprocesser``.
    """
    features, target = plp.make_dataset()
    target = plp.mean_value_imputation(features, target)

    outliers = plp.soft_ipod(features, target, 0.02)
    features, target = plp.remove_outliers(features, target, outliers)

    # Chained reduction: every selector sees only what the previous one kept.
    kept = plp.marginal_screening(features, target, 15)
    features = plp.extract_features(features, kept)

    kept = plp.lasso(features, target, 0.04)
    features = plp.extract_features(features, kept)

    kept = plp.stepwise_feature_selection(features, target, 6)
    features = plp.extract_features(features, kept)

    selected = plp.union(
        plp.stepwise_feature_selection(features, target, 3),
        plp.lasso(features, target, 0.08),
    )
    return plp.make_pipeline(output=selected)

def option1_cv():
    """Assemble the ``option1_cv`` pipeline and return it.

    Makes the same sequence of ``plp`` calls as ``option1`` (imputation,
    ``soft_ipod`` + ``remove_outliers``, ``marginal_screening`` +
    ``extract_features``, then ``union`` of ``stepwise_feature_selection``
    and ``lasso``), but each tunable step also receives a set of values as
    an extra positional argument.

    Returns the result of ``plp.make_pipeline(output=...)`` for the union.

    NOTE(review): the sets are presumably the candidate values searched when
    the pipeline is tuned -- confirm against ``source.pipelineprocesser``.
    """
    features, target = plp.make_dataset()
    target = plp.mean_value_imputation(features, target)

    outliers = plp.soft_ipod(features, target, 0.02, {0.02, 0.018})
    features, target = plp.remove_outliers(features, target, outliers)

    screened = plp.marginal_screening(features, target, 5, {3, 5})
    features = plp.extract_features(features, screened)

    selected = plp.union(
        plp.stepwise_feature_selection(features, target, 3, {2, 3}),
        plp.lasso(features, target, 0.08, {0.08, 0.12}),
    )
    return plp.make_pipeline(output=selected)



def option2_cv():
    """Assemble the ``option2_cv`` pipeline and return it.

    Calls made on ``plp``, in order:

    1. ``make_dataset`` followed by ``definite_regression_imputation``;
    2. ``marginal_screening`` (5, with the set ``{3, 5}``) followed by
       ``extract_features``;
    3. ``cook_distance`` (3.0, with the set ``{2.0, 3.0}``) followed by
       ``remove_outliers``;
    4. ``stepwise_feature_selection`` (3, ``{2, 3}``) and ``lasso``
       (0.08, ``{0.08, 0.12}``) on the same inputs, combined with
       ``intersection``.

    Unlike ``option1_cv``, screening happens before outlier removal here and
    the final two selections are intersected rather than unioned.

    Returns the result of ``plp.make_pipeline(output=...)`` for the
    intersection.

    NOTE(review): the sets are presumably the candidate values searched when
    the pipeline is tuned -- confirm against ``source.pipelineprocesser``.
    """
    features, target = plp.make_dataset()
    target = plp.definite_regression_imputation(features, target)

    screened = plp.marginal_screening(features, target, 5, {3, 5})
    features = plp.extract_features(features, screened)

    outliers = plp.cook_distance(features, target, 3.0, {2.0, 3.0})
    features, target = plp.remove_outliers(features, target, outliers)

    selected = plp.intersection(
        plp.stepwise_feature_selection(features, target, 3, {2, 3}),
        plp.lasso(features, target, 0.08, {0.08, 0.12}),
    )
    return plp.make_pipeline(output=selected)

# Shape of the synthetic data: X is drawn as (n, p), y as (n,).
n = 800
p = 320

# Pipeline under test.
pl = option1()
# Single-lasso alternative, kept for reference:
# pl = plp.make_pipeline(output=plp.lasso(*plp.make_dataset(), 0.04))

# Seeded generator: the data draws and the index pick below are repeatable.
rng = np.random.default_rng(0)

for _ in tqdm(range(1)):
    # X and y are independent standard-normal draws. X is drawn first; keep
    # this order so the random stream is consumed the same way.
    X = rng.normal(size=(n, p))
    y = rng.normal(size=n)

    # Tuned multi-pipeline alternative, kept for reference:
    # pl = plp.make_pipelines(option1_cv(), option2_cv())
    # pl.tune(X, y, n_iters=[16, 16], cv=5, random_state=0)  # not n_iter but n_iters for MultiPipeline, fix seed

    M, _ = pl(X, y)
    if len(M) > 0:
        # Pick one position in M uniformly at random and run inference on it.
        index = rng.choice(len(M))
        # NOTE(review): the saved output (an integer, then a float below 1)
        # suggests (feature index, p-value) -- confirm against pl.inference.
        hoge1, hoge2 = pl.inference(X, y, 1.0, index)
        print(hoge1, hoge2)
    else:
        # M is empty for this draw, so there is no index to pick.
        print("hoge")


100%|██████████| 1/1 [00:01<00:00,  1.14s/it]

55 0.8219224083679386





In [11]:
import pickle
import numpy as np


# Read back one saved run from the timing experiments.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only
# open result files that were produced locally by this project.
with open("time/results_default/200_20_0.0_0.pkl", mode="rb") as handle:
    data = pickle.load(handle)

# Transpose the stored records into two parallel tuples; the unpacking
# requires the transpose to yield exactly two columns.
results, times = zip(*data)
# Report the mean of the second column and the number of records.
print(np.mean(times), len(times))


0.11389502573013306 1000
