In [78]:
import numpy as np
import pandas as pd
import openml
import lccv
import os, psutil
import gc
import logging
import traceback

from func_timeout import func_timeout, FunctionTimedOut

import time
import random

import itertools as it
import scipy.stats
from scipy.sparse import lil_matrix
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

import sklearn
from sklearn import metrics
from sklearn import *

from func_timeout import func_timeout, FunctionTimedOut
#from commons import *
from tqdm import tqdm

import import_ipynb
import Commons
import copy

eval_logger = logging.getLogger("evalutils")


def get_dataset(openmlid):
    ds = openml.datasets.get_dataset(openmlid)
    df = ds.get_data()[0]
    num_rows = len(df)
        
    # prepare label column as numpy array
    print(f"Read in data frame. Size is {len(df)} x {len(df.columns)}.")
    X = np.array(df.drop(columns=[ds.default_target_attribute]).values)
    y = np.array(df[ds.default_target_attribute].values)
    if y.dtype != int:
        y_int = np.zeros(len(y)).astype(int)
        vals = np.unique(y)
        for i, val in enumerate(vals):
            mask = y == val
            y_int[mask] = i
        y = y_int
        
    print(f"Data is of shape {X.shape}.")
    return X, y




def format_learner(learner):
    learner_name = str(learner).replace("\n", " ").replace("\t", " ")
    for k in  range(20):
        learner_name = learner_name.replace("  ", " ")
    return learner_name


def decide_block_train(pl, anchor):
    steps = pl.steps
    predictor = steps[-1][1]
    if type(predictor) == sklearn.ensemble.HistGradientBoostingClassifier:
        min_samples_leaf = predictor.min_samples_leaf
        return anchor >= 2* min_samples_leaf
    
    return True
    
class Evaluator:
    
    def __init__(self, X, y, binarize_sparse = False):
        self.X = X
        self.y = y
        self.best_observations=None
        # determine fixed pre-processing steps for imputation and binarization
        types = [set([type(v) for v in r]) for r in X.T]
        numeric_features = [c for c, t in enumerate(types) if len(t) == 1 and list(t)[0] != str]
        numeric_transformer = Pipeline([("imputer", sklearn.impute.SimpleImputer(strategy="median"))])
        categorical_features = [i for i in range(X.shape[1]) if i not in numeric_features]
        missing_values_per_feature = np.sum(pd.isnull(X), axis=0)
        eval_logger.info(f"There are {len(categorical_features)} categorical features, which will be binarized.")
        eval_logger.info(f"Missing values for the different attributes are {missing_values_per_feature}.")
        if len(categorical_features) > 0 or sum(missing_values_per_feature) > 0:
            categorical_transformer = Pipeline([
                ("imputer", sklearn.impute.SimpleImputer(strategy="most_frequent")),
                ("binarizer", sklearn.preprocessing.OneHotEncoder(handle_unknown='ignore', sparse = binarize_sparse)),
            ])
            self.mandatory_pre_processing = [("impute_and_binarize", ColumnTransformer(
                transformers=[
                    ("num", numeric_transformer, numeric_features),
                    ("cat", categorical_transformer, categorical_features),
                ]
            ))]
        else:
            self.mandatory_pre_processing = []
    
    def eval_pipeline_on_fold(self, pl, X_train, X_test, y_train, y_test, timeout = None):
        try:
            pl = Pipeline(self.mandatory_pre_processing + sklearn.base.clone(pl).steps)
            
            h1_before, h2_before = hash(X_train.tostring()), hash(X_test.tostring())
            if timeout is None:
                eval_logger.info(f"Fitting model with {X_train.shape[0]} instances and without timeout.")
                pl.fit(X_train, y_train)
            else:
                eval_logger.info(f"Fitting model with {X_train.shape[0]} instances and timeout {timeout}.")
                func_timeout(timeout, pl.fit, (X_train, y_train))
                
            y_hat = pl.predict(X_test)
            error_rate = 1 - sklearn.metrics.accuracy_score(y_test, y_hat)
            eval_logger.info(f"Observed an error rate of {error_rate}")
            h1_after, h2_after = hash(X_train.tostring()), hash(X_test.tostring())
            if h1_before != h1_after or h2_before != h2_after:
                raise Exception("Pipeline has modified the original data, which is forbidden!")
            return error_rate
        
        except FunctionTimedOut:
            eval_logger.info(f"Timeout observed for evaluation, stopping and returning nan.")
        except KeyboardInterrupt:
            raise
        except Exception as e:
            eval_logger.info(f"Observed some exception. Stopping. Exception: {e}")
        
        return np.nan
    
    def mccv(self, learner, target_size=.9, timeout=None, seed=0, repeats = 10):

        """
        Conducts a 90/10 MCCV (imitating a bit a 10-fold cross validation)
        """
        eval_logger.info(f"Running mccv with seed  {seed}")
        if not timeout is None:
            deadline = time.time() + timeout

        scores = []
        n = self.X.shape[0]
        num_examples = int(target_size * n)
        deadline = None if timeout is None else time.time() + timeout

        seed *= 13
        for r in range(repeats):
            eval_logger.info(f"Seed in MCCV: {seed}. Training on {num_examples} examples. That is {np.round(100 * num_examples / self.X.shape[0])}% of the data (testing on rest).")
            
            # get random train/test split based on seed
            random.seed(seed)
            n = self.X.shape[0]
            indices_train = random.sample(range(n), num_examples)
            mask_train = np.zeros(n)
            mask_train[indices_train] = 1
            mask_train = mask_train.astype(bool)
            mask_test = (1 - mask_train).astype(bool)
            X_train = self.X[mask_train]
            y_train = self.y[mask_train]
            X_test = self.X[mask_test]
            y_test = self.y[mask_test]
            
            # evaluate pipeline
            timeout_local = None if timeout is None else deadline - time.time()
            error_rate = self.eval_pipeline_on_fold(learner, X_train, X_test, y_train, y_test, timeout=timeout_local)
            scores.append(error_rate)
            seed += 1
            del X_train, X_test
        gc.collect()

        return scores
    
    def get_result_of_cv(self,learner_inst ,folds, seed = None, timeout = None):
        kf = sklearn.model_selection.KFold(n_splits=folds, random_state=np.random.RandomState(seed), shuffle=True)
        scores = []
        deadline = time.time() + timeout if timeout is not None else None
        for train_index, test_index in kf.split(X):
            X_train, y_train = X[train_index], y[train_index]
            X_test, y_test = X[test_index], y[test_index]
            timeout_loc = None if timeout is None else deadline - time.time()
            error_rate = self.eval_pipeline_on_fold(learner_inst, X_train, X_test, y_train, y_test, timeout = timeout_loc)
            if not np.isnan(error_rate):
                scores.append(error_rate)
        out = np.mean(scores) if scores else np.nan
        eval_logger.info(f"Returning {out} as the avg over observed scores {scores}")
        return out
    

    def get_pipeline_from_descriptor(self, learner):
        return learner
        #return sklearn.pipeline.Pipeline([(step_name, build_estimator(comp, params, self.X, self.y)) for step_name, (comp, params) in learner])

    '''
        This is the main function that must be implemented by the approaches
    '''
    def select_model(self, learners):
        raise NotImplemented()

        
        
        
        
        

class SH(Evaluator):
    
    def __init__(self, X, y, binarize_sparse, timeout_per_evaluation, max_train_budget, b_min = 64, seed = 0, repeats = 10):
        self.timeout_per_evaluation = timeout_per_evaluation
        self.b_min = b_min
        self.seed = seed
        self.repeats = repeats
        self.max_train_budget = max_train_budget
        self.r=0
        super().__init__(X, y, binarize_sparse)
    
    def select_model(self, learners):
        b_min = self.b_min
        test_budget = 1 - self.max_train_budget
        b_max = int(self.X.shape[0] * (1 - test_budget))
        timeout = self.timeout_per_evaluation
        print(f"b_max is {b_max}")
        n = len(learners)
        num_phases = int(np.log2(n) - 1)
        eta = (b_max / b_min)**(1/num_phases)
        print(f"Eta is {eta}")
        anchors = [int(np.round(b_min * eta**i)) for i in range(num_phases + 1)]
        populations = [int(np.round(n / (2**i))) for i in range(num_phases + 1)]
        if num_phases != int(num_phases):
            raise Exception(f"Number of learners is {len(learners)}, which is not a power of 2!")
        num_phases = int(num_phases)
        print(f"There will be {num_phases + 1} phases with the following setup.")
        for anchor, population in zip(anchors, populations):
            print(f"Evaluate {population} on {anchor}")

        best_seen_score = np.inf
        best_seen_pl = None

        def get_scores_on_budget(candidates, budget):
            scores = []
            for candidate in tqdm(candidates):
                deadline = None if timeout is None else time.time() + timeout

                temp_pipe = self.get_pipeline_from_descriptor(candidate)
                scores_for_candidate_at_budget = []
                for i in range(self.repeats):
                    if deadline < time.time():
                        break
                    try:
                        X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(self.X, self.y, train_size = budget, test_size = test_budget)
                        error_rate = self.eval_pipeline_on_fold(temp_pipe, X_train, X_test, y_train, y_test, deadline - time.time())
                        if not np.isnan(error_rate):
                            scores_for_candidate_at_budget.append(np.round(error_rate, 4))
                        else:
                            scores_for_candidate_at_budget.append(np.nan)
                    except KeyboardInterrupt:
                        raise
                    except Exception as e:
                        print(f"There was an error in the evaluation of candidate {candidate}. Ignoring it. Error: {e}")
                        scores_for_candidate_at_budget.append(np.nan)
                
                scores.append(scores_for_candidate_at_budget)
            print(scores)
            return scores

        time_start = time.time()
        #population = learners.copy()
        population=copy.deepcopy(learners)
        for i, anchor in enumerate(anchors):
            time_start_phase = time.time()
            scores_in_round = get_scores_on_budget(population, anchor)
            runtime_phase = time.time() - time_start_phase
            mean_scores_tmp = [np.nanmean(s) if np.count_nonzero(np.isnan(s)) < len(s) else np.nan for s in scores_in_round]
            if all(np.isnan(mean_scores_tmp)):
                print("All candidates evalated nan in last round, aborting evaluation.")
                break
            mean_scores = mean_scores_tmp
            index_of_best_mean_score_in_round = np.nanargmin(mean_scores)
            best_mean_score_in_round = mean_scores[index_of_best_mean_score_in_round]
            if best_mean_score_in_round < best_seen_score:
                best_seen_score = best_mean_score_in_round
                best_seen_pl = population[index_of_best_mean_score_in_round]

            print(f"Finished round {i+1} after {np.round(runtime_phase, 2)}s. Scores are: {mean_scores}.\nBest score was: {best_mean_score_in_round} (all times best score was {best_seen_score})")
            best_indices = np.argsort(mean_scores)[:int(len(population) / 2)]
            print(f"Best indices are: {best_indices}.")
            if len(population) > 2:
                population = [p for j, p in enumerate(population) if j in best_indices]
        runtime = time.time () - time_start

        return self.get_pipeline_from_descriptor(best_seen_pl)
    

class VerticalEvaluator(Evaluator):
    
    def __init__(self, X, y, binarize_sparse, validation, train_size, timeout_per_evaluation, epsilon, seed=0, exception_on_failure=False, other_args = {},best_observations=None):
        super().__init__(X, y, binarize_sparse)
        self.r=0
        self.best_observations = best_observations
        self.other_args = other_args
        if validation == "cv":
            if train_size == 0.8:
                num_folds = 5
            elif train_size == 0.9:
                num_folds = 10
            else:
                raise ValueError(f"Cannot run cross-validation for train_size {train_size}. Must be 0.8 or 0.9.")
            self.validation_func = lambda pl, seed: self.cv(pl, seed, num_folds, *self.other_args)
        elif "lccv" in validation:
            
            is_flex = "flex" in validation
            
            self.r = 1.0
            if train_size == 0.8:
                self.validation_func = self.lccv80flex if is_flex else self.lccv80
            elif train_size == 0.9:
                self.validation_func = self.lccv90flex if is_flex else self.lccv90
            else:
                raise ValueError(f"Cannot run LCCV for train_size {train_size}. Must be 0.8 or 0.9.")
        elif validation == "wilcoxon":
            self.r = 1.0
            self.best_observations = None
            if train_size == 0.8:
                self.validation_func = lambda pl, seed: self.wilcoxon(pl, seed = seed, folds = 5)
            elif train_size == 0.9:
                self.validation_func = lambda pl, seed: self.wilcoxon(pl, seed = seed, folds = 10)
            else:
                raise ValueError(f"Cannot run Wilcoxon for train_size {train_size}. Must be 0.8 or 0.9.")
        else:
            raise ValueError(f"Unsupported validation function {validation}.")
        self.timeout_per_evaluation = timeout_per_evaluation
        self.epsilon = epsilon
        self.seed = seed
        self.exception_on_failure = exception_on_failure
        
    def cv(self, pl, seed, folds):
        kf = sklearn.model_selection.KFold(n_splits=folds, random_state=np.random.RandomState(seed), shuffle=True)
        scores = []
        deadline = time.time() + self.timeout_per_evaluation if self.timeout_per_evaluation is not None else None
        for train_index, test_index in kf.split(self.X):
            learner_inst_copy = sklearn.base.clone(pl)
            X_train, y_train = self.X[train_index], self.y[train_index]
            X_test, y_test = self.X[test_index], self.y[test_index]
            timeout_loc = None if deadline is None else deadline - time.time()
            scores.append(self.eval_pipeline_on_fold(pl, X_train, X_test, y_train, y_test, timeout = timeout_loc))
        require_at_least_two = time.time() < deadline
        is_valid_result = len(scores) > 0 and ((not require_at_least_two) or np.count_nonzero(np.isnan(scores)) < folds - 1)
        out = np.nanmean(scores) if is_valid_result else np.nan # require at least two valid samples in the batch if the timeout was not hit
        eval_logger.info(f"Returning {out} as the avg over observed scores {scores}")
        return out
    
    def wilcoxon(self, pl, seed=0, folds = 10):

        eval_logger.info(f"Running Wilcoxon-guarded CV with seed  {seed}")
        if not self.timeout_per_evaluation is None:
            deadline = time.time() + self.timeout_per_evaluation
        
        kf = sklearn.model_selection.KFold(n_splits=folds, random_state=np.random.RandomState(seed), shuffle=True)
        scores = []
        deadline = time.time() + self.timeout_per_evaluation if self.timeout_per_evaluation is not None else None
        for inner_run, (train_index, test_index) in enumerate(kf.split(self.X)):
            learner_inst_copy = sklearn.base.clone(pl)
            X_train, y_train = self.X[train_index], self.y[train_index]
            X_test, y_test = self.X[test_index], self.y[test_index]
            timeout_loc = None if deadline is None else deadline - time.time()
            scores.append(self.eval_pipeline_on_fold(pl, X_train, X_test, y_train, y_test, timeout = timeout_loc))

            # now conduct a wilcoxon signed rank test to determine whether significance has been reached
            scores_currently_best = np.array(self.best_observations[:len(scores)]) if self.best_observations is not None else np.ones(len(scores))
            eval_logger.info(f"Currently best observations after {inner_run + 1} evaluations: {np.round(scores_currently_best, 2)}")
            eval_logger.info(f"Current cand.  observations after {inner_run + 1} evaluations: {np.round(scores, 2)}")
            if any(np.array(scores) != scores_currently_best):
                statistic, pval = scipy.stats.wilcoxon(scores, scores_currently_best)
                eval_logger.info(f"p-value is {pval}")
                if pval < 0.05:
                    eval_logger.info(f"reached certainty in fold {inner_run + 1}.")
                    if np.mean(scores) > self.r:
                        eval_logger.info("it is certainly worse, so aborting.")
                        break
                    else:
                        eval_logger.info("it is certainly better, so continuing")
            else:
                eval_logger.info("omitting test, because all scores are still identical")
        require_at_least_two = time.time() < deadline
        is_valid_result = len(scores) > 0 and ((not require_at_least_two) or np.count_nonzero(np.isnan(scores)) < folds - 1)
        out = np.nanmean(scores) if is_valid_result else np.nan # require at least two valid samples in the batch if the timeout was not hit
        if not np.isnan(out) and out < self.r:
            self.r = out
            self.best_observations = scores
        eval_logger.info(f"Returning {out} as the avg over observed scores {scores}")
        return out
    
    def lccv90(self, pl, seed): # maximum train size is 90% of the data (like for 10CV)
        try:
            #enforce_all_anchor_evaluations = self.r == 1
            pl = Pipeline(self.mandatory_pre_processing + pl.steps)
            args = {
                "r": self.r,
                "timeout": self.timeout_per_evaluation,
                "seed": seed,
                "target_anchor": .9,
                "min_evals_for_stability": 3,
                "MAX_EVALUATIONS": 10,
                "enforce_all_anchor_evaluations": enforce_all_anchor_evaluations,
                "fix_train_test_folds": True
            }
            for key, val in self.other_args.items():
                args[key] = val
            
            score,score_est,elc,model = lccv.lccv(pl, self.X, self.y,r=.90)
            self.r = min(self.r, score_est)
            return score_est
        except KeyboardInterrupt:
            raise
        except:
            eval_logger.info("Observed some exception. Returning nan")
            return np.nan

    def lccv80(self, pl, seed=None): # maximum train size is 80% of the data (like for 5CV)
        try:
            enforce_all_anchor_evaluations = self.r == 1
            pl = Pipeline(self.mandatory_pre_processing + pl.steps)
            args = {
                "r": self.r,
                "timeout": self.timeout_per_evaluation,
                "seed": seed,
                "target_anchor": .8,
                "min_evals_for_stability": 3,
                "MAX_EVALUATIONS": 5,
                "enforce_all_anchor_evaluations": enforce_all_anchor_evaluations,
                "fix_train_test_folds": True
            }
            
            for key, val in self.other_args.items():
                args[key] = val
            print(args)
            score,score_est,elc,model = lccv.lccv(pl, self.X, self.y,self.r,enforce_all_anchor_evaluations=True,target_anchor=0.8,seed=seed,min_evals_for_stability=3,fix_train_test_folds=True,timeout=timeout_per_evaluation)
            print(pl)
            print(score)
            self.r = min(self.r, score_est)
            return score_est
        except KeyboardInterrupt:
            raise
        except:
            eval_logger.info("Observed some exception. Returning nan")
            return np.nan

    def lccv90flex(self, pl, seed=None): # maximum train size is 90% of the data (like for 10CV)
        try:
            enforce_all_anchor_evaluations = self.r == 1
            pl = Pipeline(self.mandatory_pre_processing + pl.steps)
            
            args = {
                "r": self.r,
                "timeout": self.timeout_per_evaluation,
                "seed": seed,
                "target_anchor": .9,
                "min_evals_for_stability": 3,
                "MAX_EVALUATIONS": 10,
                "enforce_all_anchor_evaluations": enforce_all_anchor_evaluations,
                "use_train_curve": decide_block_train,
                "fix_train_test_folds": False
            }
            for key, val in self.other_args.items():
                args[key] = val
            
            _,score,elc,model = lccv.lccv(pl, self.X, self.y,r=0.90)
            self.r = min(self.r, score)
            return score
        except KeyboardInterrupt:
            raise
        except Exception as e:
            traceback.print_exc()
            eval_logger.info("Observed some exception. Returning nan")
            return np.nan

    def lccv80flex(self, pl, seed=None): # maximum train size is 80% of the data (like for 5CV)
        try:
            #enforce_all_anchor_evaluations = self.r == 1
            pl = Pipeline(self.mandatory_pre_processing + pl.steps)
            _,score,elc,model = lccv.lccv(pl, self.X, self.y, r=self.r, timeout=self.timeout_per_evaluation, seed=seed, target_anchor=.8, min_evals_for_stability=3, MAX_EVALUATIONS = 5, enforce_all_anchor_evaluations = enforce_all_anchor_evaluations,fix_train_test_folds=False, use_train_curve=decide_block_train, visualize_lcs = False)[0]
            self.r = min(self.r, score)
            print(score)
            return elc
        except KeyboardInterrupt:
            raise
        except Exception as e:
            eval_logger.info(f"Observed some exception. Returning nan. Exception was {e}")
            return np.nan
    
    def select_model(self, learners, errors = "ignore"):
        
        hard_cutoff = 2 * self.timeout_per_evaluation
        r = 1.0
        best_score = 1
        chosen_learner = None
        validation_times = []
        exp_logger = logging.getLogger("experimenter")
        n = len(learners)
        memory_history = []
        index_of_best_learner = -1

        target_anchor = int(np.floor(self.X.shape[0] * .9))  # TODO hardcoded, please fix
        target_anchor_count = 0
        learner_crash_count = 0
        for i, learner in enumerate(learners):
            temp_pipe = self.get_pipeline_from_descriptor(learner)
            exp_logger.info(f"""
                --------------------------------------------------
                Checking learner {i + 1}/{n} (""" + str(temp_pipe).replace("\n", "").replace("\t", "").replace(" ", "").replace(" ", "").replace(" ", "").replace(" ", "").replace(" ", "").replace(" ", "").replace(" ", "").replace(" ", "") + """)
                --------------------------------------------------""")
            cur_mem = int(psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024)
            memory_history.append(cur_mem)
            exp_logger.info(f"Currently used memory: {cur_mem}MB. Memory history is: {memory_history}")
            
            validation_start = time.time()
            try:
                score = self.validation_func(temp_pipe, seed=13 * self.seed + i)
                runtime = time.time() - validation_start
                eval_logger.info(f"Observed score {score} for {format_learner(temp_pipe)}. Validation took {int(np.round(runtime * 1000))}ms")
                r = min(r, score + self.epsilon)
                eval_logger.info(f"r is now: {r}")
                if score < best_score:
                    best_score = score
                    chosen_learner = temp_pipe
                    index_of_best_learner = i
                    eval_logger.info(f"Thas was a NEW BEST score. r has been updated. In other words, currently chosen model is {format_learner(chosen_learner)}")
                else:
                    del temp_pipe
                    gc.collect()
                    eval_logger.info(f"Candidate was NOT competitive. Eliminating the object and garbage collecting.")
                
            except KeyboardInterrupt:
                raise
            except Exception as e:
                del temp_pipe
                gc.collect()
                exp_logger.info(f"Candidate was unsuccessful, deleting it from memory.")
                runtime = time.time() - validation_start
                
                if errors == "raise":
                    raise e
                
            validation_times.append(runtime)
            
        eval_logger.info(f"Chosen learner was found in iteration {index_of_best_learner + 1}")
        return chosen_learner
    

    

In [27]:
X,y=get_dataset(1457)
print(X,y)

  ds = openml.datasets.get_dataset(openmlid)


Read in data frame. Size is 1500 x 10001.
Data is of shape (1500, 10000).
[[ 5.  3.  4. ...  0.  0.  0.]
 [12.  3.  6. ...  0.  0.  0.]
 [ 3.  2.  2. ...  0.  1.  0.]
 ...
 [ 8. 10.  2. ...  0.  0.  0.]
 [11. 12. 10. ...  0.  1.  2.]
 [ 8.  7.  5. ...  0.  1.  1.]] [ 0  0  0 ... 49 49 49]


In [28]:
%%time
lccv.lccv(sklearn.tree.DecisionTreeClassifier(),X,y,r=0.80)

Wall time: 1min 36s


(nan,
 0.22666666666666666,
 {64: {'n': 3,
   'mean': 0.06444444444444444,
   'std': 0.00831479419283098,
   'conf': array([0.05503554, 0.07385335])},
  128: {'n': 3,
   'mean': 0.11777777777777781,
   'std': 0.00831479419283098,
   'conf': array([0.10836888, 0.12718668])},
  256: {'n': 5,
   'mean': 0.22666666666666666,
   'std': 0.015202339001321832,
   'conf': array([0.21334147, 0.23999186])},
  512: {'n': 4,
   'mean': 0.3433333333333333,
   'std': 0.017320508075688766,
   'conf': array([0.32635955, 0.36030712])},
  1024: {'n': 3,
   'mean': 0.40444444444444444,
   'std': 0.04012326685615066,
   'conf': array([0.35904153, 0.44984736])}},
 <lccv.lccv.EmpiricalLearningModel at 0x1f8601b88b0>)

In [83]:
%%time
lccv.lccv(sklearn.tree.DecisionTreeClassifier(),X,y,r=0.80,target_anchor=0.8)

Wall time: 1min 28s


(nan,
 0.33916666666666667,
 {64: {'n': 3,
   'mean': 0.060000000000000005,
   'std': 0.012472191289246468,
   'conf': array([0.04588665, 0.07411335])},
  128: {'n': 3,
   'mean': 0.14333333333333334,
   'std': 0.024944382578492942,
   'conf': array([0.11510663, 0.17156004])},
  256: {'n': 5,
   'mean': 0.2106666666666667,
   'std': 0.05242560867023334,
   'conf': array([0.16471444, 0.25661889])},
  512: {'n': 4,
   'mean': 0.33916666666666667,
   'std': 0.009537935951882999,
   'conf': array([0.32981966, 0.34851367])},
  1024: {'n': 3,
   'mean': 0.41111111111111115,
   'std': 0.017708197167232476,
   'conf': array([0.39107277, 0.43114945])}},
 <lccv.lccv.EmpiricalLearningModel at 0x1f85e64cbe0>)

In [29]:
from sklearn.tree import DecisionTreeClassifier
pipe = Pipeline([('DTC',DecisionTreeClassifier())])
selector=VerticalEvaluator(X,y,False,"cv",train_size=0.8,timeout_per_evaluation=100,epsilon = 0.01)

In [30]:
%%time
selector.cv(pipe,seed=0,folds=5)

  h1_before, h2_before = hash(X_train.tostring()), hash(X_test.tostring())
  h1_after, h2_after = hash(X_train.tostring()), hash(X_test.tostring())
  h1_before, h2_before = hash(X_train.tostring()), hash(X_test.tostring())
  h1_after, h2_after = hash(X_train.tostring()), hash(X_test.tostring())
  h1_before, h2_before = hash(X_train.tostring()), hash(X_test.tostring())
  h1_after, h2_after = hash(X_train.tostring()), hash(X_test.tostring())
  h1_before, h2_before = hash(X_train.tostring()), hash(X_test.tostring())
  h1_after, h2_after = hash(X_train.tostring()), hash(X_test.tostring())
  h1_before, h2_before = hash(X_train.tostring()), hash(X_test.tostring())
  h1_after, h2_after = hash(X_train.tostring()), hash(X_test.tostring())


Wall time: 1min 14s


0.5793333333333333

In [31]:
selector=VerticalEvaluator(X,y,False,"wilcoxon",train_size=0.8,timeout_per_evaluation=100,epsilon = 0.01)

In [32]:
selector.wilcoxon(pipe)

  h1_before, h2_before = hash(X_train.tostring()), hash(X_test.tostring())
  h1_after, h2_after = hash(X_train.tostring()), hash(X_test.tostring())
  h1_before, h2_before = hash(X_train.tostring()), hash(X_test.tostring())
  h1_after, h2_after = hash(X_train.tostring()), hash(X_test.tostring())
  h1_before, h2_before = hash(X_train.tostring()), hash(X_test.tostring())
  h1_after, h2_after = hash(X_train.tostring()), hash(X_test.tostring())
  h1_before, h2_before = hash(X_train.tostring()), hash(X_test.tostring())
  h1_after, h2_after = hash(X_train.tostring()), hash(X_test.tostring())
  h1_before, h2_before = hash(X_train.tostring()), hash(X_test.tostring())
  h1_after, h2_after = hash(X_train.tostring()), hash(X_test.tostring())
  h1_before, h2_before = hash(X_train.tostring()), hash(X_test.tostring())
  h1_after, h2_after = hash(X_train.tostring()), hash(X_test.tostring())
  h1_before, h2_before = hash(X_train.tostring()), hash(X_test.tostring())
  h1_before, h2_before = hash(X_train

0.5522222222222222

In [79]:
selector=SH(X,y,False,100,64)
pipe = Pipeline(steps=[("dataprocessor",MultinomialNB())])

In [80]:
selector.select_model(pipe)

b_max is 96000
Eta is 0.0006666666666666666
There will be 0 phases with the following setup.


In [72]:
import sklearn.ensemble
from sklearn.ensemble import RandomForestClassifier
import sklearn.naive_bayes
from sklearn.naive_bayes import MultinomialNB

In [36]:
help(SH)

Help on class SH in module __main__:

class SH(Evaluator)
 |  SH(X, y, binarize_sparse, timeout_per_evaluation, max_train_budget, b_min=64, seed=0, repeats=10)
 |  
 |  Method resolution order:
 |      SH
 |      Evaluator
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, X, y, binarize_sparse, timeout_per_evaluation, max_train_budget, b_min=64, seed=0, repeats=10)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  select_model(self, learners)
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from Evaluator:
 |  
 |  eval_pipeline_on_fold(self, pl, X_train, X_test, y_train, y_test, timeout=None)
 |  
 |  get_pipeline_from_descriptor(self, learner)
 |  
 |  get_result_of_cv(self, learner_inst, folds, seed=None, timeout=None)
 |  
 |  mccv(self, learner, target_size=0.9, timeout=None, seed=0, repeats=10)
 |      Conducts a 90/10 MCCV (imitating a bit a 10-fold cross validation)
 

In [24]:
help(VerticalEvaluator)

Help on class VerticalEvaluator in module __main__:

class VerticalEvaluator(Evaluator)
 |  VerticalEvaluator(X, y, binarize_sparse, validation, train_size, timeout_per_evaluation, epsilon, seed=0, exception_on_failure=False, other_args={}, best_observations=None)
 |  
 |  Method resolution order:
 |      VerticalEvaluator
 |      Evaluator
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, X, y, binarize_sparse, validation, train_size, timeout_per_evaluation, epsilon, seed=0, exception_on_failure=False, other_args={}, best_observations=None)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  cv(self, pl, seed, folds)
 |  
 |  lccv80(self, pl, seed=None)
 |  
 |  lccv80flex(self, pl, seed=None)
 |  
 |  lccv90(self, pl, seed)
 |  
 |  lccv90flex(self, pl, seed=None)
 |  
 |  select_model(self, learners, errors='ignore')
 |  
 |  wilcoxon(self, pl, seed=0, folds=10)
 |  
 |  --------------------------------------------------------

In [81]:
help(lccv.lccv)

Help on function lccv in module lccv.lccv:

lccv(learner_inst, X, y, r, timeout=None, base=2, min_exp=6, MAX_ESTIMATE_MARGIN_FOR_FULL_EVALUATION=0.005, MAX_EVALUATIONS=10, target_anchor=0.9, schedule=None, return_estimate_on_incomplete_runs=False, max_conf_interval_size_default=0.1, max_conf_interval_size_target=0.001, enforce_all_anchor_evaluations=False, seed=0, verbose=False, logger=None, min_evals_for_stability=3, use_train_curve=True, fix_train_test_folds=False, evaluator=None, scoring='accuracy', visualize_lcs=False, exceptions='message')
    Evaluates a learner in an iterative fashion, using learning curves. The
    method builds upon the assumption that learning curves are convex. After
    each iteration, it checks whether the convexity assumption is still valid.
    If not, it tries to repair it.
    Also, after each iteration it checks whether the performance of the best
    seen learner so far is still reachable by making an optimistic extrapolation.
    If not, it stops th