In [1]:
# default_exp experiment

# Experiments

In [12]:
%load_ext autoreload
%autoreload 2

In [13]:
import os
from pathlib import Path

import pandas as pd
from sacred import Experiment
from sacred.run import Run
from sacred.observers import S3Observer

from nbdev.export import Config
from sciflow.lake_observer import AWSLakeObserver

In [17]:
# Experiment-wide settings. The bucket and library name are read from the
# nbdev project configuration; the region and experiment name are fixed here.
bucket = Config().get('bucket')
lib_name = Config().get('lib_name')

region = 'eu-west-1'
exp_name = 'sacred_sciflow'

In [22]:
# Create the experiment in interactive mode and register an AWSLakeObserver
# that is pointed at experiments/<lib_name>/<exp_name> in the configured bucket.
exp = Experiment(exp_name, interactive=True)
obs = AWSLakeObserver(
    bucket_name=bucket,
    experiment_dir=f'experiments/{lib_name}/{exp_name}',
    region=region,
)
exp.observers.append(obs)

In [32]:
# parameters
# NOTE(review): these look like notebook-level defaults, possibly consumed by a
# parameter-injection tool keyed on the "parameters" marker above — confirm.
# Note that `my_config` assigns its own `alpha = 0.1` for the Sacred experiment.
alpha = 1
input_path = ''
model_path = ''

In [33]:
@exp.config
def my_config():
    # Every local assigned in this function becomes an entry of the experiment config.
    alpha = 0.1
    # `recipient` is what the run cell overrides via config_updates; `message`
    # is the value injected into `my_main(message, ...)`. Without these two
    # entries a fresh-kernel run has no `message` to inject.
    recipient = 'world'
    message = 'Hello %s!' % recipient

In [34]:
def preprocess(input_data):
    """Placeholder for the preprocessing step; currently does nothing.

    :param input_data: ignored by the current stub
    :return: always ``None``
    """
    return None

In [35]:
def train(train_data, alpha):
    """Placeholder for the training step; currently does nothing.

    :param train_data: ignored by the current stub
    :param alpha: ignored by the current stub
    :return: always ``None``
    """
    return None

In [36]:
def evaluate(model, test_data):
    """Placeholder for the evaluation step; currently does nothing.

    :param model: ignored by the current stub
    :param test_data: ignored by the current stub
    :return: always ``None``
    """
    return None

In [24]:


@exp.main
def my_main(message, _run: Run):
    """Demo main command for ``exp``.

    Attaches two files from ``test/`` as run artifacts, logs three scalar
    values, and prints ``message``.

    :param message: text printed at the end of the run
    :param _run: the ``Run`` object used for artifacts and scalar logging
    """
    artifact_files = (
        'test/requirements-generated.txt',
        'test/dataframe_artifact.csv',
    )
    for artifact_file in artifact_files:
        _run.add_artifact(Path(artifact_file))

    # (metric name, value, step) triples, logged in this order.
    scalar_points = (
        ('mae', 1.79, 0),
        ('mae', 3.12, 1),
        ('another one', 9.12, 0),
    )
    for metric_name, value, step in scalar_points:
        _run.log_scalar(metric_name, value, step)

    print(message)

In [26]:
# Execute the experiment once, passing `recipient` as a config update.
run_overrides = {'recipient': 'sciflow'}
run = exp.run(config_updates=run_overrides)

INFO:sacred_sciflow:Running command 'my_main'
INFO:sacred_sciflow:Started run with ID "1"
INFO:sacred_sciflow:Completed after 0:00:01


Hello sciflow!


In [43]:
from typing import Callable, Optional, Sequence

In [44]:
def create_exp(experiment_name: str, train_eval_func: Callable, interactive: bool = False):
    """Build a Sacred experiment that wraps a user-supplied train/eval function.

    The experiment is only constructed here; nothing is executed until the
    caller invokes ``.run(...)`` on the returned object.

    :param experiment_name: name passed to ``Experiment``
    :param train_eval_func: callable invoked as
        ``train_eval_func(X_path, y_path, model, train_params, tags)``. Its
        result is indexed with ``results["test_mae"]`` and the minimum of that
        value is returned as the run result.
    :param interactive: forwarded to ``Experiment``; pass ``True`` when the
        experiment is created from a notebook or REPL
    :return: the configured ``Experiment``
    """
    ex = Experiment(experiment_name, interactive=interactive)

    @ex.config
    def training_config():
        # Defaults only; callers are expected to override these via config_updates.
        X_path = None
        y_path = None
        model = None
        train_params = {}
        tags = None

    @ex.capture
    def train_eval(X_path, y_path, model, train_params, tags):
        return train_eval_func(X_path, y_path, model, train_params, tags)

    def add_artifacts(_run, results):
        # NOTE(review): `results` is unused and the artifact path is hard-coded;
        # the file must already exist when the run reaches this point.
        _run.add_artifact("/tmp/useful_feats.csv")

    def log_metrics(_run, results):
        # NOTE(review): `results` is unused; this records the invoking user name
        # under the metric "name" rather than a numeric metric — confirm intent.
        # `.get` avoids failing the whole run when USER is not set.
        _run.log_scalar("name", os.environ.get("USER", "unknown"))

    # `main` rather than `automain`: `automain` would also launch the
    # command-line interface as soon as this factory is called from `__main__`,
    # instead of leaving execution to the caller's `ex.run(...)`.
    @ex.main
    def main_experiment(_run: Run):
        results = train_eval()

        # Log metrics of importance.
        log_metrics(_run, results)

        # Add files to the experiment DB
        add_artifacts(_run, results)

        return float(min(results["test_mae"]))

    return ex


def experiment(
    X: pd.DataFrame,
    y: pd.Series,
    config_params: dict,
    train_and_eval_func: Callable,
    exp_name: str = "hof_prediction",
    tags: Optional[Sequence[str]] = None,
    use_mongo: bool = True,
):
    """
    Driver for running experiments in notebooks or scripts.

    Builds the experiment with ``create_exp``, persists ``X``/``y`` through
    ``write_data`` and runs the experiment with the resulting paths merged
    into the configuration. ``config_params`` is copied, not modified.

    :param X: feature data, handed to ``write_data``
    :param y: target data, handed to ``write_data``
    :param config_params: config overrides; keys must be among the entries
        defined in the ``@ex.config`` scope of ``create_exp``. ``X_path`` and
        ``y_path`` are always overwritten with the paths from ``write_data``.
    :param train_and_eval_func: a function which performs training and evaluation. It
        must have the same signature as the one in ``@ex.capture``
    :param exp_name: A name to identify the experiment
    :param tags: any tags which will be stored with the run config; when given
        they override a ``tags`` key in ``config_params``
    :param use_mongo: currently ignored by this function (no observer is
        attached here) — NOTE(review): wire up or remove
    :return: the object returned by ``ex.run``
    """
    ex = create_exp(exp_name, train_and_eval_func)

    # NOTE(review): `write_data` is not defined in the visible notebook; it is
    # expected to return an (X_path, y_path) pair — confirm where it comes from.
    X_path, y_path = write_data(X, y)

    # Work on a copy so the caller's dict is not silently extended.
    run_config = dict(config_params or {})
    run_config.update({"X_path": X_path, "y_path": y_path})
    if tags is not None:
        run_config["tags"] = tags

    return ex.run(config_updates=run_config)