### Initialization
* Check whether the runtime is host or local.
* Mount Google Drive when using the host runtime.

In [0]:
# Detect where the notebook is running. `google.colab` is only importable on
# the hosted Colab runtime, so a failed import means we are running locally.
try:
  from google.colab import drive
except ImportError:
  runtime = "local"
else:
  # Only the import is used for detection. A failure while mounting Drive is a
  # real error on the hosted runtime and must not be mistaken for "local".
  drive.mount('/gdrive')
  runtime = "host"

### Parameters

In [0]:
#@title Parameters
#@markdown |Name            |Description|
#@markdown |:---            |:---|
#@markdown |`seed`|The random seed|
seed = 3984 #@param {type: "number"}

#@markdown ### `deep-coder` Repositories
#@markdown |Name            |Description|
#@markdown |:---            |:---|
#@markdown |`repository_url`|The URL of the `deep-coder` git repository (used only in the host runtime)|
#@markdown |`branch_name`   |The branch name (used only in the host runtime)|
repository_url = "https://github.com/HiroakiMikami/deep-coder" #@param {type: "string"}
branch_name = "master" #@param {type: "string"}

#@markdown ### Validation Settings
#@markdown |Name                |Description|
#@markdown |:---                |:---|
#@markdown |`timeout_second`    |The timeout (in seconds) passed to the search for each validation entry|
#@markdown |`max_program_length`|The maximum length of the program|
timeout_second = 1 #@param {type: "number"}
max_program_length = 2 #@param {type: "number"}

#@markdown ### Filepath
#@markdown |Name                |Description|
#@markdown |:---                |:---|
#@markdown |`train_dataset_path`|The file path of the training dataset.|
#@markdown |`valid_dataset_path`|The file path of the validation dataset.|
#@markdown |`destination_path`  |The directory that will contain the baseline validation result (`result.pickle`).|
train_dataset_path = "./dataset/train.pickle" #@param {type: "string"}
valid_dataset_path = "./dataset/valid.pickle" #@param {type: "string"}
destination_path = "./out/baseline" #@param {type: "string"}



### Setup
* Fix the random seed
* Download the codebase
  1. Clone git repository and move to the specified branch
  2. Initialize submodule
  3. Build the enumerative-search tool and install chainer, cupy, and tqdm
* Copy the dataset from Google Drive

In [0]:
import numpy as np
import random

# Exclusive upper bound handed to `randint` when deriving the per-library seeds.
SEED_MAX = 2**32 - 1

# A single root generator, seeded from the user parameter, produces one seed
# per library. The draw order (Python's `random` first, then NumPy's global
# generator) determines which value each library receives.
root_rng = np.random.RandomState(seed)
python_seed = root_rng.randint(SEED_MAX)
numpy_seed = root_rng.randint(SEED_MAX)
random.seed(python_seed)
np.random.seed(numpy_seed)

In [0]:
if runtime == "host":
  %cd /content
  !rm -rf deep-coder
  ![ ! -e deep-coder ] && git clone $repository_url deep-coder
  %cd deep-coder
  !git checkout origin/$branch_name
  !git submodule init
  !git submodule update
  !make -C DeepCoder_Utils/enumerative-search -j `nproc`
  !curl https://colab.chainer.org/install | sh -  !pip install tqdm

### Validate Baseline Predictor

In [0]:
import pickle
import os
import chainer as ch
from chainer import datasets
from src.dataset import EncodedDataset, Dataset
import src.inference as I
from src.model import ModelShapeParameters
from tqdm import tqdm_notebook as tqdm

# Load the validation dataset.
# NOTE: pickle.load can execute arbitrary code; only use trusted dataset files.
with open(valid_dataset_path, "rb") as valid_file:
    dataset: Dataset = pickle.load(valid_file)

# Load the training dataset and build the baseline prediction from it.
# NOTE(review): presumably this is a fixed prediction based on the prior
# distribution of the training set — confirm in src.inference.
with open(train_dataset_path, "rb") as train_file:
    train: ch.datasets.TupleDataset = pickle.load(train_file).dataset
pred = I.predict_with_prior_distribution(train)

# Run the search on every validation entry with the same prediction, keeping
# each result keyed by its index and counting the solved entries.
results = {}
num_succ = 0
for index, (entry,) in enumerate(tqdm(dataset.dataset)):
    search_binary = os.path.join(
        os.getcwd(), "DeepCoder_Utils", "enumerative-search", "search")
    outcome = I.search(
        search_binary,
        timeout_second,
        dataset.metadata.value_range,
        entry.examples,
        max_program_length,
        pred
    )
    results[index] = outcome
    num_succ += 1 if outcome.is_solved else 0

total = len(dataset.dataset)
print("Solved: {} of {} examples".format(num_succ, total))


### Teardown
* Save the baseline result

In [0]:
import os
import chainer as ch

# Create the output directory if needed. `exist_ok=True` avoids the race
# between a separate existence check and the creation.
os.makedirs(destination_path, exist_ok=True)

# Persist the per-entry search results collected by the validation cell.
with open(os.path.join(destination_path, "result.pickle"), "wb") as f:
    pickle.dump(results, f)
