### Initialization
* Check whether the runtime is host or local.
* Mount Google Drive when using the host runtime.

In [0]:
# Detect the runtime: if `google.colab` cannot be imported, the notebook is
# treated as running locally; otherwise it is treated as the hosted runtime.
try:
  from google.colab import drive
except ImportError:
  runtime = "local"
else:
  # Mount Google Drive outside the `try` so that a failed mount is reported
  # instead of being silently mistaken for a local runtime.
  drive.mount('/gdrive')
  runtime = "host"

### Parameters

In [0]:
#@title Parameters
# Colab form cell: the `#@param` annotations turn the assignments below into
# editable form fields, and the `#@markdown` lines are rendered as their help text.
#@markdown |Name            |Description|
#@markdown |:---            |:---|
#@markdown |`seed`|The random seed|
seed = 3984 #@param {type: "number"}

#@markdown ### `nl2code` Repository
#@markdown |Name            |Description|
#@markdown |:---            |:---|
#@markdown |`repository_url`|The URL of the `NL2Code` git repository (used only in the host runtime)|
#@markdown |`branch_name`   |The name of the branch to check out (used only in the host runtime)|
repository_url = "https://github.com/HiroakiMikami/NL2Code-reimplementation" #@param {type: "string"}
branch_name = "master" #@param {type: "string"}

#@markdown ### File paths
#@markdown |Name         |Description|
#@markdown |:---         |:---|
#@markdown |`output_path`|The path of the dataset file to write.|
output_path = "/gdrive/My Drive/NL2Code/hearthstone/dataset.pickle" #@param {type: "string"}



### Setup
* Fix the random seed
* Download the codebase (when using the host runtime)
  1. Clone git repository and move to the specified branch
  2. Install modules

In [0]:
import numpy as np
import random

# Exclusive upper bound handed to `randint` when drawing the derived seeds.
SEED_MAX = 2**32 - 1

# A single root generator, seeded from the `seed` parameter, hands out one
# derived seed per library: first Python's `random`, then NumPy's global RNG.
root_rng = np.random.RandomState(seed)
for seed_fn in (random.seed, np.random.seed):
    seed_fn(root_rng.randint(SEED_MAX))

In [0]:
if runtime == "host":
    %cd /content
    !rm -rf NL2Code
    ![ ! -e NL2Code ] && git clone $repository_url NL2Code
    %cd NL2Code
    !git checkout $branch_name
    !pip install -e .
    !pip install -e . ".[django]"
# load tqdm
!pip install --force https://github.com/chengs/tqdm/archive/colab.zip

### Download dataset
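* Download the dataset (a shallow clone of `deepmind/card2code`)
* Format the code snippets (restore the newlines that the dataset encodes as `§`)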

In [0]:
import tempfile
from nl2code_examples.hearthstone import Entry
with tempfile.TemporaryDirectory() as tmpdirname:
    !mkdir -p $tmpdirname/tmp
    !git clone --depth 1 https://github.com/deepmind/card2code $tmpdirname/tmp/card2code
    
    dataset = {}
    for name in ["train", "dev", "test"]:
        target = name
        if name == "dev":
            target = "valid"
        with open("{}/tmp/card2code/third_party/hearthstone/{}_hs.in".format(tmpdirname, name)) as f:
            query = f.readlines()
        with open("{}/tmp/card2code/third_party/hearthstone/{}_hs.out".format(tmpdirname, name)) as f:
            code = f.readlines()
            code = [c.replace("§", "\n").replace("and \\", "and ") for c in code]
        entries = []
        for q, c in zip(query, code):
            entries.append(Entry(q, c))
        dataset[target] = entries

### Save dataset
* Save the dataset to `output_path` as a pickle file

In [0]:
import os
import pickle

# Create the parent directory of `output_path` when there is one.
# `os.path.dirname` returns "" for a bare file name, and `os.makedirs("")`
# would raise; `exist_ok=True` makes an already existing directory a no-op.
output_dir = os.path.dirname(output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Serialize the whole `dataset` dictionary into a single pickle file.
with open(output_path, "wb") as f:
    pickle.dump(dataset, f)