### Initialization
* Check whether the runtime is host or local.
* Mount Google Drive when using the host runtime.

In [0]:
# Detect the runtime: if `google.colab` cannot be imported, the notebook is
# treated as running locally; otherwise it is treated as the hosted runtime.
try:
  from google.colab import drive
except ImportError:
  runtime = "local"
else:
  # Mount outside the `try` so that a failed mount surfaces as an error instead
  # of being silently misreported as a local runtime.
  drive.mount('/gdrive')
  runtime = "host"

### Parameters

In [0]:
#@title Parameters
#@markdown |Name            |Description|
#@markdown |:---            |:---|
#@markdown |`seed`|The random seed|
seed = 3984 #@param {type: "number"}

#@markdown ### `nl2code` Repositories
#@markdown |Name            |Description|
#@markdown |:---            |:---|
#@markdown |`repository_url`|The URL of the `nl2code` git repository (enabled only in the host runtime)|
#@markdown |`branch_name`   |The branch name (enabled only in the host runtime)|
repository_url = "https://github.com/HiroakiMikami/NL2Code-reimplementation" #@param {type: "string"}
branch_name = "master" #@param {type: "string"}

#@markdown ### File paths
#@markdown |Name         |Description|
#@markdown |:---         |:---|
#@markdown |`output_path`|The path of the dataset file.|
output_path = "./dataset/django.pickle" #@param {type: "string"}



### Setup
* Fix the random seed
* Download the codebase (when using the host runtime)
  1. Clone git repository and move to the specified branch
  2. Install modules

In [0]:
import numpy as np
import random

# Exclusive upper bound for the derived seeds. `np.random.seed` only accepts
# values that fit in an unsigned 32-bit integer, so derived seeds stay below it.
SEED_MAX = 2**32 - 1

# A dedicated generator, seeded from the `seed` parameter, derives one seed per
# library so the whole notebook is controlled by a single value.
root_rng = np.random.RandomState(seed)

# `dtype=np.int64` keeps the bound valid on platforms whose default integer is
# 32-bit, and `int(...)` converts the NumPy scalar to a built-in int because
# `random.seed` rejects NumPy integer types on Python 3.11+.
random.seed(int(root_rng.randint(SEED_MAX, dtype=np.int64)))
np.random.seed(int(root_rng.randint(SEED_MAX, dtype=np.int64)))

In [0]:
# Host runtime only: fetch the codebase and install its dependencies.
if runtime == "host":
  %cd /content
  # Remove any previous clone so the clone below starts from a clean directory.
  !rm -rf NL2Code
  # Clone only when the NL2Code path does not exist.
  ![ ! -e NL2Code ] && git clone $repository_url NL2Code
  %cd NL2Code
  # Check out the remote-tracking ref of the requested branch.
  !git checkout origin/$branch_name
  !git submodule init
  !git submodule update
  # Install the packages listed in the repository's requirements file.
  !pip install -r requirements.txt

### Download dataset
* Download the dataset
* Format the annotations

In [0]:
import tempfile
with tempfile.TemporaryDirectory() as tmpdirname:
    !mkdir -p $tmpdirname/tmp
    !git clone --depth 1 https://github.com/odashi/ase15-django-dataset/ $tmpdirname/tmp/django

    with open("{}/tmp/django/django/all.anno".format(tmpdirname)) as f:
        annotation = f.readlines()
    with open("{}/tmp/django/django/all.code".format(tmpdirname)) as f:
        code = f.readlines()


In [0]:
from examples.django import format_annotations

# NOTE(review): `annotation` is rebound to the formatted result, so re-running
# this cell passes already-formatted data to `format_annotations` again; re-run
# the download cell first to start from the raw lines.
annotation = format_annotations(annotation)

### Save dataset
* Save the dataset

In [0]:
import os
import pickle
from examples.django import Entry

# Create the output directory when the path has one. `os.path.dirname` returns
# "" for a bare filename, which `os.makedirs` rejects, so skip it in that case.
output_dir = os.path.dirname(output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Pair each annotation with the code line at the same index; `zip` stops at the
# shorter of the two lists.
data = [Entry(anno, snippet) for anno, snippet in zip(annotation, code)]

# Serialize the list of entries to `output_path`.
with open(output_path, "wb") as f:
    pickle.dump(data, f)
