### Initialization
* Check whether the runtime is host or local.
* Mount Google Drive when using the host runtime.

In [0]:
# Detect the runtime: `google.colab` can only be imported on a Colab ("host")
# runtime. Only the import decides host vs. local; a failure while mounting
# Google Drive (or interrupting the authorization prompt) is no longer
# silently reported as a "local" runtime.
try:
    from google.colab import drive
except ImportError:
    runtime = "local"
else:
    # Mount Google Drive so that paths under /gdrive are available.
    drive.mount('/gdrive')
    runtime = "host"

### Parameters

In [0]:
#@title Parameters
#@markdown |Name            |Description|
#@markdown |:---            |:---|
#@markdown |`seed`|The random seed|
seed = 20367 #@param {type: "number"}

#@markdown ### `nl2code` Repositories
#@markdown |Name            |Description|
#@markdown |:---            |:---|
#@markdown |`repository_url`|The URL of `nl2code` git repository (enabled only in the host runtime)|
#@markdown |`branch_name`   |The branch name (enabled only in the host runtime)|
repository_url = "https://github.com/HiroakiMikami/NL2Code-reimplementation" #@param {type: "string"}
branch_name = "master" #@param {type: "string"}

#@markdown ### Dataset Settings
#@markdown |Name               |Description|
#@markdown |:---               |:---|
#@markdown |`max_action_length`|The maximum action length|
max_action_length = 100 #@param {type: "number"}

#@markdown ### Model Parameters
#@markdown |Name                     |Description|
#@markdown |:---                     |:---|
#@markdown |`embedding_dim`          |The dimension of word, token, and rule embeddings|
#@markdown |`node_type_embedding_dim`|The dimension of node type embeddings|
#@markdown |`lstm_state_size`        |The size of LSTM state|
#@markdown |`hidden_state_size`      |The size of attention hidden state|
embedding_dim = 128 #@param {type: "number"}
node_type_embedding_dim = 64 #@param {type: "number"}
lstm_state_size = 256 #@param {type: "number"}
hidden_state_size = 50 #@param {type: "number"}


#@markdown ### Inference Settings
#@markdown |Name                 |Description|
#@markdown |:---                 |:---|
#@markdown |`beam_size`          |The beam size|
beam_size = 15 #@param {type: "number"}

#@markdown ### Other Settings
#@markdown |Name    |Description|
#@markdown |:---    |:---|
#@markdown |`device`|The id of GPU. `-1` means that CPU is used.|
device = 0 #@param {type: "number"}

#@markdown ### File Paths
#@markdown |Name            |Description|
#@markdown |:---            |:---|
#@markdown |`model_dir_path`|The path of the directory containing the saved encoder (`encoder.pickle`) and model (`best_model.pickle`).|
model_dir_path = "/gdrive/My Drive/NL2Code/django/result" #@param {type: "string"}



### Setup
* Download the codebase (when using the host runtime)
  1. Clone git repository and move to the specified branch
  2. Install modules
* Use GPU
* Fix the random seed

In [0]:
# On the host runtime, fetch a fresh copy of the codebase and install it.
if runtime == "host":
    %cd /content
    # Remove any previous checkout so that the clone below starts from scratch.
    !rm -rf NL2Code
    # NOTE(review): after the `rm -rf` above the `[ ! -e NL2Code ]` guard looks
    # like it is always true - confirm whether it is still needed.
    ![ ! -e NL2Code ] && git clone $repository_url NL2Code
    %cd NL2Code
    !git checkout $branch_name
    # Install the package in editable mode, then again together with its
    # `examples` extra.
    # NOTE(review): the first install looks redundant with the second - confirm.
    !pip install -e .
    !pip install -e . ".[examples]"
# load tqdm
# (installed on both runtimes, from the `colab` archive of the chengs/tqdm fork)
!pip install --force https://github.com/chengs/tqdm/archive/colab.zip

In [0]:
import torch

# `device` holds a GPU id; the value -1 selects the CPU, in which case no
# CUDA device is made current.
use_gpu = device != -1
if use_gpu:
    torch.cuda.set_device(device)

In [0]:
import random

import numpy as np
import torch

# Exclusive upper bound for the derived seeds drawn below.
SEED_MAX = 2**32 - 1

# A root generator seeded with `seed` hands out one derived seed per library,
# drawn in the order: Python `random`, NumPy, PyTorch.
root_rng = np.random.RandomState(seed)
python_seed, numpy_seed, torch_seed = [
    root_rng.randint(SEED_MAX) for _ in range(3)
]
random.seed(python_seed)
np.random.seed(numpy_seed)
torch.manual_seed(torch_seed)

### Setup inference
* Load the saved encoder (`encoder.pickle`) from `model_dir_path`
* Create model
* Use GPU (if needed)

In [0]:
import os
import pickle

from nl2code_examples.django import DatasetEncoder

# Restore the encoder object stored as `encoder.pickle` inside `model_dir_path`.
# NOTE: unpickling can execute arbitrary code, so only point `model_dir_path`
# at a directory whose contents you trust.
encoder_path = os.path.join(model_dir_path, "encoder.pickle")
with open(encoder_path, "rb") as encoder_file:
    encoder = pickle.load(encoder_file)

In [0]:
from nl2code_examples.django import TrainingModel

# Build the model from the loaded encoder and the sizes chosen in the
# Parameters cell.
model = TrainingModel(
    encoder,
    embedding_dim,
    node_type_embedding_dim,
    lstm_state_size,
    hidden_state_size,
    0.0,  # NOTE(review): presumably the dropout rate - confirm against TrainingModel
)
# Move the model to the GPU unless the CPU was requested (`device == -1`).
model = model.cuda() if device != -1 else model

### Inference

In [0]:
# The natural-language description to synthesize code for (Colab form field).
query = "define the method _create_message with 2 arguments: self and msg" #@param {type: "string"}

# NOTE(review): the imports below mix the `nl2code`, `nl2prog` and
# `nl2code_examples` package names - confirm that the selected branch
# provides all of them.
import os
import torch
import torch.nn.utils.rnn as rnn
from typing import List
import nl2code.nn.utils.rnn as nrnn
from nl2prog.language.python import is_subtype, to_python_ast
from nl2prog.utils.nl2code import BeamSearchSynthesizer
from nl2code_examples.django import unparse
from nl2code_examples.django._dataset import tokenize_annotation

# `tokenize_annotation` returns a pair: the first element replaces the raw
# `query` string, the second is kept as `query_with_placeholders` and is the
# sequence later passed to `query_embedding`.
query, query_with_placeholders = tokenize_annotation(query)

def query_embedding(query: List[str]):
    """Encode a tokenized query with the model's encoder.

    The tokens are converted by `encoder.annotation_encoder.batch_encode`,
    moved to the GPU unless `device` is -1, padded as a single-sequence batch
    and passed through `model.encoder`.

    Returns the `.data` of the encoder output, viewed with `len(query)` rows.
    """
    token_tensor = encoder.annotation_encoder.batch_encode(query)
    if device != -1:
        token_tensor = token_tensor.cuda()
    padded_batch = nrnn.pad_sequence([token_tensor])
    encoded = model.encoder(padded_batch).data
    n_tokens = len(query)
    return encoded.view(n_tokens, -1)

# Restore the trained weights first so the search below runs against them.
# `map_location="cpu"` lets a checkpoint written on a GPU be read on a
# CPU-only runtime (`device == -1`); `load_state_dict` then copies the values
# into the parameters of `model` on whichever device it already lives.
state_dict = torch.load(os.path.join(model_dir_path, "best_model.pickle"),
                        map_location="cpu")
model.load_state_dict(state_dict)
# Inference only: put layers such as dropout into evaluation mode.
model.eval()

synthesizer = BeamSearchSynthesizer(beam_size, model.predictor,
                                    encoder.action_sequence_encoder, is_subtype,
                                    max_steps=max_action_length)


def _to_source(candidate):
    """Return the source code for `candidate.ast`, or None if conversion fails."""
    try:
        return unparse(to_python_ast(candidate.ast))
    except Exception:  # noqa
        # Best effort: a candidate that cannot be converted is skipped by the
        # callers. Unlike a bare `except`, this lets KeyboardInterrupt through.
        return None


# Gather the candidates from every result the synthesizer yields. Gradients
# are not needed for inference, so autograd is disabled during the search.
candidates = []
with torch.no_grad():
    for found, _ in synthesizer.synthesize(query, query_embedding(query_with_placeholders)):
        candidates.extend(found)

# Highest-scoring candidate; on ties the earliest one wins, and `None` is
# used when the search produced no candidates at all.
candidate = max(candidates, key=lambda c: c.score, default=None)

# An empty string is printed when there is no candidate or it cannot be
# converted to source code.
code = ""
if candidate is not None:
    code = _to_source(candidate) or ""

print("Top-1")
print(code)

print("")
print("All Candidates")
for c in candidates:
    source = _to_source(c)
    if source is None:
        continue
    print(source)
    print("---")