### Initialization
* Check whether the runtime is host or local.
* Mount Google Drive when using the host runtime.

In [0]:
# Decide between the "host" (Google Colab) and "local" runtimes. The runtime
# is considered "host" only when `google.colab` can be imported AND Google
# Drive is mounted at /gdrive successfully; any failure falls back to "local".
try:
    from google.colab import drive
    drive.mount('/gdrive')
    runtime = "host"
except Exception:
    # `Exception` instead of a bare `except:` so that KeyboardInterrupt and
    # SystemExit (e.g. interrupting the Drive authorization prompt) propagate
    # instead of being silently interpreted as a local runtime.
    runtime = "local"

### Parameters

In [0]:
# Colab form cell: `#@markdown` comments render as the form's documentation and
# each `#@param` annotation exposes the assignment on its line as a form field.
#@markdown |Name  |Description|
#@markdown |:---  |:---|
#@markdown |`seed`|The random seed|
seed = 20367 #@param {type: "number"}

#@markdown ### `nl2prog` Repository
#@markdown |Name            |Description|
#@markdown |:---            |:---|
#@markdown |`repository_url`|The URL of the `nl2prog` git repository (enabled only in the host runtime)|
#@markdown |`branch_name`   |The branch name (enabled only in the host runtime)|
repository_url = "https://github.com/HiroakiMikami/NL2Prog" #@param {type: "string"}
branch_name = "master" #@param {type: "string"}

#@markdown ### Dataset Settings
#@markdown |Name               |Description|
#@markdown |:---               |:---|
#@markdown |`max_action_length`|The maximum action length|
#@markdown |`word_threshold`   |The word threshold passed to `Encoder`|
#@markdown |`token_threshold`  |The token threshold passed to `Encoder`|
max_action_length = 10 #@param {type: "number"}
word_threshold =  3 #@param {type: "number"}
token_threshold = 0 #@param {type: "number"}

#@markdown ### Model Parameters
#@markdown |Name                     |Description|
#@markdown |:---                     |:---|
#@markdown |`embedding_dim`          |The dimension of word, token, and rule embeddings|
#@markdown |`node_type_embedding_dim`|The dimension of node type embeddings|
#@markdown |`lstm_state_size`        |The size of LSTM state|
#@markdown |`hidden_state_size`      |The size of attention hidden state|
embedding_dim = 128 #@param {type: "number"}
node_type_embedding_dim = 64 #@param {type: "number"}
lstm_state_size = 256 #@param {type: "number"}
hidden_state_size = 50 #@param {type: "number"}

#@markdown ### Training Settings
#@markdown |Name        |Description|
#@markdown |:---        |:---|
#@markdown |`batch_size`|The minibatch size|
#@markdown |`dropout`   |The probability of dropout|
#@markdown |`num_iters` |The number of training iterations|
batch_size = 1 #@param {type: "number"}
dropout = 0.2 #@param {type: "number"}
num_iters = 10 #@param {type: "number"}

#@markdown ### Evaluation Settings
#@markdown |Name           |Description|
#@markdown |:---           |:---|
#@markdown |`beam_size`    |The beam size|
beam_size = 15 #@param {type: "number"}

#@markdown ### Other Settings
#@markdown |Name    |Description|
#@markdown |:---    |:---|
#@markdown |`device`|The id of GPU. `-1` means that CPU is used.|
device = 0 #@param {type: "number"}

#@markdown ### File Paths
#@markdown |Name                 |Description|
#@markdown |:---                 |:---|
#@markdown |`output_dir_path`    |The path of the directory that will contain the profiling results.|
output_dir_path = "/gdrive/My Drive/NL2Prog/hearthstone/nl2code/profile" #@param {type: "string"}


### Setup
* Download the codebase (when using the host runtime)
  1. Clone the git repository and check out the specified branch
  2. Install modules
* Use GPU
* Fix the random seed

In [0]:
# Host runtime only: fetch a fresh copy of the codebase and install it.
# NOTE: comments are kept on their own lines because text appended to a `%` or
# `!` line would be passed to the magic / shell command.
if runtime == "host":
    # Work from /content and discard any previous checkout.
    %cd /content
    !rm -rf NL2Prog
    # Clone `repository_url` into ./NL2Prog and switch to `branch_name`.
    !git clone $repository_url NL2Prog
    %cd NL2Prog
    !git checkout $branch_name
    # Install the checked-out package into the current environment.
    !pip install .
# Install tqdm from the `colab` archive of the chengs/tqdm fork. This line is
# outside the `if`, so it runs on both host and local runtimes.
!pip install --force https://github.com/chengs/tqdm/archive/colab.zip

In [0]:
import torch
# `device` is the GPU id from the parameter cell; -1 selects the CPU.
if device == -1:
    pass  # CPU run: nothing to select
else:
    torch.cuda.set_device(device)

In [0]:
import numpy as np
import random
import torch

# Exclusive upper bound for the seeds drawn from the root generator.
SEED_MAX = (1 << 32) - 1

# One root generator, seeded from the `seed` parameter, hands out one seed per
# library (in the order: `random`, NumPy, PyTorch).
root_rng = np.random.RandomState(seed)
python_seed = root_rng.randint(SEED_MAX)
numpy_seed = root_rng.randint(SEED_MAX)
torch_seed = root_rng.randint(SEED_MAX)

random.seed(python_seed)
np.random.seed(numpy_seed)
torch.manual_seed(torch_seed)

### Setup training
* Load the dataset
* Split the dataset into train, test, valid
* Create and save encoder
* Prepare dataset
* Create model
* Create optimizer
* Prepare evaluation
* Load checkpoint

In [0]:
from nl2prog.dataset.hearthstone import download
# Obtain the Hearthstone dataset through the library's `download` helper.
dataset = download()

In [0]:
# Select the "train" split; it feeds both the training and the evaluation
# profiles further down in this notebook.
train_raw_dataset = dataset["train"]

In [0]:
from nl2prog.encoders import Encoder
from nl2prog.utils.data import get_samples
from nl2prog.utils.python import tokenize_query, tokenize_token
from nl2prog.language.action import code_to_action_sequence as to_seq
from nl2prog.language.python import parse
import pickle
import os

def to_action_sequence(x):
    """Convert `x` into an action sequence.

    Thin wrapper around `code_to_action_sequence` (imported as `to_seq`) that
    fixes the parser to `parse` and the tokenizer to `tokenize_token`.

    Defined with `def` rather than as a lambda bound to a name so the callable
    has a real name in tracebacks and profiles and can be pickled by reference.
    """
    return to_seq(x, parse, tokenize=tokenize_token)


# Collect samples from the training split, then build the encoder from them
# using the word/token thresholds from the parameter cell.
samples = get_samples(train_raw_dataset, tokenize_query, tokenize_token,
                      to_action_sequence)
encoder = Encoder(samples, word_threshold, token_threshold)


In [0]:
from nl2prog.utils.data import ListDataset
from nl2prog.utils.data.nl2code import to_train_dataset
from nl2prog.utils.data import to_eval_dataset
# Training uses at most `num_iters` minibatches worth of raw samples.
num_train_samples = batch_size * num_iters
train_subset = ListDataset(train_raw_dataset[:num_train_samples])
train_dataset = to_train_dataset(train_subset, tokenize_query, tokenize_token,
                                 to_action_sequence, encoder)

# Evaluation is profiled on the first raw training sample only.
eval_subset = ListDataset(train_raw_dataset[:1])
test_dataset = to_eval_dataset(eval_subset)

In [0]:
from nl2prog.nn.nl2code import TrainModel
# Build the training model from the encoder and the model parameters.
model = TrainModel(
    encoder,
    embedding_dim,
    node_type_embedding_dim,
    lstm_state_size,
    hidden_state_size,
    dropout,
)
# `device == -1` means CPU; for any other value the model is moved to the GPU.
model = model if device == -1 else model.cuda()

In [0]:
import torch.optim as optim
# Adam over all model parameters; no optimizer hyperparameter is overridden.
optimizer = optim.Adam(model.parameters())

In [0]:
import nl2prog.nn.utils.rnn as nrnn
from nl2prog.utils import synthesize as _synthesize
from nl2prog.utils.nl2code import BeamSearchSynthesizer
from nl2prog.language.python import is_subtype, parse, unparse
from nl2prog.metrics import Accuracy
from nl2prog.metrics.python import Bleu

# Beam-search synthesizer built on the model's encoder and predictor; the
# number of steps is capped by `max_action_length`.
synthesizer = BeamSearchSynthesizer(
    beam_size, tokenize_query, model.encoder, model.predictor, encoder,
    is_subtype, max_steps=max_action_length)


def synthesize(query: str):
    """Run the module-level `synthesizer` on `query` and return its result."""
    return _synthesize(query, synthesizer)


# Metrics used for evaluation, keyed by name.
accuracy = Accuracy(parse, unparse)
bleu = Bleu(parse, unparse)
metrics = dict(accuracy=accuracy, bleu=bleu)

### Profile
* Training
* Evaluation

In [0]:
import os
import pickle
import torch
from torch.utils.data import DataLoader
import torch.nn.utils.rnn as rnn
from torch.autograd.profiler import profile
from nl2prog.nn.nl2code import Loss
import nl2prog.nn.utils.rnn as nrnn
from nl2prog.utils.data.nl2code import collate_train_dataset


# `device == -1` means CPU. CUDA event timing is only requested when a GPU is
# actually in use, matching the other `device != -1` guards in this notebook
# (the original hard-coded `use_cuda=True`).
use_cuda = device != -1


def _to_padded(sequences):
    """Pack variable-length sequences, move them to the GPU when enabled, and pad with -1."""
    packed = rnn.pack_sequence(sequences, enforce_sorted=False)
    if use_cuda:
        packed = packed.cuda()
    return nrnn.pad_packed_sequence(packed, padding_value=-1)


loss_function = Loss()
model.train()
loader = DataLoader(train_dataset, batch_size=batch_size,
                    shuffle=True,
                    num_workers=4,
                    collate_fn=collate_train_dataset)

# Profile one pass over the training subset, including data loading, the
# forward/backward passes and the optimizer step.
with profile(use_cuda=use_cuda, record_shapes=True) as prof:
    for query, action, prev_action in loader:
        # One-step shift of the previous-action sequences: the model input is
        # every step but the last, the target is every step but the first.
        prev_action_train = _to_padded([x[:-1] for x in prev_action])
        action_ground_truth = _to_padded([x[1:] for x in prev_action])
        query = _to_padded(query)
        action = _to_padded(action)

        rule_prob, token_prob, copy_prob, _, _ = model(query, action, prev_action_train)
        loss = loss_function(rule_prob, token_prob, copy_prob, action_ground_truth)
        model.zero_grad()
        loss.backward()
        optimizer.step()

# Persist the raw profiler object next to the other profiling results.
os.makedirs(output_dir_path, exist_ok=True)
with open(os.path.join(output_dir_path, "training_prof.pickle"), "wb") as file:
    pickle.dump(prof, file)

print(prof.key_averages().table(sort_by="self_cpu_time_total"))

In [0]:
import os
import pickle
import cProfile
import torch
import torch.nn.utils.rnn as rnn
from torch.autograd.profiler import profile
import nl2prog.nn.utils.rnn as nrnn
from nl2prog.utils import evaluate

# Profile evaluation at two levels at once: cProfile for Python-level calls
# and the PyTorch autograd profiler for operator-level timings.
model.eval()
pr = cProfile.Profile()
pr.enable()
try:
    # `device == -1` means CPU: only request CUDA event timing when a GPU is
    # in use (the original hard-coded `use_cuda=True`).
    with profile(use_cuda=(device != -1), record_shapes=True) as prof:
        result = evaluate(test_dataset, synthesize, top_n=[1], metrics=metrics)
finally:
    # Always stop cProfile, even if evaluation raises; otherwise it would stay
    # enabled for everything executed afterwards.
    pr.disable()
pr.create_stats()

# Persist both profiles in the output directory.
os.makedirs(output_dir_path, exist_ok=True)
pr.dump_stats(os.path.join(output_dir_path, "evaluation.cprof"))
with open(os.path.join(output_dir_path, "evaluation_prof.pickle"), "wb") as file:
    pickle.dump(prof, file)

pr.print_stats(sort="cumulative")
print("---")
print(prof.key_averages().table(sort_by="self_cpu_time_total"))
