### Initialization
* Check whether the runtime is host or local.
* Mount Google Drive when using the host runtime.

In [0]:
# Detect the runtime: `google.colab` is importable only on a hosted Colab
# runtime, so a failed import means the notebook is running locally.
try:
  from google.colab import drive
except ImportError:
  runtime = "local"
else:
  # Mount Google Drive under /gdrive. A mount failure (or a user interrupt
  # during the authorization prompt) is raised instead of being silently
  # misreported as a local runtime.
  drive.mount('/gdrive')
  runtime = "host"

### Parameters

In [0]:
#@title Parameters
#@markdown |Name            |Description|
#@markdown |:---            |:---|
#@markdown |`seed`|The seed of the root random number generator, from which the `random` and `numpy` seeds are derived|
seed = 3984 #@param {type: "number"}

#@markdown ### `deep-coder` Repository
#@markdown |Name            |Description|
#@markdown |:---            |:---|
#@markdown |`repository_url`|The URL of the `deep-coder` git repository (used only in the host runtime)|
#@markdown |`branch_name`   |The name of the branch to check out (used only in the host runtime)|
repository_url = "https://github.com/HiroakiMikami/deep-coder" #@param {type: "string"}
branch_name = "master" #@param {type: "string"}

#@markdown ### Dataset File Paths
#@markdown |Name                |Description|
#@markdown |:---                |:---|
#@markdown |`train_dataset_path`|The file path of the pickled training dataset.|
#@markdown |`valid_dataset_path`|The file path of the pickled validation dataset.|
train_dataset_path = "dataset/train.pickle" #@param {type: "string"}
valid_dataset_path = "dataset/valid.pickle" #@param {type: "string"}



### Setup
* Fix the random seed
* Download the codebase (when using the host runtime)
  1. Clone git repository and move to the specified branch
  2. Initialize submodule
  3. Build the `search` tool
  4. Install chainer and cupy
* Define common functions

In [0]:
import numpy as np
import random

# Upper bound (exclusive) for the derived seeds; `np.random.seed` accepts
# values up to 2**32 - 1.
SEED_MAX = (1 << 32) - 1

# A single root generator derives one seed per library, so `seed` alone
# determines both random streams. The draw order (Python first, NumPy second)
# must stay fixed to keep results reproducible.
root_rng = np.random.RandomState(seed)
python_seed = root_rng.randint(SEED_MAX)
random.seed(python_seed)
numpy_seed = root_rng.randint(SEED_MAX)
np.random.seed(numpy_seed)

In [0]:
# Fetch and build the codebase; this runs only when `runtime` is "host".
if runtime == "host":
  %cd /content
  # Start from a clean state: remove any previous clone before cloning again.
  !rm -rf deep-coder
  ![ ! -e deep-coder ] && git clone $repository_url deep-coder
  %cd deep-coder
  # Checking out the remote-tracking ref leaves the work tree in detached-HEAD state.
  !git checkout origin/$branch_name
  !git submodule init
  !git submodule update
  # Build the enumerative-search tool with one make job per CPU reported by `nproc`.
  !make -C DeepCoder_Utils/enumerative-search -j `nproc`
  # NOTE(review): pipes a remote script straight into the shell; presumably this
  # installs chainer and cupy (per the Setup notes) - confirm the source is trusted.
  !curl https://colab.chainer.org/install | sh -
  !pip install tqdm

In [0]:
def is_input(line):
    """Return 1 if `line` declares a program input, otherwise 0.

    A line counts as an input declaration when it contains either
    "<- int" or "<- [int]". The integer result lets callers sum over lines.
    """
    input_markers = ("<- int", "<- [int]")
    return int(any(marker in line for marker in input_markers))

### Load Datasets
* Load dataset

In [0]:
import pickle

def _load_pickle(path):
  """Deserialize and return the object stored in the pickle file at `path`."""
  # NOTE: unpickling can execute arbitrary code; only load trusted dataset files.
  with open(path, "rb") as pickle_file:
    return pickle.load(pickle_file)

train_dataset = _load_pickle(train_dataset_path)
valid_dataset = _load_pickle(valid_dataset_path)

### Visualize Datasets
* Show a histogram of the number of inputs per entry for the training and validation datasets.

In [0]:
#@markdown ### Visualization Parameters
#@markdown |Name    |Description|
#@markdown |:---    |:---|
#@markdown |`width` |The width of the #inputs histogram figure, in inches|
#@markdown |`height`|The height of the #inputs histogram figure, in inches|
#@markdown ---
width = 12 #@param {type: "slider", min: 1, max: 48, step: 1}
height = 4 #@param {type: "slider", min: 1, max: 48, step: 1}


In [0]:
import matplotlib.pyplot as plt

def _count_inputs(entry):
    """Return the number of input-declaration lines in `entry.source_code`."""
    return sum(map(is_input, entry.source_code.split("\n")))

# Each dataset element is unpacked as a 1-tuple wrapping the entry (`entry,`).
train = [_count_inputs(entry) for entry, in train_dataset.dataset]
valid = [_count_inputs(entry) for entry, in valid_dataset.dataset]

# Draw both datasets in a single `hist` call: they then share the same bin
# edges and are drawn side by side, instead of two independently binned,
# opaque histograms painted on top of each other.
plt.figure(figsize=(width, height))
plt.hist([train, valid], label=["training", "validation"])
plt.legend()
plt.xlabel("#Inputs")
plt.ylabel("#Entries")
plt.show()

* Show a histogram of the body length (the number of non-input lines) per entry for the training and validation datasets.


In [0]:
#@markdown ### Visualization Parameters
#@markdown |Name    |Description|
#@markdown |:---    |:---|
#@markdown |`width` |The width of the body-length histogram figure, in inches|
#@markdown |`height`|The height of the body-length histogram figure, in inches|
#@markdown ---
width = 12 #@param {type: "slider", min: 1, max: 48, step: 1}
height = 4 #@param {type: "slider", min: 1, max: 48, step: 1}


In [0]:
import matplotlib.pyplot as plt

def _body_length(entry):
    """Return the number of lines of `entry.source_code` that are not input declarations."""
    # Split once and reuse the lines for both the total and the input count.
    lines = entry.source_code.split("\n")
    return len(lines) - sum(map(is_input, lines))

# Each dataset element is unpacked as a 1-tuple wrapping the entry (`entry,`).
train = [_body_length(entry) for entry, in train_dataset.dataset]
valid = [_body_length(entry) for entry, in valid_dataset.dataset]

# Draw both datasets in a single `hist` call: they then share the same bin
# edges and are drawn side by side, instead of two independently binned,
# opaque histograms painted on top of each other.
plt.figure(figsize=(width, height))
plt.hist([train, valid], label=["training", "validation"])
plt.legend()
plt.xlabel("length of body")
plt.ylabel("#Entries")
plt.show()

* Show the prior distribution of the training dataset

In [0]:
#@markdown ### Visualization Parameters
#@markdown |Name    |Description|
#@markdown |:---    |:---|
#@markdown |`width` |The width of the prior-distribution figure, in inches|
#@markdown |`height`|The height of the prior-distribution figure, in inches|
#@markdown ---
width = 36 #@param {type: "slider", min: 1, max: 48, step: 1}
height = 8 #@param {type: "slider", min: 1, max: 48, step: 1}


In [0]:
import matplotlib.pyplot as plt
from src.dataset import prior_distribution

# Compute the prior over the training dataset and separate it into parallel
# sequences: the symbols (bar labels) and their values (bar heights).
prior = prior_distribution(train_dataset.dataset)
symbol_probs = list(prior.items())
columns = [symbol for symbol, _ in symbol_probs]
data = np.array([prob for _, prob in symbol_probs])

# Draw one bar per symbol, labelled with the symbol itself.
plt.figure(figsize=(width, height))
xs = 1 + np.arange(len(columns))
plt.bar(xs, data, width=0.4, bottom=np.zeros(1), tick_label=columns)
plt.ylabel("Probability")
plt.xlabel("Symbol")
plt.title("Prior Distribution")


* Show the details of the specified entry

In [0]:
#@markdown ### Visualization Parameters
#@markdown |Name     |Description|
#@markdown |:---     |:---|
#@markdown |`width`  |The width of the attribute figure, in inches|
#@markdown |`height` |The height of the attribute figure, in inches|
#@markdown |`dataset`|The dataset that contains the specified entry|
#@markdown |`index`  |The index of the specified entry within that dataset|
#@markdown ---
width = 36 #@param {type: "slider", min: 1, max: 48, step: 1}
height = 1 #@param {type: "slider", min: 1, max: 48, step: 1}
dataset = "train" #@param ["train", "valid"]
index = 0 #@param {type: "number"}


In [0]:
from matplotlib import colors
import matplotlib.cm as cm
import matplotlib.pyplot as plt

# Colour lookup for the attribute bars: values in [0, 1] are mapped onto the
# `Greens` colour map.
unit_norm = colors.Normalize(vmin=0, vmax=1)
m = cm.ScalarMappable(cmap=cm.Greens, norm=unit_norm)

# Create the figure that `show_entry` draws the attribute bars into.
plt.figure(figsize=(width, height))
    
def show_entry(title, entry):
    """Print an entry and plot its attributes on the current matplotlib figure.

    Prints `title`, the entry's source code and every input/output example,
    then draws one unit-height bar per attribute, coloured through the
    module-level colour map `m` (1 for truthy attribute values, 0 otherwise).

    Args:
        title: Heading printed before the entry.
        entry: Object exposing `source_code`, `examples` (each with `inputs`
            and `output`) and a mapping-like `attribute`.
    """
    print(title)
    print("Source Code")
    print(entry.source_code)

    print()
    print("Examples")
    for example_no, example in enumerate(entry.examples):
        print("Example {}".format(example_no))
        for input_no, value in enumerate(example.inputs):
            print("  input {}: {}".format(input_no, value))
        print("  output: {}".format(example.output))

    attributes = entry.attribute
    # Local names avoid shadowing the imported `colors` module.
    bar_colors = [m.to_rgba(1 if enabled else 0) for _, enabled in attributes.items()]
    heights = np.ones(len(attributes))
    positions = np.arange(len(attributes)) + 10

    plt.title("Attributes")
    # Every bar has the same height, so the y axis carries no information.
    plt.gca().yaxis.set_visible(False)
    plt.bar(positions, heights, width=0.9, bottom=np.zeros(1),
            color=bar_colors, tick_label=list(attributes.keys()))

# Pick the requested dataset; each element is indexed with [0] to get the entry.
if dataset == "train":
    selected_dataset = train_dataset
else:
    selected_dataset = valid_dataset
entry = selected_dataset.dataset[index][0]
show_entry("{} {}".format(dataset, index), entry)
