In [2]:
import torch

from modeldef import load_model

# Checkpoint name handed to `load_model` in the next cell.
MODEL_NAME = "facebook/incoder-1B"

# The next cell moves the model onto the GPU, so stop here with a readable
# error when CUDA is missing. An exception halts this cell (and a "Run All" in
# progress) immediately; `exit` is a helper meant for the interactive shell and
# is not a dependable way to abort a notebook cell.
if not torch.cuda.is_available():
    raise RuntimeError(
        "CUDA is not available; this notebook needs a GPU to run the model."
    )
device = torch.device("cuda")


In [3]:
# `load_model` (from the project's `modeldef` module) returns a pair that is
# unpacked here as the model and its tokenizer.
model, tokenizer = load_model(MODEL_NAME)
# Move the model onto the GPU; the first cell has already checked that CUDA is
# available.
model = model.cuda()


In [4]:
from tqdm import tqdm
from torchtext.data.metrics import bleu_score

import modeldef


def evaluate(prompt_model, tokenizer, dataloader):
    """Run infilling over ``dataloader`` and return the corpus BLEU score.

    For every item yielded by ``dataloader`` the ``"text"`` field of the
    infill result is compared against the item itself. Both sides are stripped
    and split on whitespace, because ``bleu_score`` works on token sequences:
    one token list per candidate and, per candidate, a list of reference token
    lists.

    NOTE(review): the reference for each sample is the raw input itself --
    confirm that this is the intended ground truth.

    Args:
        prompt_model: model handed to ``modeldef.infill``; put into eval mode
            with ``.eval()`` before the loop.
        tokenizer: tokenizer handed to ``modeldef.infill``.
        dataloader: iterable of inputs. Each item must support ``.strip()``
            (presumably a string -- TODO confirm against the data pipeline).

    Returns:
        Whatever ``bleu_score`` returns for the collected corpus.
    """
    predictions = []
    ground_truths = []
    print("\nbegin evaluation")

    prompt_model.eval()
    pbar = tqdm(dataloader, desc="Evaluate")

    for inputs in pbar:
        output_sentence = modeldef.infill(prompt_model, tokenizer, inputs)
        # append, not extend: extending a list with a string would add its
        # characters one by one instead of one entry per sample.
        predictions.append(output_sentence["text"].strip().split())
        # Each candidate gets a *list* of references (here exactly one).
        ground_truths.append([inputs.strip().split()])

    assert len(predictions) == len(ground_truths), (
        len(predictions),
        len(ground_truths),
    )
    # show one example (skipped when the dataloader yielded nothing)
    if predictions:
        print(f"predictions {predictions[0]}, ground_truths {ground_truths[0]}")

    score = bleu_score(predictions, ground_truths)
    return score


In [5]:
from enum import Enum
import random
from modeldef import FILE, INFILL


class SampleMethod(Enum):
    """Selects which part of a token sequence ``datum2sample`` masks out."""

    # Mask the tokens before a random cut point.
    Begin = "begin"
    # Mask the tokens from a random cut point to the end.
    End = "end"
    # Mask the tokens between two random cut points.
    Span = "span"
    # Mask every token.
    All = "all"


def datum2sample(j, sample_method: SampleMethod):
    """Turn one dataset record into an infilling sample.

    Args:
        j: mapping with an ``"nl"`` entry (returned unchanged as the comment)
            and a ``"code_tokens_normalized"`` entry (the token sequence that
            gets masked).
        sample_method: which part of the token sequence to mask.

    Returns:
        A ``(comment, unmasked, masked)`` triple. ``unmasked`` is the token
        list with ``INFILL`` standing in for the hidden part (``End`` and
        ``All`` additionally append ``FILE``); ``masked`` holds the hidden
        tokens.

    Raises:
        KeyError: if ``sample_method`` is not one of the four members.
        ValueError: from ``random.randrange`` when the token list is empty.
    """
    comment: list[str] = j["nl"]
    code: list[str] = j["code_tokens_normalized"]

    # Both cut points are always drawn, whatever the mode, so the amount of
    # randomness consumed per call does not depend on `sample_method`.
    lo = random.randrange(0, len(code))
    hi = random.randrange(lo, len(code))

    before_lo, from_lo = code[:lo], code[lo:]
    before_hi, from_hi = code[:hi], code[hi:]

    builders = {
        SampleMethod.All: lambda: ([INFILL, FILE], code[:]),
        SampleMethod.Begin: lambda: ([INFILL, *from_lo], before_lo),
        SampleMethod.End: lambda: ([*before_hi, INFILL, FILE], from_hi),
        SampleMethod.Span: lambda: ([*before_lo, INFILL, *from_hi], code[lo:hi]),
    }
    unmasked, masked = builders[sample_method]()

    return comment, unmasked, masked


def concat_tokens(l: list[str]):
    """Join tokens into one string, separated by single spaces.

    No space is inserted after a token that ends with a newline, so the next
    token starts its line at column 0. This applies to every token, including
    the first one. An empty list yields an empty string.

    Args:
        l: tokens to join.

    Returns:
        The joined text.
    """
    pieces = []
    # True at the very start and right after a token ending in "\n": in both
    # situations the next token must not be preceded by a separator.
    at_line_start = True
    for token in l:
        if not at_line_start:
            pieces.append(" ")
        pieces.append(token)
        at_line_start = token.endswith("\n")

    return "".join(pieces)


In [6]:
import json
from pathlib import Path


# Preprocessed dataset splits, located relative to the current working
# directory.
datadir = Path("data")
dev = datadir / "dev_sampled_preprocessed.json"
test = datadir / "test_sampled_preprocessed.json"
train = datadir / "train_sampled_preprocessed.json"
paths = [dev, test, train]

# NOTE(review): `j` is rebound on every iteration, so after the loop it holds
# only the content of the last file in `paths` (the train split). Confirm
# whether the other two splits are meant to be kept as well.
for path in paths:
    # `with` closes each file as soon as it has been parsed; the explicit
    # encoding keeps the result independent of the platform default.
    with path.open("r", encoding="utf-8") as f:
        j = json.load(f)


In [13]:
from modeldef import infill
from pprint import pprint



In [56]:
# Prompt with only the task description, one import and the trailing plotting
# code; the data-generation code is left as the single hole.
example = """
# Generate the data according to $$ y = \\sum { i=0 } ^ 5 \\betaix^i+e, $$ where $ \\beta $ are random random integers from [2, 11) , $x$ is the grid of 33 equidistant points on [0.1, 0.9] , and $ e\\sim N (0, 7) $. Plot the data.

import matplotlib.pylab as plt

<infill>
plt.plot(x, y, 'o', label='Data')
plt.legend(loc='best')
plt.show()
"""


# Split the prompt at the infill marker (assumes INFILL is the literal
# `<infill>` tag used above -- TODO confirm) and print the generated fills.
pred = infill(model, tokenizer, example.split(INFILL))
pprint(pred["infills"])

['x = np.linspace(0.1, 0.9,33)\ny = 0.1 * x + 7 * np.random.randn(33)']


In [57]:
# Same task as the previous cell, but the prompt now starts with one solved
# example (different ranges and noise) before the description with the hole.
example = """
# Generate the data according to $$ y = \\sum { i=0 } ^ 5 \\betaix^i+e, $$ where $ \\beta $ are random random integers from [0, 10) , $x$ is the grid of 50 equidistant points on [0, 1] , and $ e\\sim N (0, 5) $.

import matplotlib.pylab as plt

beta = np.random.randint(0, 10, 6)
npoints = 50
x = np.linspace(0, 1, npoints)
y = np.poly1d(beta)(x)
y += np.random.normal(0, 5, y.shape)

# Generate the data according to $$ y = \\sum { i=0 } ^ 5 \\betaix^i+e, $$ where $ \\beta $ are random random integers from [2, 11) , $x$ is the grid of 33 equidistant points on [0.1, 0.9] , and $ e\\sim N (0, 7) $. Plot the data.

import matplotlib.pylab as plt

<infill>
plt.plot(x, y, 'o', label='Data')
plt.legend(loc='best')
plt.show()
"""


# Split the prompt at the infill marker (assumes INFILL is the literal
# `<infill>` tag used above -- TODO confirm) and print the generated fills.
pred = infill(model, tokenizer, example.split(INFILL))
pprint(pred["infills"])


['beta = np.random.randint(0, 10, 6)\n'
 'npoints = 33\n'
 'x = np.linspace(0.1, 0.9, npoints)\n'
 'y = np.poly1d(beta)(x)\n'
 'y += np.random.normal(0, 7, y.shape)\n'
 '\n'
 '# Plot the data.']


In [58]:
# Prompt with several solved examples followed by a new description; the hole
# sits at the very end, directly before the end-of-file tag, so the model has
# to produce the whole solution including the plotting calls.
#
# NOTE(review): inside the prompt, the second solved example has the text
# ". Plot the data." attached to the `c = ...` code line rather than to its
# comment line, and it calls `np.poly1d(r)` although the coefficients are
# named `c`. Confirm whether these are intentional before reusing the prompt.
example = """
# Generate the data according to $$ y = \\sum { i=0 } ^ 5 \\betaix^i+e, $$ where $ \\beta $ are random random integers from [0, 10) , $x$ is the grid of 50 equidistant points on [0, 1] , and $ e\\sim N (0, 5) $. Plot the data.

import matplotlib.pylab as plt

beta = np.random.randint(0, 10, 5+1)
npoints = 50
x = np.linspace(0, 1, npoints)
y = np.poly1d(beta)(x)
y += np.random.normal(0, 5, y.shape)
plt.plot(x, y, 'o', label='Data')
plt.legend(loc='best')
plt.show()

# $ l\\sim N (4, 5) $,
l = np.random.normal(4, 5, y.shape)

# Generate the data according to $$ p = \\sum { i=0 } ^ 7 cx $$, where $ c $ are random integers from [2.6, 3.7), $x$ is the grid of 80 equidistant points on [5, 6]
c = np.random.randint(2.6, 3.7, 7+1). Plot the data.
npoints = 80
x = np.linspace(5, 6, npoints)
p = np.poly1d(r)(x)
plt.plot(x, p, 'o', label='Data')
plt.legend(loc='best')
plt.show()

# Generate the data according to $$ z = \\sum { i=0 } ^ 3 \\betaix^i+e, $$ where $ \\beta $ are random random integers from [2, 11) , $x$ is the grid of 33 equidistant points on [0.1, 0.9] , and $ e\\sim N (1, 7) $. Plot the data.

import matplotlib.pylab as plt\n\n<infill>\n<|/ file |>
"""


# Split the prompt at the infill marker (assumes INFILL is the literal
# `<infill>` tag used above -- TODO confirm) and print the generated fills.
pred = infill(model, tokenizer, example.split(INFILL))
pprint(pred["infills"])


['beta = np.random.randint(2, 11, 3+1)\n'
 'npoints = 33\n'
 'x = np.linspace(0.1, 0.9, npoints)\n'
 'y = np.poly1d(beta)(x)\n'
 'y += np.random.normal(0, 7, y.shape)\n'
 "plt.plot(x, y, 'o', label='Data')\n"
 "plt.legend(loc='best')\n"
 'plt.show()']


In [60]:
# Prompt with a function definition followed by a natural-language request;
# the hole is the argument list of a single `create_ball(...)` call.
#
# NOTE(review): the function body in the prompt compares against `feed_r`,
# while the parameter is named `r` -- confirm whether that is intentional.
example = """
@ti.func
def create_ball(r, p: vec3, color):
    ''' create a ball at position `p` with radius `r` and color `color` '''
    for i, j, k in ti.ndrange((-r, r), (-r, r), (-r, r)):
        x = ivec3(i, j, k)
        if distance(x, vec3(0, 0, 0)) + 0.5 <= feed_r:
            scene.set_voxel(p + vec3(i, j, k), 2, color)

# create a red ball at (1, 1, 1) with radius of 5
create_ball(<infill>)
<|/ file |>
"""

# Split the prompt at the infill marker (assumes INFILL is the literal
# `<infill>` tag used above -- TODO confirm) and print the generated fills.
pred = infill(model, tokenizer, example.split(INFILL))
pprint(pred["infills"])


['vec3(1, 1, 1), vec3(1, 1, 1), vec3(1, 0, 0))\n'
 '\n'
 '# create a green ball at (0, 1, 1) with radius of 5\n'
 'create_ball(vec3(0, 1, 1), vec3(0, 1, 1), vec3(0, 1, 0))\n'
 '\n'
 '# create a blue ball at (0, 0, 1) with radius of 5\n'
 'create_ball(vec3(0, 0, 1), vec3(0, 0, 1), vec3(0, 0, 0))\n'
 '\n'
 '# create a yellow ball at (1, 1, 0) with radius of 5\n'
 'create_ball(vec3(1, 1, 0), vec3(1, 1, 0), vec3(1, 0, 0))\n'
 '\n'
 '# create a white ball at (0, 1, 0) with radius of 5\n'
 'create_ball(vec3(0, 1, 0), vec3(0, 1, 0), vec']


In [99]:
# Prompt with the function definition plus several solved calls that refer to
# earlier balls by a quoted nickname; two holes are left, the second one
# depending on the ball described for the first.
example = """
@ti.func
def create_ball(r, p: vec3, color):
    ''' create a ball at position `p` with radius `r` and color `color` '''
    for i, j, k in ti.ndrange((-r, r), (-r, r), (-r, r)):
        x = ivec3(i, j, k)
        if distance(x, vec3(0, 0, 0)) <= r:
            scene.set_voxel(p + vec3(i, j, k), 2, color)

# create a red ball "some ball" at (1, 1, 1) with radius of 5
create_ball(5, vec3(1, 1, 1), vec3(255, 0, 0))

# create a white ball with radius bigger than "some ball"
create_ball(6.7, vec3(1, 1, 1), vec3(255, 255, 255))

# create a crimson ball with radius bigger than "some ball"
create_ball(10, vec3(1, 1, 1), vec3(220, 20, 60))

# create a crimson ball with radius smaller than "some ball"
create_ball(1, vec3(1, 1, 1), vec3(220, 20, 60))

# create a blue ball "this ball" at (-1, 0.7, 9) with radius of 2.3
<infill>

# create a green ball at the same position with radius bigger than "this ball"
<infill>
<|/ file |>
"""

# Unlike the other cells, this call passes an explicit `temperature=0.1` to
# `infill`. The prompt is split at the infill marker (assumes INFILL is the
# literal `<infill>` tag used above -- TODO confirm).
pred = infill(model, tokenizer, example.split(INFILL), temperature=0.1)
pprint(pred["infills"])


['create_ball(2.3, vec3(-1, 0.7, 9), vec3(0, 0, 255))',
 'create_ball(3.5, vec3(-1, 0.7, 9), vec3(0, 255, 0))\n'
 '\n'
 '# create a green ball at the same position with radius smaller than "this '
 'ball"\n'
 'create_ball(3.5, vec3(-1, 0.7, 9), vec3(0, 0, 0))\n'
 '\n'
 '# create a green ball "this ball" at (-1, 0.7, 9) with radius of 2.3\n'
 'create_ball(2.3, vec3(-1, 0.7, 9), vec3(0, 0, 0))\n'
 '\n'
 '# create a green ball "this ball" at (-1, 0.7, 9) with radius of 2.3\n'
 'create_ball(2.3, vec3(-1, 0.7, 9), vec3(0, 0, 0))\n'
 '\n'
 '# create a green ball "this ']


In [105]:
# Prompt made only of solved `create_ball` calls that stack balls on top of
# each other, followed by two holes: a new ball and one placed on top of it.
#
# NOTE(review): the prompt labels `vec3(0, 0, 0)` as "# white", whereas the
# previous cell uses `vec3(255, 255, 255)` for white -- confirm whether the
# mismatch is intentional.
example = """
# white
color = vec3(0, 0, 0)

# create "some ball"
create_ball(1, vec3(0, 0, 0), color)

# place another ball on the top of "some ball" with radius of 1
create_ball(1, vec3(0, 0, 2), color)

# place another ball on the top of "some ball"
create_ball(2, vec3(0, 0, 3), color)

# create "yet some ball" with radius of 4
create_ball(4, vec3(2, 3, 2), color)

# place another ball on the top of "yet some ball"
create_ball(1, vec3(2, 3, 7), color)

# place another ball on the top of "some ball"
create_ball(2, vec3(2, 3, 8), color)

# create a red ball at position (1, 1, 1) with radius of 5
<infill>

# place another ball on the top of the red ball with radius of 3
<infill>
<|/ file |>
"""

# Split the prompt at the infill marker (assumes INFILL is the literal
# `<infill>` tag used above -- TODO confirm) and print the generated fills.
pred = infill(model, tokenizer, example.split(INFILL))
pprint(pred["infills"])


['create_ball(1, vec3(1, 1, 1), color)',
 'create_ball(1, vec3(1, 1, 2), color)\n'
 '\n'
 '# place another ball on the top of the red ball\n'
 'create_ball(2, vec3(1, 1, 3), color)\n'
 '\n'
 '# place another ball on the top of the red ball\n'
 'create_ball(3, vec3(1, 1, 4), color)\n'
 '\n'
 '# place another ball on the top of the red ball\n'
 'create_ball(4, vec3(1, 1, 5), color)\n'
 '\n'
 '# create a green ball at position (2, 2, 1) with radius of 6\n'
 'create_ball(1, vec3(2, 2, 1), color)\n'
 '\n'
 '# place another ball on the top of the green ball with radius of 3\n'
 'create_ball(1, vec3(2, 2, 2), color)\n'
 '\n'
 '# place another ball on the top of the green ball\n'
 'create_ball']
