In [1]:
# Dataset file pattern; passed to processing.items_from_filepattern in run_eval.
filepattern = "tabby/dataset/data.jsonl"
# Base URL handed to the Tabby client in run_eval.
api = "http://localhost:8080"
# run_eval stops once it has yielded this many records.
max_records = 3

In [2]:
import pandas as pd
from itables import init_notebook_mode
import itables.options as opt

# Activate itables for this notebook. all_interactive=True presumably makes
# every DataFrame render as an interactive table — confirm against the
# itables documentation for the installed version.
init_notebook_mode(all_interactive=True)

<IPython.core.display.Javascript object>

In [6]:
from tabby_client import Client
from tabby_client.api.v1 import health
from tabby_client.api.v1 import completion

from tabby_client.models import CompletionRequest, CompletionRequest, Segments, Choice

import processing
import editdistance


def valid_item(item: processing.Item, max_body_lines: int = 10):
    """Decide whether a dataset item is short enough to be evaluated.

    Args:
        item: Dataset item; only its ``body`` string is inspected.
        max_body_lines: Largest number of lines (as counted by
            ``str.splitlines``) the body may contain. Defaults to 10, the
            limit that used to be hard-coded here.

    Returns:
        True when the body has at most ``max_body_lines`` lines, else False.
    """
    return len(item.body.splitlines()) <= max_body_lines


def scorer(label, prediction):
    """Score a prediction by its edit distance to the label, scaled to [0, 1].

    The score is ``1 - distance / len(label)`` clamped at 0.0, where
    ``distance`` is whatever ``editdistance.eval`` returns for the pair.

    Args:
        label: Expected text.
        prediction: Text produced by the model.

    Returns:
        A float in [0.0, 1.0]; 1.0 means the prediction equals the label.
    """
    if not label:
        # There is no length to normalise by (this used to raise
        # ZeroDivisionError). An empty label is matched perfectly only by
        # an empty prediction.
        return 1.0 if not prediction else 0.0

    distance = editdistance.eval(label, prediction)
    # A prediction much longer than the label can push the ratio past 1,
    # hence the clamp.
    return max(0.0, 1.0 - distance / len(label))


def _first_line(text):
    """Return the first line of ``text``, or "" when it contains no lines."""
    lines = text.splitlines()
    return lines[0] if lines else ""


def run_eval():
    """Yield one scored record per evaluated dataset item.

    Connects to the server at ``api``, checks its health, then walks the
    items produced by ``processing.items_from_filepattern(filepattern)``.
    Items rejected by ``valid_item`` are skipped. For every other item a
    completion is requested for ``item.prefix`` and compared with
    ``item.body``, both as a whole block and on the first line only.

    Yields:
        dict with keys ``prompt``, ``prediction``, ``label``,
        ``block_score`` and ``line_score``. Iteration stops after
        ``max_records`` records have been yielded. If the health check
        fails, a message is printed and nothing is yielded.
    """
    client = Client(base_url=api)
    try:
        health.sync(client=client)
    except Exception as err:
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit still propagate, and show what actually failed.
        print(f"Tabby Server is not ready, please check if '{api}' is correct.")
        print(f"Health check failed with: {err!r}")
        return

    num_records = 0
    for item in processing.items_from_filepattern(filepattern):
        if not valid_item(item):
            continue

        request = CompletionRequest(
            language=item.language, segments=Segments(prefix=item.prefix)
        )

        resp = completion.sync(client=client, json_body=request)
        label = item.body
        # NOTE(review): the client may hand back no response or an empty
        # choice list — confirm against tabby_client. Either case is scored
        # as an empty prediction instead of raising.
        choices = resp.choices if resp is not None else None
        prediction = choices[0].text if choices else ""

        block_score = scorer(label, prediction)
        # splitlines() is empty for an empty string, so indexing [0]
        # directly would raise IndexError on an empty label or prediction.
        line_score = scorer(_first_line(label), _first_line(prediction))

        yield dict(
            prompt=item.prefix,
            prediction=prediction,
            label=label,
            block_score=block_score,
            line_score=line_score,
        )

        num_records += 1
        if num_records >= max_records:
            break

In [7]:
# Exhaust the run_eval() generator (this is where the completion requests are
# made) and collect the yielded dicts into a DataFrame, one row per record.
df = pd.DataFrame(list(run_eval()))

In [9]:
# Sanity check: show which columns the evaluation produced.
print(df.columns)

Index(['prompt', 'prediction', 'label', 'block_score', 'line_score'], dtype='object')


In [None]:
import matplotlib.pyplot as plt

# One row, two panels sharing both axes: first-line scores on the left,
# whole-block scores on the right.
fig, axes = plt.subplots(1, 2, sharex=True, sharey=True, figsize=(15, 5))

df.hist(column="line_score", ax=axes[0])

df.hist(column="block_score", ax=axes[1])

In [None]:
# CSS rule for table cells holding source code: keep whitespace as written,
# use a monospace font and left-align the text.
codeStyle = dict(
    selector="td",
    props=[
        ("white-space", "pre"),
        ("font-family", "monospace"),
        ("text-align", "left"),
    ],
)

# Attach the rule to each of the three code-bearing columns.
df.style.set_table_styles(
    {column: [codeStyle] for column in ("prompt", "prediction", "label")}
)