In [6]:
%load_ext autoreload
%autoreload 2

from coeditor.common import *
import os

from coeditor.dataset import TokenizedEditDataset, is_repetitive_edit
from coeditor.model import CoeditorModel, _Tokenizer, DecodingArgs, EvalArgs
from coeditor.history import show_change
from coeditor.encoding import TokenizedEdit, decode_tokens, WindowArgs, tokens_to_change
import shutil
import random

# Switch the working directory to whatever proj_root() returns, so that the
# relative paths used later in this notebook (e.g. "output/inspect_coeditor")
# are resolved against it rather than against the notebook's launch directory.
os.chdir(proj_root())

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# --- Configuration -----------------------------------------------------------
# n_samples is only consumed by the optional sub-sampling step at the bottom
# of this cell, which is currently disabled.
n_samples = 50
wargs = WindowArgs(4096)
test_data_name = "small"

# --- Load the tokenized test split and truncate each edit's context ----------
test_data_path = get_dataset_dir(test_data_name) / "tokenized-file_based" / "test.pkl"
test_data: TokenizedEditDataset = pickle_load(test_data_path).map(
    lambda edit: edit.truncate_ctx(wargs)
)

# --- Select the edits flagged as repetitive ----------------------------------
# Materialize the edits once so the filter and the count share one pass.
all_test_edits = list(test_data.all_edits())
rep_edits = [edit for edit in all_test_edits if is_repetitive_edit(edit)]
print("Total edits: ", len(all_test_edits))
print("Repetitive edits: ", len(rep_edits))

# Optional: evaluate on a fixed random subset of the repetitive edits instead.
# random.seed(123)
# random.shuffle(rep_edits)
# rep_edits = rep_edits[:n_samples]


Total edits:  2398
Repetitive edits:  191


In [8]:
import torch  # local to this cell: only needed to probe which devices exist

# Alternative checkpoint:
# model_dir = get_model_dir(trained=True) / "small"
model_dir = get_model_dir(trained=True) / "coeditor-small-skip"
model = CoeditorModel.load_pretrained(model_dir)

# Keep using the second GPU when the machine has one (the original choice),
# but fall back instead of failing with "invalid device ordinal" on hosts with
# a single GPU or none.
# NOTE(review): CPU evaluation is assumed to be supported by the model, only
# much slower -- confirm before relying on it.
if torch.cuda.is_available():
    device = "cuda:1" if torch.cuda.device_count() > 1 else "cuda:0"
else:
    device = "cpu"
model.to(device)

# First argument is twice the 4096 window size passed to WindowArgs.
eval_args = EvalArgs(4096 * 2, WindowArgs(4096))


In [14]:
# Evaluate the model on the repetitive-edit subset only.
# The flag is set on the model object itself, so it stays in effect for every
# later cell that uses `model` until it is changed again.
model.skip_unchanged = True

rep_data = TokenizedEditDataset.from_edits(rep_edits)
rep_result = model.eval_on_data(rep_data, eval_args)
display(rep_result)


evaluate loss: 100%|██████████| 57/57 [00:30<00:00,  1.88batch/s]


{'loss_per_ex': (mean=6.687, weight=191),
 'loss_per_tk': (mean=0.17636, weight=7242),
 'prob_per_ex': (mean=0.17349, weight=191)}

In [19]:
# Evaluate on the whole context-truncated test set, using the same eval_args
# as the repetitive-edit evaluation so the two results are comparable.
# NOTE: this runs with whatever value model.skip_unchanged currently holds.
all_result = model.eval_on_data(test_data, eval_args)
display(all_result)


evaluate loss: 100%|██████████| 663/663 [05:32<00:00,  1.99batch/s]


{'loss_per_ex': (mean=41.051, weight=2398),
 'loss_per_tk': (mean=0.76586, weight=128535),
 'prob_per_ex': (mean=0.035363, weight=2398)}

In [18]:
# Decode a prediction for every repetitive edit, score it against the ground
# truth, and write one comparison file per example to
# output/inspect_coeditor/<model dir name>/{correct,incorrect}/ex-<i>.txt
# (relative to the current working directory).
out_dir = Path("output/inspect_coeditor") / model_dir.name
# Wipe results of any previous run so stale files are not mixed with new ones.
shutil.rmtree(out_dir, ignore_errors=True)
(out_dir / "correct").mkdir(parents=True, exist_ok=True)
(out_dir / "incorrect").mkdir(parents=True, exist_ok=True)

decode_args = DecodingArgs()
exact_match = WeightedSum(0, 0)
predictions = []  # predicted token sequences, in the same order as rep_edits

for i, ex in enumerate(tqdm(rep_edits)):
    pred_tks = model.predict(ex.input_tks, decode_args)
    predictions.append(pred_tks)

    # Build the predicted change from the same input tokens as the ground
    # truth, so the two differ only in the output tokens.
    pred_change = TokenizedEdit(ex.path, ex.input_tks, pred_tks).as_change(True)
    truth_change = ex.as_change(True)

    # A prediction counts as correct when its resulting code equals the ground
    # truth after both sides are passed through normalize_code_by_ast.
    is_correct = normalize_code_by_ast(pred_change.after) == normalize_code_by_ast(
        truth_change.after
    )
    exact_match += WeightedSum(int(is_correct), 1)

    compare_str = ex.show_prediction(pred_tks)
    out_file = out_dir / ("correct" if is_correct else "incorrect") / f"ex-{i}.txt"
    # Pin the encoding: without it write_text uses the locale's default, which
    # can fail on source text containing non-ASCII characters.
    out_file.write_text(compare_str, encoding="utf-8")

print("Exact match: ", exact_match.average())


100%|██████████| 191/191 [00:00<00:00, 238.80it/s]

Exact match:  0.38219895287958117



