In [1]:
import os
from numpy import mean

from coeditor.c3problem import (
    C3ProblemGenerator,
    C3ProblemTokenizer,
    C3ToCodeCompletion,
    CompletionKind,
)

from coeditor.common import *
from coeditor.dataset import make_or_load_dataset
from coeditor.encoding import inline_output_tokens, tokens_to_change
from coeditor.experiments.code_completion import (
    C3CompletionGenerator,
    CodeT5Wrapper,
    FIMModel,
    infill_with_coeditor,
)
from coeditor.experiments.in_coder import InCoderWrapper
from coeditor.experiments.santa_coder import SantaCoderWrapper
from coeditor.model import RetrievalEditorModel

# Run everything from the directory returned by proj_root().
os.chdir(proj_root())

# --- experiment settings ---
dataset_name = "perm2k"
device = "cuda"
N_test = 5000
use_additions = True
use_modifications = True

# The FIM generator and the C3 -> completion transform are configured with the
# same two switches, so build the keyword arguments once.
_edit_kind_flags = {
    "use_additions": use_additions,
    "use_modifications": use_modifications,
}

# Test split, in FIM format.
_fim_generator = C3CompletionGenerator(**_edit_kind_flags)
fim_probs = make_or_load_dataset(
    dataset_name,
    _fim_generator,
    splits=("test",),
    time_limit_per_commit=30,
    remake_problems=False,
)["test"]
print(f"{len(fim_probs) = }")

# Test split, in C3 format. The raw problems are immediately replaced by the
# flattened output of the transform, so the raw list is not kept around.
c3_probs = make_or_load_dataset(
    dataset_name,
    C3ProblemGenerator(),
    splits=("test",),
    time_limit_per_commit=40,
)["test"]
transform = C3ToCodeCompletion(**_edit_kind_flags)
c3_probs = join_list(map(transform.transform, c3_probs))
print(f"{len(c3_probs) = }")

pmap: _process_commits: 100%|██████████| 50/50 [08:51<00:00, 10.62s/repo]


Removing workdir: /tmp/dataset_from_projects/pid-29537
Time stats:


Unnamed: 0,name,count,avg_time,total_time
3,post_edit_analysis,16207,0.039852,645.882481
0,checkout,48671,0.009354,455.286273
1,parse_module,28126,0.010005,281.38913
5,process_change,16207,0.014285,231.519041
2,JModuleChange.from_modules,29669,0.002917,86.535892
4,pre_edit_analysis,16207,2e-06,0.029807


Dataset total size (n=50): 126.45 MB
len(fim_probs) = 17757


pmap: _process_commits: 100%|██████████| 50/50 [00:05<00:00,  9.17repo/s]


Dataset total size (n=1649): 5150.76 MB
len(c3_probs) = 17744


In [33]:
def _first_by_uid(probs):
    """Return a dict mapping each uid to the first problem in `probs` that has it."""
    by_uid = {}
    for prob in probs:
        by_uid.setdefault(prob.uid(), prob)
    return by_uid


# keep only problems that appear in both sets
#
# Later cells pair fim_probs[i] with c3_probs[i] by position. Filtering and
# sorting the two lists independently only keeps them in step when no uid is
# repeated inside either list; a single repeated uid would shift every later
# pair. Indexing by uid and materialising both lists from the same sorted uid
# sequence makes the pairing hold by construction.
_fim_by_uid = _first_by_uid(fim_probs)
_c3_by_uid = _first_by_uid(c3_probs)
fim_ids = set(_fim_by_uid)
c3_ids = set(_c3_by_uid)
common_ids = fim_ids.intersection(c3_ids)
print(f"{len(common_ids) = }")

_ordered_ids = sorted(common_ids)
fim_probs = [_fim_by_uid[uid] for uid in _ordered_ids]
c3_probs = [_c3_by_uid[uid] for uid in _ordered_ids]
assert len(fim_probs) == len(c3_probs) == len(common_ids)

len(common_ids) = 17738


In [3]:
# Load the Coeditor model from the path given by get_coeditor_model_path(),
# call .half() on it (presumably a cast to fp16 -- TODO confirm), and move it
# to `device`.
coeditor = RetrievalEditorModel.load(get_coeditor_model_path())
coeditor.half()
coeditor.to(device)

# Load the InCoder baseline ("facebook/incoder-6B") with half_precision=True
# and move its wrapped model to the same device.
incoder6B = InCoderWrapper.from_pretrained("facebook/incoder-6B", half_precision=True)
incoder6B.model.to(device)
# A bare `None` as the last expression keeps the notebook from echoing the
# value of the `.to(device)` call above.
None

XGLMForCausalLM(
  (model): XGLMModel(
    (embed_tokens): Embedding(50518, 4096, padding_idx=1)
    (embed_positions): XGLMSinusoidalPositionalEmbedding()
    (layers): ModuleList(
      (0-31): 32 x XGLMDecoderLayer(
        (self_attn): XGLMAttention(
          (k_proj): Linear(in_features=4096, out_features=4096, bias=True)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=True)
          (q_proj): Linear(in_features=4096, out_features=4096, bias=True)
          (out_proj): Linear(in_features=4096, out_features=4096, bias=True)
        )
        (activation_fn): GELUActivation()
        (self_attn_layer_norm): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (fc1): Linear(in_features=4096, out_features=16384, bias=True)
        (fc2): Linear(in_features=16384, out_features=4096, bias=True)
        (final_layer_norm): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
      )
    )
    (layer_norm): LayerNorm((4096,), eps=1e-05, elementwise_affin

In [30]:
from coeditor.c3problem import C3Problem
from coeditor.encoding import decode_tokens
from coeditor.experiments.code_completion import FIMProblem
import torch

# Shared tokenizer obtained from C3ProblemTokenizer.for_eval(); run_coeditor
# reads this module-level name to tokenize each C3 problem.
tknizer = C3ProblemTokenizer.for_eval()

def run_incoder(prob: FIMProblem):
    """Infill `prob` with the global `incoder6B` model and score the result.

    The left and right context lines are joined with newlines and passed to
    `incoder6B.infill` (max_length=128) under `torch.no_grad()`. Only the first
    line of a non-empty prediction is kept.

    Prediction and label (`prob.middle`) are each placed between the last
    left-context line and the first right-context line (when those exist)
    before being compared with `code_equal`.

    Returns:
        A `(correct, log)` pair: the `code_equal` result and the text produced
        by `show_sections` for the kind, prediction, label and both contexts.
    """
    before_lines = prob.left_ctx
    after_lines = prob.right_ctx
    left_ctx = "\n".join(before_lines) + "\n"
    right_ctx = "\n" + "\n".join(after_lines)

    with torch.no_grad():
        pred = incoder6B.infill(left_ctx, right_ctx, max_length=128)
    if pred:
        # Everything after the first newline is discarded.
        pred = pred.partition("\n")[0]

    # Adjacent lines that surround both the prediction and the label.
    if before_lines:
        left_part = before_lines[-1] + "\n"
    else:
        left_part = ""
    if after_lines:
        right_part = "\n" + after_lines[0]
    else:
        right_part = ""

    def in_context(middle):
        return left_part + middle + right_part

    correct = code_equal(in_context(pred), in_context(prob.middle))

    sections = [
        ("Kind", prob.kind),
        ("Incoder output", pred),
        ("Label", prob.middle),
        ("Left context", left_ctx),
        ("Right context", right_ctx),
    ]
    return correct, show_sections(*sections)

def run_coeditor(prob: C3Problem):
    """Infill `prob` with the global `coeditor` model and score the result.

    The problem is tokenized with the module-level `tknizer`, then passed to
    `infill_with_coeditor`. The model output and the reference output tokens
    are each inlined into the main tokens and converted to a change; the
    `.after` sides of the two changes are compared with `code_equal`.

    Returns:
        A `(correct, log)` pair: the `code_equal` result and the text produced
        by `show_sections` for the decoded output, label and main context.
    """
    tk_prob = tknizer.tokenize_problem(prob)
    output = infill_with_coeditor(coeditor, tk_prob)

    def code_after(output_tks):
        # Apply `output_tks` to the main tokens and return the resulting code.
        inlined = inline_output_tokens(tk_prob.main_tks, output_tks)
        return tokens_to_change(inlined).after

    correct = code_equal(code_after(output), code_after(tk_prob.output_tks))

    sections = (
        ("Coeditor output", f"{decode_tokens(output)}"),
        ("Label", f"{decode_tokens(tk_prob.output_tks)}"),
        ("Main context", f"{decode_tokens(tk_prob.main_tks)}"),
    )
    return correct, show_sections(*sections)

In [56]:
# Positions in fim_probs whose problem kind is "mod", visited in a fixed
# pseudo-random order (seed 42).
shuff_ids = [
    pos for pos in range(len(fim_probs)) if fim_probs[pos].kind == "mod"
]
_shuffle_rng = random.Random(42)
_shuffle_rng.shuffle(shuff_ids)
print(f"{len(shuff_ids) = }")

len(shuff_ids) = 13396


In [57]:
# Scan the shuffled "mod" problems and collect examples that Coeditor gets
# right while InCoder gets wrong.

# Position in shuff_ids at which the scan starts.
# NOTE(review): why the first 8 entries are skipped is not recorded -- confirm.
FIRST_CANDIDATE = 8
# Stop as soon as this many cases have been collected.
MAX_CASES = 10

cases = []

for i in tqdm(range(FIRST_CANDIDATE, len(shuff_ids))):
    ex_id = shuff_ids[i]
    fim_prob = fim_probs[ex_id]
    c3_prob = c3_probs[ex_id]
    # The two lists are paired purely by position. Fail loudly if they are out
    # of step instead of comparing the two models on different problems.
    assert fim_prob.uid() == c3_prob.uid(), (
        f"fim_probs and c3_probs are misaligned at index {ex_id}"
    )

    coeditor_correct, coeditor_log = run_coeditor(c3_prob)
    incoder_correct, incoder_log = run_incoder(fim_prob)
    if coeditor_correct and not incoder_correct:
        cases.append({"ex_id": ex_id, "coeditor_log": coeditor_log, "incoder_log": incoder_log})
        print(f"{len(cases)} found.")
        if len(cases) >= MAX_CASES:
            break

  0%|          | 2/13388 [00:10<19:30:13,  5.25s/it]

1 found.


  0%|          | 9/13388 [00:43<17:33:03,  4.72s/it]

2 found.


  0%|          | 10/13388 [00:47<16:54:21,  4.55s/it]

3 found.


  0%|          | 11/13388 [00:52<17:33:30,  4.73s/it]

4 found.


  0%|          | 16/13388 [01:18<18:20:32,  4.94s/it]

5 found.


  0%|          | 21/13388 [01:44<19:37:46,  5.29s/it]

6 found.


  0%|          | 22/13388 [01:49<19:19:53,  5.21s/it]

7 found.


  0%|          | 25/13388 [02:03<17:22:09,  4.68s/it]

8 found.


  0%|          | 27/13388 [02:13<18:08:21,  4.89s/it]

9 found.


  0%|          | 30/13388 [02:32<18:52:09,  5.09s/it]

10 found.





In [67]:
# Show one collected case and save the tokenized Coeditor problem for it.
case_dict = cases[8]
print("ex_id:", case_dict["ex_id"])
print(case_dict["coeditor_log"])
tk_prob = tknizer.tokenize_problem(c3_probs[case_dict["ex_id"]])
coeditor_format_path = Path("output/coeditor_format.txt")
# write_text does not create missing directories; on a fresh checkout without
# an output/ folder the write would raise FileNotFoundError.
coeditor_format_path.parent.mkdir(parents=True, exist_ok=True)
coeditor_format_path.write_text(tk_prob.show())

ex_id: 929
--------------------------------------------------------------------------------
Coeditor output:
<pad><s><extra_id_0> <add>     return get_async_backend().get_running_tasks()
</s>
--------------------------------------------------------------------------------
Label:
<extra_id_0> <add>     return get_async_backend().get_running_tasks()

--------------------------------------------------------------------------------
Main context:
# module: anyio._core._testing
def get_running_tasks() -> list[TaskInfo]:
    """
    Return a list of running tasks in the current event loop.

    :return: a list of task info objects

    """
 <del>     return get_asynclib().get_running_tasks()
<extra_id_0>



30201

In [68]:
# Show the InCoder log of the selected case and save it to disk.
print(case_dict["incoder_log"])
incoder_log_path = Path("output/incoder_log.txt")
# write_text does not create missing directories; make sure output/ exists.
incoder_log_path.parent.mkdir(parents=True, exist_ok=True)
incoder_log_path.write_text(case_dict["incoder_log"])

--------------------------------------------------------------------------------
Kind:
mod
--------------------------------------------------------------------------------
Incoder output:
    return get_asynclib().get_running_tasks()
--------------------------------------------------------------------------------
Label:
    return get_async_backend().get_running_tasks()
--------------------------------------------------------------------------------
Left context:
# module: anyio._core._testing
class TaskInfo:
    """
    Represents an asynchronous task.

    :ivar int id: the unique identifier of the task
    :ivar parent_id: the identifier of the parent task, if any
    :vartype parent_id: Optional[int]
    :ivar str name: the description of the task (if any)
    :ivar ~collections.abc.Coroutine coro: the coroutine object of the task
    """

    __slots__ = '_name', 'id', 'parent_id', 'name', 'coro'

    def __init__(self, id: int, parent_id: int | None, name: str | None,
           

2316