In [None]:
import pandas as pd
import numpy as np
import json
import re
import os

from transformers import AutoTokenizer, AutoModelForCausalLM

import torch

In [None]:
# Root folder for input data, relative to the notebook's working directory.
DATA_FOLDER = '../data'
# Folder containing the exported C# model sources.
# NOTE(review): not referenced by any cell visible here — confirm it is still needed.
MODELS_FOLDER = '../data/DocProperties/incore-exporter/Workflow.DTO/Models'

In [None]:
# Hugging Face Hub identifier of the checkpoint passed to `from_pretrained`
# for both the tokenizer and the causal language model.
model_id = "Qwen/Qwen3-0.6B"

In [None]:
# Load the tokenizer and the causal-LM weights for `model_id`.
# device_map="auto" delegates device placement to transformers/accelerate.
#
# trust_remote_code is deliberately left at its default (False): enabling it
# lets Python code shipped in the Hub repository run on this machine, and the
# Qwen3 architecture is implemented inside transformers itself (recent
# releases), so the opt-in is not needed for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

In [None]:
# Read the dataset: one JSON object per line (JSON Lines format).
# Whitespace-only lines (e.g. a trailing empty line at the end of the file)
# are skipped, because json.loads raises JSONDecodeError on them.
with open(f"{DATA_FOLDER}/DocProperties_JSONL.jsonl", "r", encoding="utf-8") as f:
    snippets = [json.loads(line) for line in f if line.strip()]

In [None]:
def prompt_template(code):
    """Build the prompt for one C# snippet.

    The prompt ends with the Ukrainian cue "Пояснення:" ("Explanation:"),
    after which the model is expected to continue with its answer.

    Args:
        code: C# source text to embed in the prompt.

    Returns:
        The full prompt string.
    """
    return f"Explain what this C# code does:\n{code}\nПояснення:"


def generate_explanation(code):
    """Generate a one-line explanation of a C# snippet.

    Uses the module-level `tokenizer` and `model`. Sampling is enabled
    (`do_sample=True`), so repeated calls may return different text unless
    the random generator is seeded by the caller.

    Args:
        code: C# source text to explain.

    Returns:
        The first line of the model's continuation with surrounding
        whitespace removed. May be an empty string if nothing was generated.
    """
    input_text = prompt_template(code)
    encoded = tokenizer(input_text, return_tensors="pt")
    input_ids = encoded.input_ids.to(model.device)
    # Pass the mask explicitly so generate() does not have to guess which
    # positions are padding.
    attention_mask = encoded.attention_mask.to(model.device)

    # Some tokenizers define no pad token; fall back to EOS in that case.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_new_tokens=128,
        do_sample=True,
        temperature=0.5,
        repetition_penalty=1.2,
        pad_token_id=pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

    # The returned sequence starts with the prompt tokens. Drop them by
    # position instead of string-replacing the decoded prompt: decoding does
    # not always reproduce the prompt text byte-for-byte, in which case a
    # string replace silently leaves the whole prompt in the result.
    prompt_length = input_ids.shape[-1]
    new_tokens = output[0][prompt_length:]
    generated = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # Keep only the first line of the continuation.
    return generated.strip().split("\n")[0].strip()

In [None]:
# Attach a generated explanation to every record, in file order. Each record
# is mutated in place; the code to explain is read from the content of the
# record's second message.
for snippet in snippets:
    csharp_code = snippet["messages"][1]["content"]
    snippet["explanation"] = generate_explanation(csharp_code)

# Order the records by id (in place) only after generation, then display them.
snippets.sort(key=lambda record: record["id"])
snippets