In [None]:
!pip install transformers datasets evaluate pylint accelerate peft -q

: 

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hub identifier of the checkpoint; shared by the tokenizer and the weights.
model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# device_map="auto" delegates weight placement to the library
# (presumably via accelerate, which is installed above).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
)


: 

In [None]:
from datasets import load_dataset

# Use a small subset: the split expression keeps only the first 100 training rows.
mbpp = load_dataset(
    "mbpp",
    split="train[:100]",
)

In [None]:
def generate_code(prompt, max_new_tokens=128, include_prompt=True):
    """Generate text from the module-level ``model`` for ``prompt``.

    Args:
        prompt: Text passed to the module-level ``tokenizer``.
        max_new_tokens: Upper bound on the number of newly generated tokens.
        include_prompt: When True (default), the decoded string covers the
            whole returned sequence. When False, the first ``len(prompt
            tokens)`` positions are dropped before decoding so only the
            continuation is returned.

    Returns:
        The decoded string with special tokens removed.
    """
    # Follow the model's own device instead of hard-coding "cuda": the model is
    # loaded with device_map="auto", so it may live on CPU or another device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)

    sequence = outputs[0]
    if not include_prompt:
        # Skip as many leading positions as there were input tokens.
        prompt_length = inputs["input_ids"].shape[1]
        sequence = sequence[prompt_length:]
    return tokenizer.decode(sequence, skip_special_tokens=True)


In [None]:
import os
import subprocess
import sys
import tempfile

def run_unit_test(code, test_code, timeout=10):
    """Run ``code`` followed by ``test_code`` in a separate Python process.

    Args:
        code: Python source under test.
        test_code: Python source (e.g. ``assert`` statements) executed after
            ``code`` in the same script.
        timeout: Seconds to wait before the child process is killed.

    Returns:
        A ``(passed, stderr)`` tuple. ``passed`` is True when the child exits
        with status 0; ``stderr`` is its captured standard error. If the
        child exceeds ``timeout``, ``(False, <timeout message>)`` is returned.
    """
    # Join with a real newline so test_code starts on its own line.
    script = code + "\n" + test_code

    # The temporary directory (and the script inside it) is removed on exit,
    # so repeated calls do not leave files behind.
    with tempfile.TemporaryDirectory() as workdir:
        script_path = os.path.join(workdir, "candidate.py")
        with open(script_path, "w", encoding="utf-8") as handle:
            handle.write(script)

        # Prefer the interpreter running this code so the child sees the same
        # installed packages; fall back to "python3" if it is unknown.
        interpreter = sys.executable or "python3"
        try:
            result = subprocess.run(
                [interpreter, script_path],
                capture_output=True,
                text=True,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired:
            return False, f"Timed out after {timeout} seconds"

    return result.returncode == 0, result.stderr

def run_pylint(code):
    """Run the ``pylint`` command on ``code`` and return its standard output.

    Args:
        code: Python source to lint.

    Returns:
        The captured stdout of the ``pylint`` process as a string.
    """
    # The temporary directory (and the file inside it) is removed on exit,
    # so repeated calls do not leave files behind.
    with tempfile.TemporaryDirectory() as workdir:
        source_path = os.path.join(workdir, "generated_code.py")
        with open(source_path, "w", encoding="utf-8") as handle:
            handle.write(code)

        # NOTE(review): "--score y" and "-sn" look contradictory (the latter
        # appears to be the short form of --score=n) - confirm which is wanted.
        result = subprocess.run(
            ["pylint", source_path, "--score", "y", "-rn", "-sn"],
            capture_output=True,
            text=True,
        )
    return result.stdout


In [None]:
def construct_training_example(prompt, generated_code, test_result, pylint_feedback, reference_solution):
    """Build one training record from a problem and the feedback on an attempt.

    Args:
        prompt: Problem statement.
        generated_code: Code produced for the problem.
        test_result: Unit-test feedback to embed in the input text.
        pylint_feedback: Linter feedback to embed in the input text.
        reference_solution: Target text stored under ``"output"``.

    Returns:
        A dict with two keys: ``"input"`` (the four labelled sections, each
        header and body on its own line) and ``"output"`` (the reference
        solution, unchanged).
    """
    # Real newlines separate headers from bodies; an escaped "\\n" would put a
    # literal backslash-n into the text instead of a line break.
    sections = (
        f"# Problem:\n{prompt}",
        f"# Generated Code:\n{generated_code}",
        f"# Unit Test Feedback:\n{test_result}",
        f"# Pylint Feedback:\n{pylint_feedback}",
    )
    return {
        "input": "\n".join(sections),
        "output": reference_solution,
    }
