# Génération de code et tests

In [5]:
import json
from parser import *
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
import torch
import os
from tqdm import tqdm

# Configure PyTorch's CUDA caching allocator before the model is loaded below.
# "expandable_segments:True" is an allocator option intended to reduce memory
# fragmentation (see the PyTorch CUDA memory-management notes).
os.environ.update(PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True")

# Hand unused cached GPU memory back to the driver so this run starts with as
# much free memory as possible.
torch.cuda.empty_cache()


### PARAMETERS

# JSON file with the evaluation set: a sequence of items, each exposing a
# "prompt" entry that is fed to the model.
filename = "./test.json"
# Number of completions sampled for every prompt.
gen_per_prompt = 3
# Markdown report: one table row per prompt followed by the overall totals.
summary_output = "./stats.md"
# Plain-text log containing every prompt and each raw generation.
generation_output = "./gens.txt"

# Path to the extracted model
model_path = "./starcoder_elm_finetuned"

# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Load the weights 4-bit quantized.
bnb_config = BitsAndBytesConfig(load_in_4bit=True)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=bnb_config,
)

# Setup the generation pipeline
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Load the JSON data.
# JSON is UTF-8 by specification; pass the encoding explicitly so prompts with
# non-ASCII characters decode correctly even where the locale default differs.
with open(filename, encoding="utf-8") as f:
    data = json.load(f)

def _success_rate(ok, ko):
    """Return the success percentage, or 0.0 when nothing was attempted."""
    attempts = ok + ko
    return 100 * ok / attempts if attempts else 0.0


# Running totals across all prompts.
successes = 0
fails = 0

# Both outputs are written as UTF-8: the log contains non-ASCII text
# ("Génération") and arbitrary model output, which a locale-dependent default
# encoding may be unable to represent.
with open(summary_output, 'w', encoding="utf-8") as f2:
    with open(generation_output, 'w', encoding="utf-8") as f3:
        # Header
        f2.write("| Prompt No | Successes | Fails | Success rate |\n")
        f2.write("| ----------- | ----------- | ----------- | ----------- |\n")

        # Progress bar here. Prompts are numbered from 1 in BOTH output files
        # so that a row of the summary table can be matched with its section
        # in the generation log.
        for num, item in enumerate(tqdm(data, desc="Generating completions"), start=1):
            prompt = item["prompt"]

            prompt_successes = 0
            prompt_fails = 0

            # NOTE(review): in transformers, `max_length` bounds prompt tokens
            # plus generated tokens; if the intent is "50 new tokens", use
            # `max_new_tokens=50` instead -- confirm before changing.
            results = generator(
                prompt,
                max_length=50,
                num_return_sequences=gen_per_prompt,
                do_sample=True,
                temperature=0.7,
                top_p=0.95,
            )
            f3.write(f"--- Prompt {num} ---\n")
            f3.write(prompt + "\n\n")

            for i, result in enumerate(results, start=1):
                generated = result["generated_text"]

                # A generation counts as a success when parse_elm reports it
                # as parsed; a parser crash is counted as a failure. Only
                # ordinary exceptions are caught so Ctrl-C still stops the run.
                try:
                    parsed, _msg = parse_elm(generated)
                except Exception:
                    parsed = False

                if parsed:
                    prompt_successes += 1
                else:
                    prompt_fails += 1

                f3.write(f"--- Génération {i} ---\n")
                f3.write(generated)
                f3.write("\n\n")

            # Guarded rate: no ZeroDivisionError if no generation was returned.
            rate = _success_rate(prompt_successes, prompt_fails)
            f2.write(f"| {num} | {prompt_successes} | {prompt_fails} | {rate} |\n")

            successes += prompt_successes
            fails += prompt_fails

        f2.write("\n")

    # Generation is finished: release cached GPU memory before the totals.
    torch.cuda.empty_cache()
    f2.write(f"Number of tries: {successes + fails}\n")
    f2.write(f"Number of successes: {successes}\n")
    f2.write(f"Number of fails: {fails}\n")
    # Guarded as well, so an empty prompt file yields a report, not a crash.
    f2.write(f"Succes rate: {_success_rate(successes, fails)}%\n")

Device set to use cuda:0
Generating completions:   0%|          | 0/21 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Generating completions: 100%|██████████| 21/21 [01:46<00:00,  5.08s/it]
