In [None]:
from datasets import load_dataset
import json
from dataclasses import dataclass
from enum import Enum
import zlib
import pickle
from src.utils.utils import evaluate_generations, codegen_metrics

# Fetch the benchmark from the Hub and keep only its "test" split.
lcb_splits = load_dataset("huypn16/LCB-R-F")
dataset = lcb_splits["test"]

In [None]:
from src.tasks.debug.lcb_debug import CodeGenerationProblem
# Accumulators aligned by problem index:
#   generations_all[i] -> the stored failed solutions of problem i
#   samples[i]         -> the evaluation sample built by the problem wrapper
generations_all, samples, responses_all = [], [], []

for idx in range(len(dataset)):
    problem = CodeGenerationProblem(dataset[idx])
    # Row is fetched again (not shared with the wrapper above), as before.
    failed_generations = dataset[idx]["failed_generations"]

    generations = []
    for failed_generation in failed_generations:
        # No repair step here: the recorded failing program is evaluated as-is.
        buggy_program = failed_generation["failed_solution"]
        generations.append(buggy_program)

    generations_all.append(generations)
    samples.append(problem.get_evaluation_sample())

# Run the stored programs against the tests, then compute the metrics.
results, metadata = evaluate_generations(
    samples,
    generations_all,
    num_process_evaluate=32,
    timeout=6,
)
metric = codegen_metrics(samples, generations_all)

In [None]:
# Sanity check: metric[2] should hold one entry per generation for each problem.
# Prints both counts for every problem where they disagree.
for i, instance_metadata in enumerate(metric[2]):
    metadata_count = len(instance_metadata)
    generation_count = len(generations_all[i])
    if metadata_count != generation_count:
        print(metadata_count)
        print(generation_count)

In [None]:
# For every problem, keep the positions of the samples whose metadata entry is
# neither a timeout nor an empty-output record, together with the cleaned entry.
# Entries of metric[2] are handled as strings here (`in` / `.replace`).
valid_ids = {}
error_messages = []
for id_instance, instance in enumerate(metric[2]):
    kept_positions = valid_ids.setdefault(id_instance, [])
    messages = []
    for id_sample, sample in enumerate(instance):
        if "TimeoutException" in sample or '{"output": ""' in sample:
            continue
        # Collapse the error-code / error-message key fragment into a plain comma.
        sample = sample.replace(', "error_code": -2, "error_message":', ",")
        kept_positions.append(id_sample)
        messages.append(sample)

    error_messages.append(messages)
    

In [None]:
# Drop problems that have no usable sample left, and keep error_messages
# aligned with the surviving keys (same order as valid_ids).
# NOTE: this rebinds both names in place, so the cell is meant to run once
# after the cell that builds them.
valid_ids = {
    instance_id: sample_ids
    for instance_id, sample_ids in valid_ids.items()
    if sample_ids
}
error_messages = [error_messages[instance_id] for instance_id in valid_ids]

In [None]:
# For each kept problem, gather the failed solutions at the retained sample
# positions; problems with no retained position contribute nothing.
result = [
    [generations_all[instance_id][sample_id] for sample_id in sample_ids]
    for instance_id, sample_ids in valid_ids.items()
    if len(sample_ids) > 0
]
# Total number of retained failed solutions across all kept problems.
total_samples = sum(len(group) for group in result)
print(total_samples)

In [None]:
# Spot check: how many failed solutions were retained for the first kept problem.
len(result[0])

In [None]:
# `result` and `error_messages` only cover the problems kept in `valid_ids`
# (in that key order), so first narrow the dataset to those rows; add_column
# needs exactly one value per row.
kept_rows = list(valid_ids.keys())
assert len(result) == len(error_messages) == len(kept_rows), (
    "result / error_messages must have one entry per kept problem"
)
dataset = dataset.select(kept_rows)

dataset = dataset.add_column("failed_solution", result)
# `dataset[idx]` returns a fresh dict, so assigning into it never reaches the
# dataset; the messages have to be attached as a real column instead.
dataset = dataset.add_column("error_messages", error_messages)

In [None]:
# Sanity check: every row should carry exactly one error message per retained
# failed solution. Prints both counts (the ones actually being compared) for
# each row where they disagree.
for instance in dataset:
    solution_count = len(instance["failed_solution"])
    message_count = len(instance["error_messages"])
    if solution_count != message_count:
        print(solution_count)
        print(message_count)

In [None]:
# Inspect the retained error messages of the first kept problem.
error_messages[0]

In [None]:
# Publish the current `dataset` object to the Hub under the same repo id that
# the first cell loads from.
dataset.push_to_hub("huypn16/LCB-R-F")

In [None]:
import anthropic
import re
import os

# Captures the body of a ```python fenced block; searched with re.DOTALL so the
# body may span several lines.
pattern = r"```python(.*?)```"

# System prompt for the plain (non-annotated) repair setting.
SYSTEM_MESSAGE_GENERIC = (
    "You are an expert Python programmer. "
    "You will be given a question (problem specification), a buggy program and its error message, "
    "you will generate a correct Python program that matches the specification, "
    "fix the original program and passes all the tests. "
    "You should think step-by-step logically before returning final the program. "
    "The program should only include function definition with parameter list in order."
)

def get_template_answer(question: CodeGenerationProblem, buggy_prorgam: str, error_message: str):
    """Build the user prompt for one repair request.

    Args:
        question: Problem wrapper; only its ``question_content`` is read.
        buggy_prorgam: Source of the failing program, placed in a python fence.
        error_message: Failure description shown under the program.

    Returns:
        The prompt text: question, buggy program, error message, then the
        answer scaffold ending in an empty python fence.
    """
    sections = [
        f"### Question:\n{question.question_content}\n\n",
        f"### Buggy program:\n```python\n{buggy_prorgam}\n```\n\n",
        f"### Error message:\n{error_message}\n\n",
        "Your reasoning: ....```python\n# YOUR CODE HERE\n```\n\n",
    ]
    return "".join(sections)
    
def run(problem: CodeGenerationProblem, buggy_program: str, error_message: str):
    """Ask the model to repair one buggy program and return the extracted code.

    Args:
        problem: Problem wrapper passed on to ``get_template_answer``.
        buggy_program: Source of the failing program.
        error_message: Failure description for that program.

    Returns:
        The stripped body of the first ```python fenced block in the reply,
        or ``""`` when the request failed or the reply contains no such block.
    """
    client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
    chat_messages = [
        {
            "role": "user",
            "content": get_template_answer(problem, buggy_program, error_message),
        },
    ]
    try:
        response = client.messages.create(
            model="claude-3-5-sonnet-20240620",
            system=SYSTEM_MESSAGE_GENERIC,
            messages=chat_messages,
            max_tokens=2048
        )
        solution = response.content[0].text
    except Exception as exc:
        # Best effort: a failed request counts as an empty generation, but it is
        # reported instead of silently swallowed. Catching Exception (not a bare
        # except) lets KeyboardInterrupt / SystemExit stop a long-running loop.
        print(f"[run] model request failed: {exc!r}")
        solution = ""

    match = re.search(pattern, solution, re.DOTALL)
    if match:
        python_code = match.group(1).strip()
    else:
        python_code = ""
    return python_code

In [None]:
from tqdm import tqdm
# Accumulators aligned by problem index (rebuilt from scratch for this run).
generations_all, samples, responses_all = [], [], []

for idx in tqdm(range(len(dataset))):
    problem = CodeGenerationProblem(dataset[idx])
    # Fetch the row once for the inner loop instead of re-indexing per sample.
    row = dataset[idx]

    generations = []
    for j, buggy_program in enumerate(row["failed_solution"]):
        # error_messages[j] is the message paired with failed_solution[j].
        error_message = row["error_messages"][j]
        repaired_program = run(problem, buggy_program, error_message)
        generations.append(repaired_program)

    generations_all.append(generations)
    samples.append(problem.get_evaluation_sample())

# Evaluate the repaired programs, then compute the metrics.
results, metadata = evaluate_generations(
    samples,
    generations_all,
    num_process_evaluate=32,
    timeout=6,
)
metric = codegen_metrics(samples, generations_all)

In [None]:
from src.codetransform.next import execute_and_trace

In [None]:
import anthropic
import re
import os

# Captures the body of a ```python fenced block; searched with re.DOTALL so the
# body may span several lines.
pattern = r"```python(.*?)```"

# System prompt for the repair setting where the buggy program is annotated
# with intermediate variable values.
SYSTEM_MESSAGE_GENERIC = (
    "You are an expert Python programmer. "
    "You will be given a question (problem specification), "
    "a buggy program with annotation of intermediate variables and its error message, "
    "you will generate a correct Python program that matches the specification, "
    "fix the original program based on control flow and error message to pass all the tests. "
    "You should think step-by-step logically before returning final the program. "
    "The program should only include function definition with parameter list in order."
)

def get_template_answer(question: CodeGenerationProblem, buggy_prorgam: str, error_message: str):
    """Build the user prompt for one repair request, using the traced program.

    The buggy source is first passed through ``execute_and_trace`` and the
    value it returns is what gets embedded in the prompt (presumably the
    source annotated with runtime values; confirm against that helper).

    Args:
        question: Problem wrapper; only its ``question_content`` is read.
        buggy_prorgam: Source of the failing program.
        error_message: Failure description shown under the program.

    Returns:
        The prompt text: question, annotated buggy program, error message,
        then the answer scaffold ending in an empty python fence.
    """
    traced_source = execute_and_trace(buggy_prorgam)

    sections = [
        f"### Question:\n{question.question_content}\n\n",
        f"### Annotated Buggy program:\n```python\n{traced_source}\n```\n\n",
        f"### Error message:\n{error_message}\n\n",
        "Your reasoning: ....```python\n# YOUR CODE HERE\n```\n\n",
    ]
    return "".join(sections)
    
def run(problem: CodeGenerationProblem, buggy_program: str, error_message: str):
    """Ask the model to repair one buggy program and return the extracted code.

    The prompt comes from whichever ``get_template_answer`` is currently
    defined in the notebook (looked up at call time).

    Args:
        problem: Problem wrapper passed on to ``get_template_answer``.
        buggy_program: Source of the failing program.
        error_message: Failure description for that program.

    Returns:
        The stripped body of the first ```python fenced block in the reply,
        or ``""`` when the request failed or the reply contains no such block.
    """
    client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
    chat_messages = [
        {
            "role": "user",
            "content": get_template_answer(problem, buggy_program, error_message),
        },
    ]
    try:
        response = client.messages.create(
            model="claude-3-5-sonnet-20240620",
            system=SYSTEM_MESSAGE_GENERIC,
            messages=chat_messages,
            max_tokens=2048
        )
        solution = response.content[0].text
    except Exception as exc:
        # Best effort: a failed request counts as an empty generation, but it is
        # reported instead of silently swallowed. Catching Exception (not a bare
        # except) lets KeyboardInterrupt / SystemExit stop a long-running loop.
        print(f"[run] model request failed: {exc!r}")
        solution = ""

    match = re.search(pattern, solution, re.DOTALL)
    if match:
        python_code = match.group(1).strip()
    else:
        python_code = ""
    return python_code

In [50]:
# Show what execute_and_trace returns for one program. `buggy_program` is not
# defined in this cell: it is whatever value an earlier loop cell left behind.
print(execute_and_trace(buggy_program))

class Solution:
    def maxSelectedElements(self, nums: List[int]) -> int: # (0) __module__=builtins; __qualname__=Solution; ...; (1) __module__=builtins; __qualname__=Solution
        # Sort the array
        nums.sort()

        # Dictionary to store the length of consecutive sequences
        dp = {}
        max_length = 0

        for num in nums:
            # Option 1: Keep the number as is
            dp[num] = dp.get(num - 1, 0) + 1

            # Option 2: Increase the number by 1
            dp[num + 1] = dp.get(num, 0) + 1

            # Update the maximum length
            max_length = max(max_length, dp[num], dp[num + 1])

        return max_length
