## Processar testes em arquivos

### Configurações necessárias

In [4]:
%pip install pytest

Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import subprocess
import re
from pathlib import Path

In [None]:
if not os.path.exists("QuixBugs"):
    subprocess.run(["git", "clone", "https://github.com/jkoppel/QuixBugs.git"])

In [2]:
python_input_dir = Path("QuixBugs/python_programs")
python_test_dir = Path("QuixBugs/python_testcases")
java_input_dir = Path("QuixBugs/java_programs")
java_test_dir = Path("QuixBugs/java_testcases")

Path("llm_outputs").mkdir(exist_ok=True)
python_output_llama_dir = Path("llm_outputs/python_programs_corrected_by_llama")
python_output_mistral_dir = Path("llm_outputs/python_programs_corrected_by_mistral")
java_output_llama_dir = Path("llm_outputs/java_programs_corrected_by_llama")
java_output_mistral_dir = Path("llm_outputs/java_programs_corrected_by_mistral")

python_output_llama_dir.mkdir(exist_ok=True)
python_output_mistral_dir.mkdir(exist_ok=True)
java_output_llama_dir.mkdir(exist_ok=True)
java_output_mistral_dir.mkdir(exist_ok=True)

### Funções de processamento

In [9]:
import subprocess
import re
import ast

def get_test_functions(file_path):
    """Extrai nomes de funções de teste de um arquivo Python"""
    tree = ast.parse(file_path.read_text(encoding="utf-8"))
    return [node.name for node in tree.body if isinstance(node, ast.FunctionDef) and node.name.startswith("test_")]

def run_tests_on_generated_code(generated_code_dir: Path, test_dir: Path, language: str):
    test_files = list(test_dir.glob("test_*.py"))
    print(f"Executando {len(test_files)} arquivos de teste...\n")

    for test_file in test_files:
        print(f"==> Rodando testes em: {test_file.name}")
        original_code = test_file.read_text(encoding="utf-8")

        modified_code = re.sub(
            r"from correct_python_programs\.(\w+) import (\w+)",
            rf"from {generated_code_dir.name}.\1 import \2",
            original_code
        )
        test_file.write_text(modified_code, encoding="utf-8")

        test_names = get_test_functions(test_file)
        failed_count = 0
        passed_count = 0

        for test_name in test_names:
            try:
                result = subprocess.run(
                    ["pytest", str(test_file), "-k", test_name, "--tb=no", "-q"],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    text=True,
                    timeout=5
                )
                if "1 failed" in result.stdout:
                    failed_count += 1
                elif "1 passed" in result.stdout:
                    passed_count += 1
                else:
                    failed_count += 1  # fallback
                    print(f"⚠️ Resultado inesperado em {test_name}: {result.stdout.strip()}")

            except subprocess.TimeoutExpired:
                failed_count += 1
                print(f"⏱️ Timeout no teste: {test_name}")

        total = failed_count + passed_count
        print(f"{test_file.name}: {failed_count}/{total} falha(s)\n")
        test_file.write_text(original_code, encoding="utf-8")


### Processamentos

In [10]:
run_tests_on_generated_code(python_output_llama_dir, python_test_dir, "python")

Executando 40 arquivos de teste...

==> Rodando testes em: test_find_in_sorted.py
⚠️ Resultado inesperado em test_find_in_sorted: [32m.[0m[31mF[0m[32m.[0m[32m.[0m[32m.[0m[32m.[0m[31mF[0m[31m                                                                  [100%][0m
[31mFAILED[0m QuixBugs/python_testcases/test_find_in_sorted.py::[1mtest_find_in_sorted[input_data1--1][0m - RecursionError: maximum recursion depth exceeded in comparison
[31mFAILED[0m QuixBugs/python_testcases/test_find_in_sorted.py::[1mtest_find_in_sorted[input_data6--1][0m - RecursionError: maximum recursion depth exceeded in comparison
[31m[31m[1m2 failed[0m, [32m5 passed[0m[31m in 0.09s[0m[0m
test_find_in_sorted.py: 1/1 falha(s)

==> Rodando testes em: test_sqrt.py
⏱️ Timeout no teste: test_sqrt
test_sqrt.py: 1/1 falha(s)

==> Rodando testes em: test_shunting_yard.py
⚠️ Resultado inesperado em test_shunting_yard: [32m.[0m[32m.[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31m          

In [None]:
run_tests_on_generated_code(java_output_llama_dir, java_test_dir, "java")

In [11]:
run_tests_on_generated_code(python_output_mistral_dir, python_test_dir, "python")

Executando 40 arquivos de teste...

==> Rodando testes em: test_find_in_sorted.py
⚠️ Resultado inesperado em test_find_in_sorted: [32m.[0m[31mF[0m[32m.[0m[32m.[0m[32m.[0m[32m.[0m[31mF[0m[31m                                                                  [100%][0m
[31mFAILED[0m QuixBugs/python_testcases/test_find_in_sorted.py::[1mtest_find_in_sorted[input_data1--1][0m - RecursionError: maximum recursion depth exceeded in comparison
[31mFAILED[0m QuixBugs/python_testcases/test_find_in_sorted.py::[1mtest_find_in_sorted[input_data6--1][0m - RecursionError: maximum recursion depth exceeded in comparison
[31m[31m[1m2 failed[0m, [32m5 passed[0m[31m in 0.07s[0m[0m
test_find_in_sorted.py: 1/1 falha(s)

==> Rodando testes em: test_sqrt.py
⏱️ Timeout no teste: test_sqrt
test_sqrt.py: 1/1 falha(s)

==> Rodando testes em: test_shunting_yard.py
⚠️ Resultado inesperado em test_shunting_yard: [32m.[0m[32m.[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31m          

In [None]:
run_tests_on_generated_code(java_output_mistral_dir, java_test_dir, "java")