## Processar testes em arquivos

### Configurações necessárias

In [4]:
%pip install pytest

Note: you may need to restart the kernel to use updated packages.


In [2]:
import json
import os
import ast
import shutil
import subprocess
import re
from pathlib import Path

In [None]:
# Clone the QuixBugs benchmark only when no local "QuixBugs" path exists yet.
# check=True makes a failed clone raise CalledProcessError here instead of
# letting the following cells run against a missing or partial checkout.
if not os.path.exists("QuixBugs"):
    subprocess.run(
        ["git", "clone", "https://github.com/jkoppel/QuixBugs.git"],
        check=True,
    )

In [3]:
# Locations inside the QuixBugs checkout: programs and their test cases.
python_input_dir = Path("QuixBugs/python_programs")
python_test_dir = Path("QuixBugs/python_testcases")
java_input_dir = Path("QuixBugs/java_programs")
java_test_dir = Path("QuixBugs/java_testcases")

# Root folder for everything produced by the LLMs; it must exist before the
# per-model sub-folders below can be created (mkdir is called without parents=True).
Path("llm_outputs").mkdir(exist_ok=True)

# One output folder per (language, model) pair.
python_output_llama_dir = Path("llm_outputs/python_programs_corrected_by_llama")
python_output_deepseek_dir = Path("llm_outputs/python_programs_corrected_by_deepseek")
java_output_llama_dir = Path("llm_outputs/java_programs_corrected_by_llama")
java_output_deepseek_dir = Path("llm_outputs/java_programs_corrected_by_deepseek")

# Create the output folders; exist_ok=True keeps the cell safe to re-run.
for _output_dir in (
    python_output_llama_dir,
    python_output_deepseek_dir,
    java_output_llama_dir,
    java_output_deepseek_dir,
):
    _output_dir.mkdir(exist_ok=True)

### Funções de processamento

In [5]:
def run_tests_on_generated_code(generated_code_dir: Path, test_dir: Path, language: str):
  """Run each test file in ``test_dir`` against the code in ``generated_code_dir``.

  Every test file is temporarily rewritten in place so that its import of the
  reference implementation points at ``generated_code_dir.name`` instead, then
  executed, and finally restored to its original content. A per-file
  "failed/total" line is printed; nothing is returned.

  Args:
    generated_code_dir: Directory whose *name* replaces the original package
      name in the test files' import statements.
    test_dir: Directory containing the test files (``test_*.py`` when
      ``language`` is ``"python"``, ``*.java`` otherwise).
    language: ``"python"`` or ``"java"``. Any other value only lists the
      ``*.java`` files without running anything.

  Raises:
    OSError: If ``pytest``, ``javac`` or ``java`` cannot be launched. The test
      file being processed is still restored before the error propagates.
  """
  pattern = "test_*.py" if language == "python" else "*.java"
  # Sorted so the report order is reproducible across runs and machines.
  test_files = sorted(test_dir.glob(pattern))
  print(f"Executando {len(test_files)} arquivos de teste...\n")

  for test_file in test_files:
    print(f"==> Rodando testes em: {test_file.name}")

    original_code = test_file.read_text(encoding="utf-8")
    try:
      if language == "python":
        _run_python_test(test_file, original_code, generated_code_dir)
      elif language == "java":
        _run_java_test(test_file, original_code, generated_code_dir, test_dir)
    finally:
      # Always undo the in-place import rewrite, including on compile failure,
      # on a missing executable, or when the kernel is interrupted mid-run.
      test_file.write_text(original_code, encoding="utf-8")


def _count_pytest_results(stdout: str):
  """Return ``(failed, passed)`` parsed from pytest's textual output."""
  # pytest prints its summary last, so scan from the end; earlier lines
  # (test ids, assertion messages) may also contain "failed" or "passed".
  summary_line = next(
      (line for line in reversed(stdout.splitlines()) if re.search(r"(failed|passed)", line)),
      ""
  )
  match_fail = re.search(r"(\d+)\s+failed", summary_line)
  match_pass = re.search(r"(\d+)\s+passed", summary_line)
  failed_count = int(match_fail.group(1)) if match_fail else 0
  passed_count = int(match_pass.group(1)) if match_pass else 0
  return failed_count, passed_count


def _run_python_test(test_file: Path, original_code: str, generated_code_dir: Path):
  """Point one pytest file at the generated package, run it and print the tally."""
  modified_code = re.sub(
      r"from correct_python_programs\.(\w+) import (\w+)",
      rf"from {generated_code_dir.name}.\1 import \2",
      original_code
  )
  test_file.write_text(modified_code, encoding="utf-8")

  result = subprocess.run(
      ["pytest", str(test_file), "--tb=no", "--maxfail=1000", "-q"],
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
      text=True
  )

  # Only "failed" and "passed" are counted; a file whose summary reports
  # neither (e.g. only errors) is printed as 0/0.
  failed_count, passed_count = _count_pytest_results(result.stdout)
  total = failed_count + passed_count

  print(f"{test_file.name}: {failed_count}/{total} falha(s)\n")


def _run_java_test(test_file: Path, original_code: str, generated_code_dir: Path, test_dir: Path):
  """Point one Java test at the generated package, compile, run and print the tally."""
  modified_code = re.sub(
    r"import java_programs\.(\w+);",
    rf"import {generated_code_dir.name}.\1;",
    original_code
  )
  test_file.write_text(modified_code, encoding="utf-8")

  # os.pathsep is ":" on POSIX and ";" on Windows.
  # NOTE(review): the classpath lists the two directories themselves while the
  # class is launched as "java_testcases.<name>"; confirm these are the right
  # classpath roots for the packages involved.
  classpath = os.pathsep.join([str(generated_code_dir), str(test_dir)])

  compile_result = subprocess.run(
    ["javac", "-cp", classpath, str(test_file)],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True
  )
  if compile_result.returncode != 0:
    print(f"Erro na compilação de {test_file.name}:\n{compile_result.stderr}")
    return

  class_name = test_file.stem
  run_result = subprocess.run(
    ["java", "-cp", classpath, f"java_testcases.{class_name}"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True
  )

  # The tally is derived from marker strings in the program's stdout.
  output = run_result.stdout
  failed_count = output.count("Path Not Found")
  passed_count = output.count("Path Found")
  total = failed_count + passed_count

  print(output.strip())
  print(f"{test_file.name}: {failed_count}/{total} falha(s)\n")

### Processamentos

In [None]:
# Python test files, with imports redirected to the Llama-corrected programs folder.
run_tests_on_generated_code(
    generated_code_dir=python_output_llama_dir,
    test_dir=python_test_dir,
    language="python",
)

Executando 40 arquivos de teste...

==> Rodando testes em: test_find_in_sorted.py
test_find_in_sorted.py: 2/7 falha(s)

==> Rodando testes em: test_sqrt.py


In [None]:
# Java test files, with imports redirected to the Llama-corrected programs folder.
run_tests_on_generated_code(
    generated_code_dir=java_output_llama_dir,
    test_dir=java_test_dir,
    language="java",
)

In [None]:
# Python test files, with imports redirected to the DeepSeek-corrected programs folder.
run_tests_on_generated_code(
    generated_code_dir=python_output_deepseek_dir,
    test_dir=python_test_dir,
    language="python",
)

In [None]:
# Java test files, with imports redirected to the DeepSeek-corrected programs folder.
run_tests_on_generated_code(
    generated_code_dir=java_output_deepseek_dir,
    test_dir=java_test_dir,
    language="java",
)