### 1. Setup Environment

In [166]:
!git clone https://github.com/jkoppel/QuixBugs
!apt-get install gradle
!pip install dotenv
!pip install langchain_openai
!pip install tabulate

Cloning into 'QuixBugs'...
remote: Enumerating objects: 1123, done.[K
remote: Counting objects: 100% (325/325), done.[K
remote: Compressing objects: 100% (86/86), done.[K
remote: Total 1123 (delta 250), reused 239 (delta 239), pack-reused 798 (from 1)[K
Receiving objects: 100% (1123/1123), 1.14 MiB | 8.20 MiB/s, done.
Resolving deltas: 100% (645/645), done.
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
gradle is already the newest version (4.4.1-13).
0 upgraded, 0 newly installed, 0 to remove and 29 not upgraded.


In [167]:
import os
from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
import subprocess
import shutil
from tabulate import tabulate
import re

In [168]:
# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Read the key from the environment instead of hard-coding it in the notebook,
# and fail early with a clear message rather than on the first API call.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )

llm = ChatOpenAI(model_name="gpt-4", openai_api_key=OPENAI_API_KEY)  # Adjust based on API

### 2. Run Failed Tests

In [175]:
!gradle test --tests java_testcases.junit.KNAPSACK_TEST

[m
[1A[1m<[0;32;1;0;39;1m-------------> 0% CONFIGURING [0s][m[35D[1B
[2A[1m<[0;32;1;0;39;1m-------------> 0% EXECUTING [0s][m[0K[33D[1B[1m> :compileJava[m[14D[1B[2A[1m<[0;32;1;0;39;1m-------------> 0% EXECUTING [0s][m[33D[1B[1m> :compileJava[m[14D[1B[2A[1m<[0;32;1;0;39;1m-------------> 0% EXECUTING [0s][m[33D[1B[1m> :compileJava[m[14D[1B[2A[1m<[0;32;1;0;39;1m-------------> 0% EXECUTING [0s][m[33D[1B[1m> :compileJava[m[14D[1B[2A[1m<[0;32;1;0;39;1m-------------> 0% EXECUTING [0s][m[33D[1B[1m> :compileJava[m[14D[1B[2A[1m<[0;32;1;0;39;1m-------------> 0% EXECUTING [0s][m[33D[1B[1m> :compileJava[m[14D[1B[2A[1m<[0;32;1;0;39;1m-------------> 0% EXECUTING [0s][m[33D[1B[1m> :compileJava[m[14D[1B[2A[1m<[0;32;1;0;39;1m-------------> 0% EXECUTING [0s][m[33D[1B[1m> :compileJava[m[14D[1B[2A[1m<[0;32;1;0;39;1m-------------> 0% EXECUTING [0s][m[33D[1B[1m> :compileJava[m[14D[1B[2A[1m<[0;32;1;0;39;1m----

### 3. Define Prompt Templates (Zero-shot, few-shot, chain-of-thought)



In [176]:
# Zero-shot prompt: the model receives only the buggy source and is asked for the
# corrected file. `{text}` is the template's single placeholder, so it is the only
# declared input variable (callers format it with `text=` alone).
ZERO_SHOT_PROMPT = PromptTemplate(
    template="""You are an expert Java developer.
Below is a piece of code that has a bug (syntax or logical). Please fix it.

CODE:
{text}

Provide ONLY the corrected code, starting from the package declaration, WITHOUT using any markdown formatting or triple backticks.""",
    input_variables=["text"]
)

# Few-shot prompt: shows one worked bug/fix pair before the buggy source.
# `{text}` is the template's single placeholder, so it is the only declared
# input variable (callers format it with `text=` alone).
# NOTE(review): the worked example is Python while the code to fix is Java --
# consider replacing it with a Java example.
FEW_SHOT_PROMPT = PromptTemplate(
    template="""You are an expert Java developer.
Below is an example of a bug and its fix:

EXAMPLE BUG:
def add_numbers(a, b):
    return a - b

EXAMPLE FIX:
def add_numbers(a, b):
    return a + b

Now, here is another buggy code snippet. Fix it using the same logic.

CODE:
{text}

Provide ONLY the corrected code, starting from the package declaration, WITHOUT using any markdown formatting or triple backticks.""",
    input_variables=["text"]
)

# Chain-of-thought prompt: the model explains its reasoning first, then wraps the
# corrected file in ---FIXED CODE--- / ---END FIXED CODE--- markers, which
# extract_fixed_code() uses to pull the Java source out of the reply.
CHAIN_OF_THOUGHT_PROMPT = PromptTemplate(
    template="""You are an expert Java developer.
I will give you code with a bug. Think step by step about the bug,
explain your reasoning, then provide a corrected version.

CODE:
{text}

First, explain your reasoning (step-by-step), then clearly indicate the corrected code by using:

---FIXED CODE---
(Your fixed code starts here)
---END FIXED CODE---

Provide ONLY the corrected Java code between these markers, starting from the package declaration, WITHOUT using any markdown formatting or triple backticks.

If the code is already correct, return it unchanged between the markers.

""",
    input_variables=["text"]
)

### 4. Bug File Processing

In [180]:
# QuixBugs

# 1. Get java main method and corresponding java test class

# 2. Run the code using all 3 prompts (defined before) on a single java method/test class

# 3. Test each fix by overwriting the original Java program with the corrected code, re-running the Gradle test for that problem, and checking whether the build succeeded; the original file is restored from its backup afterwards

def get_java_files(problem_name):
  """Locate the program and JUnit test sources for one problem.

  Both paths are built under QUIXBUGS_PATH:
  ``java_programs/<problem_name>.java`` and
  ``java_testcases/junit/<problem_name>_TEST.java``.

  Returns:
    A ``(program_path, test_path)`` tuple.

  Raises:
    FileNotFoundError: if either file does not exist.
  """
  program_path = os.path.join(QUIXBUGS_PATH, "java_programs", f"{problem_name}.java")
  junit_path = os.path.join(QUIXBUGS_PATH, "java_testcases", "junit", f"{problem_name}_TEST.java")

  # Checked in order; the first missing file aborts the lookup.
  for required in (program_path, junit_path):
    if not os.path.exists(required):
      raise FileNotFoundError(f"Files for {problem_name} not found.")

  return program_path, junit_path

def extract_fixed_code(response_text):
  """Pull the Java source out of a Chain-of-Thought reply.

  Returns the stripped text between the first ``---FIXED CODE---`` /
  ``---END FIXED CODE---`` marker pair. When no such pair is found, an error
  line is printed and the whole stripped reply is returned instead.
  """
  marker_pattern = r"---FIXED CODE---\s*(.*?)\s*---END FIXED CODE---"
  found = re.search(marker_pattern, response_text, re.DOTALL)

  if found is None:
    # Fallback if markers are missing
    print("[ERROR] extracting fixed code, returning non-extracted code fallback")
    return response_text.strip()

  return found.group(1).strip()

def apply_prompt(prompt, code):
  """Applies the LLM-based bug-fixing prompt to the Java code.

  Args:
    prompt: prompt template; it is formatted with ``text=code``.
    code: source text of the buggy Java file.

  Returns:
    The model's reply as stripped text. For a template that contains the
    ``---FIXED CODE---`` marker (the Chain-of-Thought prompt), only the code
    extracted by ``extract_fixed_code`` is returned.
  """
  response = llm.invoke(prompt.format(text=code))

  # Extract plain text from response
  response_text = response.content.strip() if hasattr(response, "content") else str(response).strip()

  # Decide from the template text itself rather than from str(prompt), whose
  # format is the object's repr; fall back to str(prompt) when the object
  # exposes no string `template` attribute.
  template_text = getattr(prompt, "template", None)
  if not isinstance(template_text, str):
    template_text = str(prompt)

  # If Chain-of-Thought, extract only the Java code
  if "---FIXED CODE---" in template_text:
    return extract_fixed_code(response_text)

  return response_text  # Directly return for other prompts

def backup_java_file(java_file):
    """Copy ``java_file`` to ``<java_file>.bak`` unless that backup already exists."""
    backup_path = f"{java_file}.bak"
    if os.path.exists(backup_path):
        # An existing backup is left untouched.
        return
    shutil.copy(java_file, backup_path)
    print(f"Backup created: {backup_path}")

def restore_java_file(java_file):
    """Copy ``<java_file>.bak`` back over ``java_file``.

    When the backup does not exist, an error line is printed and the file is
    left as it is.
    """
    backup_path = f"{java_file}.bak"
    if not os.path.exists(backup_path):
        print(f"[ERROR] No backup file exists: {backup_path}")
        return
    shutil.copy(backup_path, java_file)
    print(f"Restored original Java file from {backup_path}")

def update_java_file(java_file, fixed_code):
    """Replace the contents of ``java_file`` with ``fixed_code`` and report it."""
    with open(java_file, mode="w") as target:
        target.write(fixed_code)
    print(f"Updated {java_file} with fixed code.")

def run_gradle_test(problem_name):
  """Runs Gradle tests and returns whether the fix was successful.

  Executes ``gradle test --tests java_testcases.junit.<problem_name>_TEST``
  with QUIXBUGS_PATH as the working directory and prints Gradle's stdout.
  On a non-zero exit code, the captured stderr is printed as well.

  Returns:
    True when Gradle exits with code 0 and its output contains
    "BUILD SUCCESSFUL"; False otherwise, including when Gradle could not
    be started.
  """
  test_class = f"java_testcases.junit.{problem_name}_TEST"
  # Argument list instead of a shell string: no shell parses the test name.
  cmd = ["gradle", "test", "--tests", test_class]

  try:
    result = subprocess.run(cmd, capture_output=True, text=True, cwd=QUIXBUGS_PATH)
  except Exception as e:
    print(f"Error running Gradle: {e}")
    return False

  print(result.stdout)
  if result.returncode != 0 and result.stderr:
    # stderr is captured too; without this it would be silently discarded.
    print(result.stderr)

  return result.returncode == 0 and "BUILD SUCCESSFUL" in result.stdout

def automate_bug_fixing():
  """Runs all 3 prompts to fix bugs and tests them.

  For every name in PROBLEM_NAMES the original program file is backed up and
  read once. Each prompt (Zero-Shot, Few-Shot, Chain-of-Thought) is then
  applied to that original code: the model's output overwrites the program
  file, the problem's Gradle test is run, the outcome is recorded, and the
  original file is restored from its backup. Finally a table of all results
  is printed.
  """
  results = []
  prompt_variants = [
    ("Zero-Shot", ZERO_SHOT_PROMPT),
    ("Few-Shot", FEW_SHOT_PROMPT),
    ("Chain-of-Thought", CHAIN_OF_THOUGHT_PROMPT),
  ]

  for problem_name in PROBLEM_NAMES:
    java_file = os.path.join(QUIXBUGS_PATH, "java_programs", f"{problem_name}.java")

    # Backup original code
    backup_java_file(java_file)

    with open(java_file, "r") as f:
      buggy_code = f.read()

    # Every prompt approach is tried (there is no early exit on success),
    # so each problem gets one result row per approach.
    for prompt_name, prompt in prompt_variants:
      print(f"\nTrying {prompt_name} Fix for {problem_name}...")

      try:
        fixed_code = apply_prompt(prompt, buggy_code)

        print("Fixed code:")
        print(fixed_code)

        update_java_file(java_file, fixed_code)  # Overwrite the original Java file

        print("\nRunning Gradle Test...")

        success = run_gradle_test(problem_name)

        results.append([problem_name, prompt_name, "✅ Passed" if success else "❌ Failed"])

        if success:
          print(f"\n✅ {prompt_name} Fix Worked! Bug Fixed in {problem_name}.java")
      finally:
        # Restoring java code for the next prompt; done in `finally` so a
        # failing step cannot leave the patched file in the checkout.
        restore_java_file(java_file)

  # Print results in a table
  print("\nTest Results:")
  print(tabulate(results, headers=["Problem", "Approach", "Test Result"], tablefmt="grid"))


### 5. Run the Automated Bug-Fixing Pipeline

In [182]:
# Configure the checkout location and the problems to repair, then run the pipeline.
QUIXBUGS_PATH = "/content/QuixBugs"

# Alternative problem set:
# PROBLEM_NAMES = ["KNAPSACK", "KHEAPSORT", "KTH", "LCS_LENGTH"]
PROBLEM_NAMES = [
  "BITCOUNT", "BREADTH_FIRST_SEARCH", "BUCKETSORT", "DEPTH_FIRST_SEARCH",
  "DETECT_CYCLE", "FIND_IN_SORTED", "FLATTEN", "GCD", "GET_FACTORS", "HANOI",
]

if __name__ == "__main__":
  already_in_checkout = os.getcwd() == QUIXBUGS_PATH
  if not already_in_checkout:
    # Switch into the checkout before running.
    print(f"Changing working dir to {QUIXBUGS_PATH}")
    os.chdir(QUIXBUGS_PATH)
  automate_bug_fixing()

Backup created: /content/QuixBugs/java_programs/BITCOUNT.java.bak

Trying Zero-Shot Fix for BITCOUNT...
Fixed code:
package java_programs;

/**
 *
 * @author derricklin
 */
public class BITCOUNT {
    public static int bitcount(int n) {
    int count = 0;
    while (n != 0) {
        n = n & (n - 1);
        count++;
    }
    return count;
    }
}
Updated /content/QuixBugs/java_programs/BITCOUNT.java with fixed code.

Running Gradle Test...
:compileJava
:processResources NO-SOURCE
:classes
:compileTestJava UP-TO-DATE
:processTestResources NO-SOURCE
:testClasses UP-TO-DATE
:test

BUILD SUCCESSFUL in 3s
3 actionable tasks: 2 executed, 1 up-to-date


✅ Zero-Shot Fix Worked! Bug Fixed in BITCOUNT.java
Restored original Java file from /content/QuixBugs/java_programs/BITCOUNT.java.bak

Trying Few-Shot Fix for BITCOUNT...
Fixed code:
package java_programs;

/**
 *
 * @author derricklin
 */
public class BITCOUNT {
    public static int bitcount(int n) {
    int count = 0;
    while (n != 0) {