Names:
1. Jared Acord    (SID: 28446559)
2. Adam Keene     (SID: 26197478)
3. Brandon Lau    (SID: 18946278)
4. Jackson Bolcer (SID: 22144453)

## Assignment 2: Code Whiteboard Tutor

Main goal: Use a multimodal Large Language Model (LLM) to build a UI application that allows users to upload a photo of their handwritten Python code and receive suggestions for code improvements.

The main functionalities that must be included are:
- Transcribing handwritten Python code into a code snippet
- Running static analysis and explaining bugs (if any) in natural language
- Suggesting bug fixes, improvements, or efficiency tweaks to the code snippet


In [None]:
# Install required packages
%pip -q install gradio unstructured sentence-transformers
%pip -q install google.generativeai     # for using local IDE

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/981.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m972.8/981.5 kB[0m [31m43.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m21.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m80.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m608.4/608.4 kB[0m [31m48.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m167.6/167.6 kB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m105.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.8/207.8 kB[0m [31m18.6 MB/s[0

In [None]:
# Import necessary libraries
import os
import time
from PIL import Image
import google.generativeai as genai
# from google import genai
from google.genai import types
from google.colab import userdata
import nest_asyncio
nest_asyncio.apply()


In [None]:
# Fetch the Gemini API key through Colab's `userdata` helper instead of hard-coding it.
api_key = userdata.get('GOOGLE_API_KEY')
# Model identifier passed to every genai.GenerativeModel(...) call in this notebook.
MODEL_ID = "gemini-2.5-flash-lite"
# Register the key with the google.generativeai client.
genai.configure(api_key=api_key)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Define path used in testing (not needed for app, just included for illustration).
# The prefix is relative (no leading "/"), so it resolves against the current working directory.
GOOGLE_DRIVE_DIR_PREFIX = "drive/MyDrive/Classes/SWE270P/Assignments/SWE270P_A2/"
# Folder holding the sample photos of handwritten code.
path_to_input_images = GOOGLE_DRIVE_DIR_PREFIX + "code_images_example/"
# Folder where the LLM-revised code files are written.
path_to_output_images = GOOGLE_DRIVE_DIR_PREFIX + "revised_code/"

In [None]:
import gradio as gr
from PIL import Image

# Sentinel language value; compared against `code_language.lower().strip()` to
# select the "any programming language" prompts.
OTHER_PROGRAMMING_LANG = "other"

# Define the gen config to be more conservative: a low temperature (0.1) is
# passed to generate_content() for the code-analysis request.
code_analysis_config = genai.types.GenerationConfig(temperature=0.1)


def transcribe_image_code(image_file: Image.Image, code_language: str) -> str:
  """
  Step 1: Transcribes code from the input image using a multimodal LLM.

  Parameters:
      image_file (Image.Image): The uploaded image containing handwritten code.
      code_language (str): The programming language of the code. The value
          "other" (case-insensitive) asks the model to accept any language.
  Returns:
      str: The transcribed code as a string, with Markdown code fences (and any
      language tag following an opening fence) removed.
  Raises:
      gr.Error: If no image was provided, or if the model call / response
          handling fails for any reason.
  """
  import re  # local import: `re` is not part of the notebook's top-level imports

  # Fail early with a readable message instead of sending `None` to the model.
  if image_file is None:
    raise gr.Error("Please upload an image of the handwritten code first.")

  if code_language.lower().strip() == OTHER_PROGRAMMING_LANG:
    prompt = "Transcribe the handwritten code in this image. It can be of any programming language. Only return the code and nothing else. Only return the code within triple backticks, and do not include any language identifier after the opening triple backticks."
  else:
    prompt = f"Transcribe the handwritten {code_language} code in this image. Only return the code and nothing else. Only return the code within triple backticks, and do not include any language identifier after the opening triple backticks."

  prompt += """
Here is an example of the output after transcribing the code in the image:
```
def hello_world():
    print("Hello, World!")
```

Notice how there is no language identifier after the opening triple backticks. This is an example of wrong output:
```python
def hello_world():
    print("Hello, World!")
```

Remember, no language identifier after the opening triple backticks after transcribing the code. I used Python in the above example, but it could be any programming language.
"""

  try:
    # Initialize the multimodal model
    model = genai.GenerativeModel(model_name=MODEL_ID)

    # Create content with the image and a prompt
    contents = [prompt, image_file]

    # Generate content from the model
    response = model.generate_content(contents)

    # Remove every code fence. A language tag is dropped only when it is the
    # last thing on the fence line, so tags for any language (not just
    # python/java) are handled while inline code after a fence is preserved.
    ai_msg_content = re.sub(r"```(?:[\w+#.-]+[ \t]*(?=\r?\n))?", "", response.text)

    # Trim the blank lines left behind where the fences used to be, keeping
    # the indentation of the first code line intact.
    return ai_msg_content.strip("\r\n")

  except Exception as e:
    # Top-level boundary for the UI: surface any failure as a gradio error.
    raise gr.Error(f"Error during image transcription: {str(e)}") from e


def analyze_code(code_block: str, code_language: str) -> tuple[str, str]:
    """
    Step 2: Performs static analysis using LLM on the transcribed code.

    Parameters:
        code_block (str): The transcribed code snippet.
        code_language (str): The programming language of the code. The value
            "other" (case-insensitive) asks the model to identify the language.
    Returns:
        (1) The analysis text explaining bugs, improvements, or efficiency suggestions.
        (2) The refined version of the code snippet with the suggested fixes and enhancements,
            with Markdown code fences removed.
    Raises:
        gr.Error: If the snippet is empty or whitespace-only, if the model call
            fails, or if the response lacks the "### Final Code ###" section.
    """
    import re  # local import: `re` is not part of the notebook's top-level imports

    # Section header the prompt instructs the model to emit before the revised code.
    final_code_marker = "### Final Code ###"

    if not code_block or not code_block.strip():
        raise gr.Error("Please provide a valid code snippet.")

    if code_language.lower().strip() == OTHER_PROGRAMMING_LANG:
        prompt = """
You are an expert software developer and code reviewer. Analyze the following code.
First, identify the programming language of the code. Then, provide a code review with suggestions for bugs,
improvements, and efficiency tweaks based on the identified language.
"""
    else:
        prompt = f"You are an expert {code_language} developer and code reviewer. Analyze the following {code_language} code.\n"

    try:
        model = genai.GenerativeModel(MODEL_ID)

        prompt = prompt + f"""
The code probably contains one or more subtle or obvious bugs. Concisely give various
improvements and efficiency tweaks to the code, and explain the solutions to any bugs found.
When suggesting these changes, do not return the entire code, but rather reference only one
or a few lines as needed. Keep each suggestion brief and summerized.

After all suggested changes, return also a final, revised version of the code
which incorporates all changes you see fit within a single code block. Be careful not to rename the method names.

The format of your response must be as follows:

### Overview ###
(Give a brief overview of the code)
### Bugs ###
(A numbered list any bugs found, and their proposed resolution)
### Enhancements ###
(A numbered list any code improvements or efficiency tweaks)
### Final Code ###
(The final, revised version of the code.)

For example, given the following input code to analyze in Python:
```
def bucketSort(arr, k):
  counts = [0] * k
  for x in arr:
      counts[x] += 1

  sorted_arr = []
  for i, count in enumerate(arr):
      sorted_arr.extend([i] * count)

  return sorted_arr
```

Your response should be similar to the following output format (with more or fewer bugs and enhancements as needed):

### Overview ###
The provided `bucketSort` function has a significant logic error in how it reconstructs the sorted array and an inefficiency in its counting loop.
### Bugs ###
**Bug 1**: The second loop iterates over `arr` instead of `counts`. This will lead to an incorrect number of elements being added to `sorted_arr` and potentially an `IndexError` if elements in `arr` are greater than or equal to `k`.
    **Fix**: Change `for i, count in enumerate(arr):` to `for i, count in enumerate(counts):` to iterate over the `counts` list.
### Enhancements ###
1. (Inefficiency): The first loop iterates through `arr` to populate `counts`. If `k` is much larger than the actual range of numbers in `arr`, this is fine. However, if the numbers in `arr` are densely packed and much smaller than `k`, it's still efficient. The primary inefficiency lies in the second loop's structure for reconstruction.
2. (Clarity/Readability): The current approach is somewhat clear, but the bug makes it confusing. Once the bug is fixed, it's straightforward for its intended purpose.
3. (Minor Efficiency Tweak): For the reconstruction part, `sorted_arr.extend([i] * count)` is generally efficient. An alternative for very large `count` values could be a list comprehension or generator expression, but `extend` is usually well-optimized in CPython. The primary improvement is fixing the logic.
### Final Code ###
```
def bucketSort(arr, k):
    counts = [0] * k
    for x in arr:
        # Ensure x is within the bounds of k. If not, this will raise an IndexError.
        # A robust implementation might handle this by resizing k or raising a specific error.
        counts[x] += 1

    sorted_arr = []
    # Iterate over the counts list to reconstruct the sorted array.
    for i, count in enumerate(counts):
        sorted_arr.extend([i] * count)

    return sorted_arr
```

For the final revised code you write, return the code within triple backticks, and do not include any language identifier after the opening triple backticks.
For example, notice how there is no language identifier after the opening triple backticks.
This is an example of wrong output:
```python
def hello_world():
    print("Hello, World!")
```
This is an example of correct output:
```
def hello_world():
    print("Hello, World!")
```
Remember, no language identifier after the opening triple backticks after transcribing the code.

Now, analyze the following code:
```
{code_block}
```
        """
        # Generate the response using the gen config
        contents = [prompt]  # The LLM input is now a text prompt, not an image
        response = model.generate_content(contents, generation_config=code_analysis_config)

        # Split the response at the first "Final Code" header. partition() never
        # raises, so a missing header can be reported with a clear message
        # instead of an opaque "list index out of range".
        analysis, marker, revised_code = response.text.partition(final_code_marker)
        if not marker:
            raise ValueError(
                f"the model response did not contain the '{final_code_marker}' section"
            )

        analysis = analysis.strip()

        # Remove every code fence. A language tag is dropped only when it is
        # the last thing on the fence line, so tags for any language (not just
        # python/java) are handled while inline code after a fence is kept.
        revised_code = re.sub(
            r"```(?:[\w+#.-]+[ \t]*(?=\r?\n))?", "", revised_code.strip()
        )
        # Trim the blank lines left where the fences used to be, keeping the
        # indentation of the first code line intact.
        revised_code = revised_code.strip("\r\n")

        return analysis, revised_code

    except Exception as e:
        # Top-level boundary for the UI: surface any failure as a gradio error.
        raise gr.Error(f"Error during analysis: {str(e)}") from e



# Smoke test: push one sample image through both pipeline stages and show
# the transcription, the analysis, and the revised code in turn.
code_language = "Python"
example_image = Image.open(path_to_input_images + "code_image_03.jpg")
image_text = transcribe_image_code(example_image, code_language)
analysis, revised_code = analyze_code(image_text, code_language)

for banner, body in (
    ("##### Code transcription from image ######\n", image_text),
    ("\n##### Code Analysis ######\n", analysis),
    ("\n##### Revised Code ######\n", revised_code + "\n"),
):
    # Heading and content each end with a newline, exactly like two print() calls.
    print(banner, body, sep="\n")

##### Code transcription from image ######


def bucketSort(arr, k):
    counts = [0] * k
    for x in arr:
        counts[x] += 1

    sorted_arr = []
    for i, count in enumerate(arr):
        sorted_arr.extend([i] * count)

    return sorted_arr


##### Code Analysis ######

### Overview ###
The provided `bucketSort` function implements a counting sort algorithm. It first counts the occurrences of each element within a given range `k`, and then reconstructs the sorted array based on these counts.

### Bugs ###
**Bug 1**: The second loop iterates over `arr` instead of `counts`. This will lead to an incorrect number of elements being added to `sorted_arr` and potentially an `IndexError` if elements in `arr` are greater than or equal to `k`.
    **Fix**: Change `for i, count in enumerate(arr):` to `for i, count in enumerate(counts):` to iterate over the `counts` list.

### Enhancements ###
1.  **Input Validation**: The code assumes all elements in `arr` are non-negative and less than 

In [None]:
import gradio as gr
from PIL import Image

def gradio_pipeline(image, language) -> tuple[str, str, str]:
  '''
  Run the full tutor flow for one upload: transcribe the photographed code,
  then analyze the transcription.

  Parameters:
      image (Image.Image): The uploaded image containing handwritten code.
      language (str): The programming language of the code.

  Returns:
      A 3-tuple, in the order the UI outputs are wired:
      (1) The transcribed code snippet.
      (2) The analysis text explaining bugs, improvements, or efficiency suggestions.
      (3) The refined version of the code with the suggested fixes and enhancements.
  '''
  transcribed = transcribe_image_code(image, language)
  # analyze_code yields (analysis, revised); splice it in after the transcription.
  return (transcribed, *analyze_code(transcribed, language))

def shutdown_app():
  '''
  Close the module-level gradio `app`; a failure is printed rather than raised.
  '''
  print("Shutting down gradio app...")
  try:
    app.close()
  except Exception as err:
    # Best-effort shutdown: report the problem and carry on.
    print("Error during shutdown: " + str(err))

# Build the UI. Components are created in the order they should appear, and the
# event handlers are registered while the Blocks context is still open.
with gr.Blocks(title = "Whiteboard Tutor") as app:
  # Arrange UI elements. Create a single row with two columns
  with gr.Row():
    # Left column: inputs (image upload, language choice, trigger button).
    with gr.Column():
      # type="pil" requests a PIL image, which transcribe_image_code is annotated to take.
      image_input = gr.Image(type="pil", label="Upload Handwritten Code Image")
      lang = gr.Dropdown(["Python", "Java", "Other"], label="Code Language", value="Python")
      run_button = gr.Button("Analyze Code")
    # Right column: read-only results plus a button to stop the app.
    with gr.Column():
      transcribe_output = gr.Code(label="Transcribed Code", interactive=False)
      analysis_output = gr.Markdown(label="Analysis")
      revised_output = gr.Code(label="Revised Code", interactive=False)
      close_button = gr.Button("Close App")

    # gradio_pipeline returns (transcribed, analysis, revised), matching the outputs order.
    run_button.click(gradio_pipeline, inputs=[image_input, lang], outputs=[transcribe_output, analysis_output, revised_output])
    close_button.click(shutdown_app)

if __name__ == "__main__":
  # share=True requests a public gradio.live URL; debug=True keeps the cell
  # running so errors and logs stay visible (see the launch output below).
  app.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://b8ce601e51f91c8b84.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/uvicorn/protocols/http/h11_impl.py", line 403, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/fastapi/applications.py", line 1134, in __call__
    await super().__call__(scope, receive, send)
  File "/usr/local/lib/python3.12/dist-packages/starlette/applications.py", line 113, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/errors.py", line 186, in __call__
    raise exc
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/errors.py",

In [None]:
# Stop the gradio server started above, then kill any leftover gradio processes.
app.close()
!pkill -f "python.*gradio"
# NOTE(review): pkill treats its pattern as a regular expression, not a shell glob —
# confirm "*gradio*" matches what is intended (a plain "gradio" may be what was meant).
!pkill -f "*gradio*"

### Additional Resource: Running test cases on LLM-generated code.

In [None]:
"""
Example of running test cases on LLM-generated code.
You do not need to follow this exact implementation for your code."""

import json
import importlib.util

def _resolve_function(module, function_name):
    """Return the callable in *module* that best matches *function_name*."""
    # An exact attribute match always wins.
    exact = getattr(module, function_name, None)
    if callable(exact):
        return exact

    # Only consider callables defined by the loaded file itself, so imported
    # modules/functions and plain constants are never mistaken for the target.
    defined = {
        name: obj
        for name, obj in vars(module).items()
        if callable(obj) and getattr(obj, "__module__", None) == module.__name__
    }

    def _normalize(name):
        return name.replace("_", "").lower()

    # Tolerate naming-style differences such as "bucketsort" vs "bucketSort".
    wanted = _normalize(function_name)
    for name, obj in defined.items():
        if _normalize(name) == wanted:
            return obj

    # Last resort: the first function defined in the file, preferring public names.
    public = [obj for name, obj in defined.items() if not name.startswith("_")]
    candidates = public or list(defined.values())
    if not candidates:
        raise AttributeError(
            f"no callable matching {function_name!r} found in {module.__name__!r}"
        )
    return candidates[0]


def run_tests(filename_original, function_name, json_test_path):
    """
    Run JSON-described test cases against a function loaded from a Python file.

    Parameters:
        filename_original (str): Path of the Python file to import.
        function_name (str): Name of the function under test. It is looked up
            exactly first, then ignoring case and underscores; if neither
            matches, the first function defined in the file is used.
        json_test_path (str): Path of a JSON file holding a top-level
            "test_case" list whose entries have "input" and "expected" keys.

    Returns:
        None. One "Test N passed." / "Test N failed: ..." line is printed per case.

    Raises:
        AttributeError: If the file defines no callable that could be tested.
    """
    # Load test cases
    with open(json_test_path, 'r', encoding="utf-8") as f:
        test_cases = json.load(f)["test_case"]

    # Load the function from the file
    spec = importlib.util.spec_from_file_location(function_name, filename_original)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    func = _resolve_function(module, function_name)

    # Run tests
    for idx, case in enumerate(test_cases):
        try:
            inputs = case["input"]
            expected = case["expected"]
            # A list/tuple input is unpacked as positional arguments.
            result = func(*inputs) if isinstance(inputs, (list, tuple)) else func(inputs)
            assert result == expected, f"input={inputs}, expected={expected}, got={result}"
            print(f"Test {idx+1} passed.")
        except Exception as e:
            # Deliberately broad: any error in one case is reported and the run continues.
            print(f"Test {idx+1} failed: {e}")


In [None]:
"""
Make sure to save the final code (after transcribing and performing static analysis) into a Python file.
For example, if you have saved the final code transcribed and fixed by the LLM as example_llm_code.py,
you can run the test cases using the format below.

You can also add more inputs and expected outputs to the JSON file to run additional tests.
It is encouraged to add more test cases to ensure the robustness of your code.
"""
from pathlib import Path
import time


# Sample image in, revised module out, plus the JSON cases that exercise it.
path_to_json = GOOGLE_DRIVE_DIR_PREFIX + "test_case_bucketsort.json"
image_input_path = f"{path_to_input_images}code_image_03.jpg"
image_output_path = f"{path_to_output_images}revised_code_03.py"

# Transcribe the photo, then let the model review and revise the code.
input_image = Image.open(image_input_path)
image_text = transcribe_image_code(input_image, "Python")
analysis, revised_code = analyze_code(image_text, "Python")

# Persist the revised code so it can be imported as a module by run_tests.
with open(image_output_path, "w", encoding="utf-8") as f:
    f.write(revised_code)

# Pause before importing the file — presumably to let the Drive mount settle (TODO confirm).
time.sleep(5)

run_tests(image_output_path, "bucketsort", path_to_json)


Test 1 passed.
Test 2 passed.
Test 3 passed.
Test 4 passed.
Test 5 passed.
Test 6 passed.
