Unit test generator using GPT, Claude and Gemini.
This notebook generates unit test code for a piece of Python, runs the generated tests, and shows the results (including any errors).
Note:
When I tried to use claude-sonnet-4-20250514, the results were too big and the generated Python was cut off (no matter how large I made max tokens). This was the case for both examples. I've changed the model to claude-3-5-sonnet-20240620 and it seems to run better.

In [2]:
!pip install openai python-dotenv google-generativeai anthropic gradio



In [3]:
# imports

import os
import requests
from openai import OpenAI
import google.generativeai
import anthropic
from IPython.display import Markdown, display, update_display
import gradio as gr
import sys
import io
import traceback
import unittest
import subprocess
import tempfile
from google.colab import userdata

In [4]:
# keys

def _load_secret(secret_name, label):
    """Read one secret from Colab's userdata store and report whether it was found.

    Args:
        secret_name: Name of the secret to look up via ``userdata.get``.
        label: Human-readable name used in the failure message
            (printed as ``"<label> issue"``).

    Returns:
        Whatever ``userdata.get`` returned for ``secret_name``.
    """
    value = userdata.get(secret_name)
    # Same one-line status per key as before: "All good" or "<label> issue".
    print("All good" if value else f"{label} issue")
    return value


openai_api_key = _load_secret("OPENAI_API_KEY", "OpenAI key")
claude_api_key = _load_secret("ANTHROPIC_API_KEY", "Claude key")
google_api_key = _load_secret("GOOGLE_API_KEY", "Google key")
# The failure message previously read "Huffing Face token issue" (typo).
hugging_face_token = _load_secret("HF_TOKEN", "Hugging Face token")

All good
All good
All good
All good


In [5]:
# initialise

# The keys were read from Colab userdata into plain variables above, so hand
# each one to its client explicitly. Previously only OpenAI received its key;
# the Anthropic and Google clients were left to their environment-variable
# fallback, which is not populated by the key-loading cell.
openai = OpenAI(api_key=openai_api_key)
claude = anthropic.Anthropic(api_key=claude_api_key)
google.generativeai.configure(api_key=google_api_key)

# Model identifiers used by the three generator functions.
OPENAI_MODEL = "gpt-4o"
CLAUDE_MODEL = "claude-3-5-sonnet-20240620" #"claude-sonnet-4-20250514"
GOOGLE_MODEL = "gemini-2.0-flash"

# Output token cap; passed as max_tokens in the Claude request.
max_tok = 5000

In [6]:
# System prompt shared by all three providers. The sentences are concatenated,
# so each piece must carry its own separating space (the first two used to run
# together as "python code.You review").
system_message = "You are an engineer with responsibility for unit testing python code. "
system_message += "You review base python code and develop unit tests, also in python, which validate each unit of code."
system_message += """ The output must be in Python with both the unit tests and comments explaining the purpose of each test.
The output should not include any additional text at the start or end including "```".  It should be possible to run the code without any updates including an execution statement.
Include the base / original python code in the response."""

In [7]:
def user_prompt_for(python):
    """Build the user prompt asking a model to write unit tests for ``python``.

    Args:
        python: Source code (as a string) that the unit tests should cover.

    Returns:
        The instruction text followed by a blank line and then the source code.
    """
    user_prompt = "Review the Python code provided and develop unit tests which can be run in a jupyter lab."
    user_prompt += """ The output must be in Python with both the unit tests and comments explaining the purpose of each test.
The output should not include any additional text at the start or end including "```".  It should be possible to run the code without any updates (include an execution statement).
Include the base / original python code in the response."""
    # Separate the instructions from the code; previously the first line of
    # the code was glued directly onto the end of the last sentence.
    user_prompt += "\n\n" + python
    return user_prompt

In [8]:
def messages_for(python):
    """Return the two-message chat payload (system, then user) for ``python``.

    The system turn carries the module-level ``system_message``; the user turn
    carries the prompt produced by ``user_prompt_for(python)``.
    """
    system_turn = {"role": "system", "content": system_message}
    user_turn = {"role": "user", "content": user_prompt_for(python)}
    return [system_turn, user_turn]

In [9]:
# python example
# Sample input held as a string: a Library class (add / borrow / return books,
# list available and borrowed titles) plus a custom BookNotAvailableError.
example = """class BookNotAvailableError(Exception):
    pass

class Library:
    def __init__(self):
        self.inventory = {}  # book title -> quantity
        self.borrowed = {}   # user -> list of borrowed book titles

    def add_book(self, title, quantity=1):
        if quantity <= 0:
            raise ValueError("Quantity must be positive")
        self.inventory[title] = self.inventory.get(title, 0) + quantity

    def borrow_book(self, user, title):
        if self.inventory.get(title, 0) < 1:
            raise BookNotAvailableError(f"'{title}' is not available")
        self.inventory[title] -= 1
        self.borrowed.setdefault(user, []).append(title)

    def return_book(self, user, title):
        if user not in self.borrowed or title not in self.borrowed[user]:
            raise ValueError(f"User '{user}' did not borrow '{title}'")
        self.borrowed[user].remove(title)
        self.inventory[title] = self.inventory.get(title, 0) + 1

    def get_available_books(self):
        return {title: qty for title, qty in self.inventory.items() if qty > 0}

    def get_borrowed_books(self, user):
        return self.borrowed.get(user, [])"""

In [10]:
# python example2
# Second sample input held as a string: a Calculator class (add, subtract,
# divide with a zero check, multiply) and a standalone is_prime function.
# This is the default value of the "Python code:" textbox in the UI.
example2 = """class Calculator:
    def add(self, a, b):
        return a + b

    def subtract(self, a, b):
        return a - b

    def divide(self, a, b):
        if b == 0:
            raise ValueError("Cannot divide by zero")
        return a / b

    def multiply(self, a, b):
        return a * b


def is_prime(n):
    if n <= 1:
        return False
    if n <= 3:
        return True
    if n % 2 == 0 or n % 3 == 0:
        return False
    i = 5
    while i * i <= n:
        if n % i == 0 or n % (i + 2) == 0:
            return False
        i += 6
    return True
    """

In [11]:
def unit_test_gpt(python):
    """Stream unit tests for ``python`` from the OpenAI chat completions API.

    Yields:
        The full reply accumulated so far, once per streamed chunk (chunks
        with no text content re-yield the unchanged reply).
    """
    response_stream = openai.chat.completions.create(
        model=OPENAI_MODEL,
        messages=messages_for(python),
        stream=True,
    )
    accumulated = ""
    for event in response_stream:
        delta_text = event.choices[0].delta.content
        if delta_text:
            accumulated = accumulated + delta_text
        yield accumulated

In [12]:
def unit_test_claude(python):
    """Stream unit tests for ``python`` from the Anthropic messages API.

    Yields:
        The full reply accumulated so far, once per text delta.
    """
    accumulated = ""
    with claude.messages.stream(
        model=CLAUDE_MODEL,
        max_tokens=max_tok,
        system=system_message,
        messages=[{"role": "user", "content": user_prompt_for(python)}],
    ) as events:
        for delta in events.text_stream:
            accumulated = accumulated + delta
            yield accumulated

In [13]:
def unit_test_google(python):
    """Stream unit tests for ``python`` from the Gemini model.

    Yields:
        The reply accumulated so far with Markdown code fences removed
        (first every "```python" + newline opener, then any remaining "```").
    """
    gemini = google.generativeai.GenerativeModel(
        model_name=GOOGLE_MODEL,
        system_instruction=system_message,
    )
    accumulated = ""
    for piece in gemini.generate_content(contents=user_prompt_for(python), stream=True):
        accumulated += piece.text or ""
        # Strip fences from the whole text each time; the raw text is kept
        # unmodified in `accumulated`.
        without_opener = accumulated.replace("```python\n", "")
        yield without_opener.replace("```", "")

In [None]:
#unit_test_gpt(example)
#unit_test_claude(example)
#unit_test_google(example)

In [14]:
def select_model(python, model):
    """Stream unit tests for ``python`` from the provider named by ``model``.

    Args:
        python: Source code to generate tests for.
        model: One of "GPT", "Claude" or "Google".

    Yields:
        Each partial reply produced by the chosen provider's generator.

    Raises:
        ValueError: If ``model`` is not one of the known names (raised when
            the generator is first advanced).
    """
    # Lambdas keep the lookup lazy: only the chosen backend is ever touched.
    starters = {
        "GPT": lambda: unit_test_gpt(python),
        "Claude": lambda: unit_test_claude(python),
        "Google": lambda: unit_test_google(python),
    }
    start = starters.get(model)
    if start is None:
        raise ValueError("Unknown model")
    for partial_reply in start():
        yield partial_reply

In [None]:
# with gr.Blocks() as ui:
#     with gr.Row():
#         python = gr.Textbox(label="Python code:", lines=10, value=example)
#         test = gr.Textbox(label="Unit tests", lines=10)
#     with gr.Row():
#         model = gr.Dropdown(["GPT", "Claude","Google"], label="Select model", value="GPT")
#         generate = gr.Button("Generate unit tests")

#     generate.click(select_model, inputs=[python, model], outputs=[test])

# ui.launch()

In [15]:
def execute_python(code):
    # Capture stdout and stderr
    output = io.StringIO()
    sys_stdout = sys.stdout
    sys_stderr = sys.stderr
    sys.stdout = output
    sys.stderr = output

    try:
        # Compile the code first
        compiled_code = compile(code, '<string>', 'exec')

        # Prepare a namespace dict for exec environment
        # Include __builtins__ so imports like 'import unittest' work
        namespace = {"__builtins__": __builtins__}

        # Run the user's code, but expect tests will be defined here
        exec(compiled_code, namespace)

        # Look for unittest.TestCase subclasses in the namespace
        loader = unittest.TestLoader()
        suite = unittest.TestSuite()

        for obj in namespace.values():
            if isinstance(obj, type) and issubclass(obj, unittest.TestCase):
                tests = loader.loadTestsFromTestCase(obj)
                suite.addTests(tests)

        # Run the tests
        runner = unittest.TextTestRunner(stream=output, verbosity=2)
        result = runner.run(suite)

    except SystemExit as e:
        # Catch sys.exit calls from unittest.main()
        output.write(f"\nSystemExit called with code {e.code}\n")
    except Exception as e:
        # Catch other errors
        output.write(f"\nException: {e}\n")
    finally:
        sys.stdout = sys_stdout
        sys.stderr = sys_stderr

    return output.getvalue()

In [16]:
# Gradio front end: generate unit tests with the chosen model, then run them.
with gr.Blocks() as ui:
    # Top row: source code in, generated tests, and the test-run output.
    with gr.Row():
        python = gr.Textbox(label="Python code:", lines=10, value=example2)
        test = gr.Textbox(label="Unit tests", lines=10)
        test_run = gr.Textbox(label="Test results", lines=10)
    # Bottom row: model picker and the two action buttons.
    with gr.Row():
        model = gr.Dropdown(["GPT", "Claude","Google"], label="Select model", value="GPT")
        generate = gr.Button("Generate unit tests")
        run = gr.Button("Run unit tests")

    # select_model is a generator, so the "Unit tests" box receives each partial reply it yields.
    generate.click(select_model, inputs=[python, model], outputs=[test])
    # Runs whatever text is currently in the "Unit tests" box via execute_python.
    run.click(execute_python, inputs=[test],outputs=[test_run])

ui.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://9adbd4502ded8de5fb.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


