In [1]:
import os
import io
import sys
import textwrap
import re
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr
from pathlib import Path
import subprocess
from IPython.display import Markdown, display

In [13]:
# Load variables from a local .env file; override=True lets the file win over
# values already present in the process environment.
load_dotenv(override=True)
anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')

# Report only whether the key is present -- never print the key itself.
if anthropic_api_key:
    print("Anthropic API Key exists")
else:
    print("Anthropic API Key not set")

Anthoropic API Key exists


In [14]:
# Client built with no explicit arguments (configuration comes from the SDK defaults).
openai = OpenAI()

# Anthropic is addressed through the same OpenAI SDK class by pointing it at
# Anthropic's endpoint and passing the Anthropic key loaded earlier.
anthropic_url = "https://api.anthropic.com/v1/"
anthropic_model = "claude-sonnet-4-5-20250929"
anthropic = OpenAI(
    base_url=anthropic_url,
    api_key=anthropic_api_key,
)

In [15]:
# System message sent with every test-generation request (see messages_for).
# It constrains the model to emit a single pytest file with no prose or
# markdown fences, so the reply can be written to disk and executed.
SYSTEM_PROMPT= """
You are a senior Python engineer. Produce a single, self-contained pytest file.
Rules:
- Output only Python test code (no prose, no markdown fences).
- Use plain pytest tests (functions), no classes unless unavoidable.
- Deterministic: avoid network/IO; seed randomness if used.
- Import the target module by module name only.
- Cover every public function and method with at least one tiny test.
- Prefer straightforward, fast assertions.
"""

In [16]:
def create_user_tests_prompt(code, module_name):
    """Create the user prompt asking the LLM to generate unit tests.

    The prompt is assembled flush-left so that no source-file indentation leaks
    into the text sent to the model, and the code under test is dedented so its
    relative indentation (which is significant in Python) is shown intact.

    Args:
        code: Source code of the module to be tested. May carry a common
            leading indent (e.g. when pasted from a triple-quoted string).
        module_name: Name the generated tests must use to import the module.

    Returns:
        The complete user-message text as a single string.
    """
    guidelines = [
        "Use appropriate testing framework for Python",
        "Do not insert in the response the function for the tests.",
        "Create tests for all functions and methods",
        "Include both positive and negative test cases",
        "Test edge cases and error conditions",
        "Use meaningful test names that describe what is being tested",
        "Include setup and teardown methods if needed",
        "Add mock objects for external dependencies (like database connections)",
        "Follow testing best practices for Python",
        f"Import as: `import {module_name}`",
    ]
    # Same normalisation the test runner applies before writing the module to
    # disk, so the model sees exactly the code that will be executed.
    source = textwrap.dedent(code or "").strip("\n")

    return (
        "Please generate comprehensive unit tests for the following Python code.\n"
        "\n"
        "Guidelines:\n"
        + "\n".join(f"- {item}" for item in guidelines)
        + "\n\nHere's the code to test:\n\n"
        + source
        + "\n\nPlease return only the unit test code without any additional "
        "explanation or markdown formatting."
    )


In [17]:
def messages_for(python, module_name):
    """Build the two-message chat payload for a test-generation request.

    Args:
        python: Source code of the module to generate tests for.
        module_name: Module name the tests should import.

    Returns:
        A list holding the system message followed by the user message.
    """
    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    user_message = {
        "role": "user",
        "content": create_user_tests_prompt(python, module_name),
    }
    return [system_message, user_message]
 

In [18]:
def call_anthropic_test_generator(python, module_name):
    """Ask the Anthropic-backed client to write unit tests for the given code.

    Args:
        python: Source code of the module to generate tests for.
        module_name: Module name the tests should import.

    Returns:
        The text content of the first choice in the completion response.
    """
    completion = anthropic.chat.completions.create(
        model=anthropic_model,
        messages=messages_for(python, module_name),
        reasoning_effort="high",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [19]:
# Sample module source used as the initial value of the "Python code" textbox.
# It is stored with a common leading indent; the test runner dedents the code
# before writing it to disk.
function_to_test = """
    def lengthOfLongestSubstring(s):
        if not isinstance(s, str):
            raise TypeError("Input must be a string")
        max_length = 0
        substring = ""
        start_idx = 0
        while start_idx < len(s):
            string = s[start_idx:]
            for i, x in enumerate(string):
                substring += x
                if len(substring) == len(set((list(substring)))):
                    
                    if len(set((list(substring)))) > max_length:
                        
                        max_length = len(substring)

            start_idx += 1
            substring = ""
                  
                
        return max_length"""

In [20]:
test_code = """```python
import pytest

# Unit tests using pytest
def test_lengthOfLongestSubstring():
    assert lengthOfLongestSubstring("abcabcbb") == 3  # Case with repeating characters
    assert lengthOfLongestSubstring("bbbbb") == 1    # Case with all same characters
    assert lengthOfLongestSubstring("pwwkew") == 3    # Case with mixed characters
    assert lengthOfLongestSubstring("") == 0           # Empty string case
    assert lengthOfLongestSubstring("abcdef") == 6     # All unique characters
    assert lengthOfLongestSubstring("abca") == 3       # Case with pattern and repeat
    assert lengthOfLongestSubstring("dvdf") == 3       # Case with repeated characters separated
    assert lengthOfLongestSubstring("a") == 1           # Case with single character
    assert lengthOfLongestSubstring("au") == 2          # Case with unique two characters
```"""

In [27]:

# Matches a complete markdown code fence with an optional language tag
# (```python, ```py, bare ```), capturing only the fenced content.
_CODE_FENCE_RE = re.compile(r"```[ \t]*[\w+-]*[ \t]*\r?\n(.*?)```", re.DOTALL)


def extract_code(text):
    """Return the Python code contained in an LLM reply.

    If the reply contains one or more complete markdown code fences, only the
    fenced content is returned (multiple fences are joined with a newline), so
    any surrounding prose and the language tag are dropped. Replies without a
    complete fence are returned with stray fence markers removed.

    Args:
        text: Raw reply text; may be None or empty.

    Returns:
        The extracted code as a string ("" for None/empty input).
    """
    if not text:
        return ""

    fenced_blocks = _CODE_FENCE_RE.findall(text)
    if fenced_blocks:
        return "\n".join(fenced_blocks)

    # No complete fence (plain code, or a reply truncated before the closing
    # marker): fall back to stripping the markers themselves.
    return text.replace("```python\n", "").replace("```", "")
    

def execute_tests_in_venv(code_to_test, tests, module_name, python_interpreter=sys.executable):
    """
    Execute the given Python code string within the specified virtual environment.
    
    Args:
    - code_str: str, the Python code to execute.
    - venv_dir: str, the directory path to the virtual environment created by pipenv.
    """
    if not module_name or not code_to_test.strip() or not tests.strip():
        return "‚ùå Provide module name, module code, and tests.", ""

    if not python_interpreter:
        raise EnvironmentError("Python interpreter not found in the specified virtual environment.")

    # Prepare the command to execute the code
    code_str = textwrap.dedent(code_to_test)
    test_code = extract_code(tests)
    code_str_path = Path("tests")
    test_path = code_str_path / "test"
    test_path.mkdir(parents=True, exist_ok=True)
    (test_path / f"test_{module_name}.py").write_text(test_code)
    (code_str_path / f"{module_name}.py").write_text(code_str)
    # command = ["pytest", str(test_path)]
    env = os.environ.copy()
    env["PYTHONPATH"] = str(code_str_path) + os.pathsep + env.get("PYTHONPATH", "")
    cmd = [sys.executable, "-m", "pytest", "-q"]  # quiet output, but still includes summary
    # proc = subprocess.run(cmd, cwd=code_str_path, env=env, text=True, capture_output=True)

    try:
        result = subprocess.run(cmd, cwd=code_str_path, env=env, text=True, capture_output=True)
        print("Tests ran successfully!")
        print((result.stdout or "") + ("\n" + result.stderr if result.stderr else ""))
        return result.stdout
    except subprocess.CalledProcessError as e:
        print("Some tests failed!")
        print("Output:\n", e.stdout)
        print("Errors:\n", e.stderr)
        # Extracting failed test information
        failed_tests = []
        for line in e.stdout.splitlines():
            if "FAILED" in line and "::" in line:
                failed_tests.append(line.strip())
        if failed_tests:
            print("Failed Tests:")
            for test in failed_tests:
                print(test)
    
        return e.stdout
    

In [None]:
# Gradio front end: generate tests for pasted code, then run them.
# Components are laid out in the order they are created inside each Row/Column.
with gr.Blocks() as ui:
    gr.Markdown("## Write unit tests for Python code")
    # Row 1: name under which the pasted code is saved and imported by the tests.
    with gr.Row():
        with gr.Column(scale=1, min_width=300):
            module_name_tb = gr.Textbox(
                label="Module name (used in `import <name>`)",
                value="mymodule"
            )
    # Row 2: code to test on the left, generated (or hand-edited) tests on the right.
    with gr.Row():
        with gr.Column(scale=1, min_width=300):
            python = gr.Textbox(label="Python code:", value=function_to_test, lines=10)
            unit_tests = gr.Button("Write unit tests")
        with gr.Column(scale=1, min_width=300):
            unit_tests_out = gr.TextArea(label="Unit tests", value=test_code, elem_classes=["python"])
            test_run = gr.Button("Run unit tests")
    # Row 3: output of the test run.
    with gr.Row():
        
        python_out = gr.TextArea(label="Unit tests result", elem_classes=["python"])
        

    # "Write unit tests" sends the code and module name to the model and fills the tests box.
    unit_tests.click(call_anthropic_test_generator, inputs=[python, module_name_tb], outputs=[unit_tests_out])
    # "Run unit tests" executes the current tests against the current code and shows the result.
    test_run.click(execute_tests_in_venv, inputs=[python, unit_tests_out, module_name_tb], outputs=[python_out])


# Start the app; inbrowser=True presumably opens it in the default browser -- confirm against Gradio docs.
ui.launch(inbrowser=True)
# ui.launch()

* Running on local URL:  http://127.0.0.1:7864
* To create a public link, set `share=True` in `launch()`.




Tests ran successfully!

/Users/megharajpara/Desktop/Work/Learning/AI Engineer/llmprojects/Practice/.venv/bin/python: No module named pytest

Tests ran successfully!

/Users/megharajpara/Desktop/Work/Learning/AI Engineer/llmprojects/Practice/.venv/bin/python: No module named pytest

