In [1]:
import sys
print(sys.executable)  # should show C:\Python313\python.exe

c:\Users\91868\AppData\Local\Programs\Python\Python311\python.exe


In [2]:
# install packages into THIS kernel
!{sys.executable} -m pip install -U pip setuptools wheel
!{sys.executable} -m pip install pytest hypothesis ipytest



In [3]:
import ipytest
ipytest.autoconfig()

In [4]:
%%writefile reverse_vowels.py
def reverse_vowels(str1: str) -> str:
    vowels = ""
    for char in str1:
        if char in "aeiouAEIOU":
            vowels += char
    result_string = ""
    for char in str1:
        if char in "aeiouAEIOU":
            result_string += vowels[-1]
            vowels = vowels[:-1]
        else:
            result_string += char
    return result_string


Overwriting reverse_vowels.py


In [5]:
from reverse_vowels import reverse_vowels
print(reverse_vowels("Python"))  # Python
print(reverse_vowels("USA"))     # ASU
print(reverse_vowels("ab"))      # ab

Python
ASU
ab


BUGGY IMPLEMENTATION :

In [6]:
%%writefile buggy.py
def reverse_vowels(str1: str) -> str:
    V = "aeiou"          # BUG: uppercase vowels are ignored
    vowels = [c for c in str1 if c in V]
    out = []
    j = len(vowels) - 1
    for ch in str1:
        if ch in V:
            out.append(vowels[j])
            j -= 1
        else:
            out.append(ch)
    return "".join(out)


Overwriting buggy.py


In [7]:
from buggy import reverse_vowels
print(reverse_vowels("Python"))  # likely "Python" (still fine)
print(reverse_vowels("USA"))     # BUG shows up: should be "ASU" but won't be
print(reverse_vowels("ab"))      # "ab"


Python
USA
ab


LLM based Tests

In [8]:
%%writefile test_llm_generated.py
import pytest
from buggy import reverse_vowels

def test_examples():
    assert reverse_vowels("Python") == "Python"
    assert reverse_vowels("USA") == "ASU"      # should FAIL with our bug
    assert reverse_vowels("ab") == "ab"

def test_more_edges():
    assert reverse_vowels("") == ""
    assert reverse_vowels("a") == "a"
    assert reverse_vowels("ae") == "ea"
    assert reverse_vowels("HELLO") == "HOLLE"  # should FAIL with our bug


Overwriting test_llm_generated.py


In [9]:
import ipytest, sys
ipytest.autoconfig()

In [10]:
!pytest -q test_llm_generated.py

[31mF[0m[31mF[0m[31m                                                                       [100%][0m
[31m[1m________________________________ test_examples ________________________________[0m

    [0m[94mdef[39;49;00m[90m [39;49;00m[92mtest_examples[39;49;00m():[90m[39;49;00m
        [94massert[39;49;00m reverse_vowels([33m"[39;49;00m[33mPython[39;49;00m[33m"[39;49;00m) == [33m"[39;49;00m[33mPython[39;49;00m[33m"[39;49;00m[90m[39;49;00m
>       [94massert[39;49;00m reverse_vowels([33m"[39;49;00m[33mUSA[39;49;00m[33m"[39;49;00m) == [33m"[39;49;00m[33mASU[39;49;00m[33m"[39;49;00m      [90m# should FAIL with our bug[39;49;00m[90m[39;49;00m
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^[90m[39;49;00m
[1m[31mE       AssertionError: assert 'USA' == 'ASU'[0m
[1m[31mE         [0m
[1m[31mE         [0m[91m- ASU[39;49;00m[90m[39;49;00m[0m
[1m[31mE         [92m+ USA[39;49;00m[90m[39;49;00m[0m

[1m[31mtest_llm_generated.py[

Human-Property Based Tests

In [11]:
%%writefile test_properties.py
from hypothesis import given, strategies as st
from buggy import reverse_vowels

V = set("aeiouAEIOU")
def is_vowel(c): return c in V

@given(st.text())
def test_length_preserved(s):
    out = reverse_vowels(s)
    assert len(out) == len(s)

@given(st.text())
def test_non_vowel_positions_unchanged(s):
    out = reverse_vowels(s)
    for i, ch in enumerate(s):
        if not is_vowel(ch):
            assert out[i] == ch

@given(st.text())
def test_double_application_is_identity(s):
    # reversing vowels twice should return the original string
    out = reverse_vowels(reverse_vowels(s))
    assert out == s


Overwriting test_properties.py


In [12]:
!pytest -q test_properties.py

[32m.[0m[32m.[0m[32m.[0m[32m                                                                      [100%][0m
[32m[32m[1m3 passed[0m[32m in 3.10s[0m[0m


In [13]:
import json, pytest

# Run each suite separately and capture exit codes: 0 = pass, >0 = fail
llm_exit = pytest.main(["-q", "test_llm_generated.py", "--maxfail=1"])
human_exit = pytest.main(["-q", "test_properties.py", "--maxfail=1"])

results = {
    "found_by_llm": (llm_exit != 0),
    "found_by_human": (human_exit != 0)
}
with open("results.json", "w") as f:
    json.dump(results, f, indent=2)

results


[31mF[0m
[31m[1m________________________________ test_examples ________________________________[0m

    [0m[94mdef[39;49;00m[90m [39;49;00m[92mtest_examples[39;49;00m():[90m[39;49;00m
        [94massert[39;49;00m reverse_vowels([33m"[39;49;00m[33mPython[39;49;00m[33m"[39;49;00m) == [33m"[39;49;00m[33mPython[39;49;00m[33m"[39;49;00m[90m[39;49;00m
>       [94massert[39;49;00m reverse_vowels([33m"[39;49;00m[33mUSA[39;49;00m[33m"[39;49;00m) == [33m"[39;49;00m[33mASU[39;49;00m[33m"[39;49;00m      [90m# should FAIL with our bug[39;49;00m[90m[39;49;00m
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^[90m[39;49;00m
[1m[31mE       AssertionError: assert 'USA' == 'ASU'[0m
[1m[31mE         [0m
[1m[31mE         [0m[91m- ASU[39;49;00m[90m[39;49;00m[0m
[1m[31mE         [92m+ USA[39;49;00m[90m[39;49;00m[0m

[1m[31mtest_llm_generated.py[0m:6: AssertionError
[31mFAILED[0m test_llm_generated.py::[1mtest_examples[0m - AssertionErr

{'found_by_llm': True, 'found_by_human': False}

Bug Dossier: The implementation only considers lowercase vowels ("aeiou") and ignores uppercase vowels, violating the spec that requires both cases.

Results: LLM example tests caught the bug via explicit cases like "USA" and "HELLO". Human property tests also caught the bug because properties such as double application is identity and non-vowel positions unchanged fail once uppercase vowels are present and mishandled.

Why: Example tests succeed when prompts include mixed-case examples; property tests succeed because they explore randomized strings (including uppercase) and check invariants that uppercase mishandling breaks.