961: Python function to convert a roman numeral to an integer.

In [100]:
%%writefile bugged_roman.py

def roman_to_int(s):
    rom_val = {'I': 1, 'V': 5, 'X': 10, 'L': 50, 'C': 100, 'D': 500, 'M': 1000}
    int_val = 0
    for i in range(len(s)):
        if i > 1 and rom_val[s[i]] > rom_val[s[i - 1]]:   # bug: (i > 0 to i > 1)
            int_val += rom_val[s[i]] - 2 * rom_val[s[i - 1]]
        else:
            int_val += rom_val[s[i]]
    return int_val

Overwriting bugged_roman.py


LLM Testing

In [101]:
%%writefile llm_test_roman.py

import pytest
from bugged_roman import roman_to_int


def test_basic_roman_numerals():
    assert roman_to_int("I") == 1
    assert roman_to_int("V") == 5
    assert roman_to_int("X") == 10
    assert roman_to_int("L") == 50
    assert roman_to_int("C") == 100
    assert roman_to_int("D") == 500
    assert roman_to_int("M") == 1000

def test_repeated_numerals():
    assert roman_to_int("III") == 3
    assert roman_to_int("XX") == 20
    assert roman_to_int("CC") == 200
    assert roman_to_int("MMM") == 3000

def test_mixed_cases():
    assert roman_to_int("XIV") == 14
    assert roman_to_int("XXIX") == 29
    assert roman_to_int("LXXIX") == 79

def test_complex_numerals():
    assert roman_to_int("CCLXXXIX") == 289
    assert roman_to_int("MCMXCIV") == 1994
    assert roman_to_int("MMVIII") == 2008

def test_maximum_value():
    assert roman_to_int("MMMCMXCIX") == 3999  # Maximum standard Roman numeral


Overwriting llm_test_roman.py


In [102]:
!pytest -v llm_test_roman.py

platform win32 -- Python 3.13.2, pytest-8.4.2, pluggy-1.6.0 -- C:\Python313\python.exe
cachedir: .pytest_cache
hypothesis profile 'default'
rootdir: d:\IIIT HYD\SSD\Final project\HumanVsLLM\961
plugins: anyio-4.9.0, hypothesis-6.147.0
[1mcollecting ... [0mcollected 5 items

llm_test_roman.py::test_basic_roman_numerals [32mPASSED[0m[32m                      [ 20%][0m
llm_test_roman.py::test_repeated_numerals [32mPASSED[0m[32m                         [ 40%][0m
llm_test_roman.py::test_mixed_cases [32mPASSED[0m[32m                               [ 60%][0m
llm_test_roman.py::test_complex_numerals [32mPASSED[0m[32m                          [ 80%][0m
llm_test_roman.py::test_maximum_value [32mPASSED[0m[32m                             [100%][0m



Human Testing

In [103]:
%%writefile human_test_roman.py

import pytest
from hypothesis import given, strategies as st
from bugged_roman import roman_to_int

def int_to_roman(num):
    num_map = {
        1: "I",
        5: "V",    4: "IV",
        10: "X",   9: "IX",
        50: "L",   40: "XL",
        100: "C",  90: "XC",
        500: "D",  400: "CD",
        1000: "M", 900: "CM",
    }
    r = ''
    for n in [1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1]:
        while n <= num:
            r += num_map[n]
            num-=n
    return r


VALID_ROMANS = [
    "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X",
    "XI", "XII", "XIII", "XIV", "XV", "XVI", "XVII", "XVIII", "XIX", "XX",
    "XL", "L", "XC", "C", "CD", "D", "CM", "M"
]

roman_st = st.sampled_from(VALID_ROMANS)

roman_pair_st = st.tuples(roman_st, roman_st)

@given(roman_st)
def test_positive_output(roman):
    """Property 1: roman_to_int(s) should always return a positive integer."""
    value = roman_to_int(roman)
    assert isinstance(value, int)
    assert value > 0


@given(roman_pair_st)
def test_monotonicity(pair):
    """Property 2: Ordering of Roman numerals should reflect integer ordering (if distinct)."""
    a, b = pair
    val_a = roman_to_int(a)
    val_b = roman_to_int(b)
    if val_a < val_b:
        assert a != b
    if val_a == val_b:
        assert a == b or (roman_to_int(a) == roman_to_int(b))


@given(roman_st)
def test_conversion(roman):
    """Property 3: int_to_roman(roman_to_int(s)) should equal s for valid normalized numerals."""
    value = roman_to_int(roman)
    back = int_to_roman(value)
    assert back == roman



Overwriting human_test_roman.py


In [104]:
!pytest -v human_test_roman.py

platform win32 -- Python 3.13.2, pytest-8.4.2, pluggy-1.6.0 -- C:\Python313\python.exe
cachedir: .pytest_cache
hypothesis profile 'default'
rootdir: d:\IIIT HYD\SSD\Final project\HumanVsLLM\961
plugins: anyio-4.9.0, hypothesis-6.147.0
[1mcollecting ... [0mcollected 3 items

human_test_roman.py::test_positive_output [32mPASSED[0m[32m                         [ 33%][0m
human_test_roman.py::test_monotonicity [32mPASSED[0m[32m                            [ 66%][0m
human_test_roman.py::test_conversion [31mFAILED[0m[31m                              [100%][0m

[31m[1m_______________________________ test_conversion _______________________________[0m

    [0m[37m@given[39;49;00m(roman_st)[90m[39;49;00m
>   [94mdef[39;49;00m[90m [39;49;00m[92mtest_conversion[39;49;00m(roman):[90m[39;49;00m
                   ^^^[90m[39;49;00m

[1m[31mhuman_test_roman.py[0m:55: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

roman = 'XL'

    [0m