import sys
# Show the path of the Python interpreter running this kernel.
print(sys.executable) 

In [None]:
# Install packages into THIS kernel: invoking pip through {sys.executable}
# targets the same interpreter that runs the notebook.
# First upgrade the packaging tools, then install the testing libraries.
!{sys.executable} -m pip install -U pip setuptools wheel
!{sys.executable} -m pip install pytest hypothesis ipytest

In [None]:
import ipytest
# Presumably applies ipytest's default notebook configuration — confirm
# against the ipytest documentation.
ipytest.autoconfig()

In [None]:
%%writefile re_arrange_array.py
def re_arrange_array(arr, n):
    """
    Reference solution for MBPP Task 229.

    Partition the first ``n`` items of ``arr`` in place so that every
    negative value comes before every non-negative value (zero counts
    as non-negative), then return the same list object.

    Each negative value found is swapped into the next free slot at the
    front, so negatives keep their relative order while non-negative
    values may be reordered.
    """
    boundary = 0  # index of the slot the next negative value belongs in
    for idx in range(n):
        if not arr[idx] < 0:
            continue
        # Move the negative value forward; whatever sat at the boundary
        # takes its old position.
        arr[boundary], arr[idx] = arr[idx], arr[boundary]
        boundary += 1
    return arr

In [None]:
from re_arrange_array import re_arrange_array

# Smoke-run the reference implementation on the three MBPP sample inputs.
# Expected output, line by line:
#   [-1, -3, -7, 4, 5, 6, 2, 8, 9]
#   [-14, -26, 12, 13, 15]
#   [-42, -39, -78, 10, 24, 36, 85]
print(re_arrange_array([-1, 2, -3, 4, 5, 6, -7, 8, 9], 9))
print(re_arrange_array([12, -14, -26, 13, 15], 5))
print(re_arrange_array([10, 24, 36, -42, -39, -78, 85], 7))

Buggy implementation:

In [None]:
%%writefile buggy.py
def re_arrange_array(arr, n):
    """
    Deliberately BUGGY variant of MBPP Task 229 (kept as a test target).

    Intended contract: move every negative value in the first ``n``
    items of ``arr`` ahead of the non-negative ones, in place, and
    return ``arr``.

    Bug: the scan covers indices 0 .. n-2 only, so the element at index
    n-1 is never examined. A negative value in that last position stays
    where it is, even when non-negative values precede it.
    """
    boundary = 0  # index of the slot the next negative value belongs in
    # BUG (intentional): the scan stops one short; the correct bound is n.
    for idx in range(n - 1):
        if not arr[idx] < 0:
            continue
        arr[boundary], arr[idx] = arr[idx], arr[boundary]
        boundary += 1
    return arr

In [None]:
from buggy import re_arrange_array

# Last element (9) is non-negative, so skipping it is harmless: the output
# matches the correct implementation.
print(re_arrange_array([-1, 2, -3, 4, 5, 6, -7, 8, 9], 9))
# BUG visible: the trailing -1 is never examined, so the list comes back
# unchanged with the negative value still last.
print(re_arrange_array([1, 2, 3, -1], 4))
# Bug NOT visible: the loop body never runs, but a lone negative element is
# already correctly arranged, so the output is still right.
print(re_arrange_array([-5], 1))

LLM-based tests

In [None]:
%%writefile test_llm_generated.py
import pytest
from buggy import re_arrange_array

def test_mbpp_examples():
    # Sanity check: the three original MBPP test cases, as
    # (input list, length argument, expected result).
    mbpp_cases = [
        ([-1, 2, -3, 4, 5, 6, -7, 8, 9], 9, [-1, -3, -7, 4, 5, 6, 2, 8, 9]),
        ([12, -14, -26, 13, 15], 5, [-14, -26, 12, 13, 15]),
        ([10, 24, 36, -42, -39, -78, 85], 7, [-42, -39, -78, 10, 24, 36, 85]),
    ]
    for values, size, expected in mbpp_cases:
        assert re_arrange_array(values, size) == expected

def test_additional_examples():
    # The kind of "ordinary" examples an LLM tends to produce: empty input,
    # all non-negative, all negative, and a small mixed list.
    extra_cases = [
        ([], 0, []),
        ([1, 2, 3], 3, [1, 2, 3]),
        ([-1, -2, -3], 3, [-1, -2, -3]),
        ([0, -1, 2], 3, [-1, 0, 2]),
    ]
    for values, size, expected in extra_cases:
        assert re_arrange_array(values, size) == expected

In [None]:
# NOTE(review): this repeats the earlier ipytest setup cell, and `sys` was
# already imported in the first cell. The visible cells run the suites via
# `!pytest` and `pytest.main`, not through ipytest.
import ipytest, sys
ipytest.autoconfig()

In [None]:
!pytest -q test_llm_generated.py

Human-written property-based tests

In [None]:
%%writefile test_properties.py
from hypothesis import given, strategies as st
from buggy import re_arrange_array

@given(st.lists(st.integers(min_value=-100, max_value=100), max_size=20))
def test_permutation_and_order(arr):
    # Hand the function a scratch copy: it mutates its argument in place and
    # the generated input is still needed for comparison.
    scratch = list(arr)
    result = re_arrange_array(scratch, len(scratch))

    # Property 1: the output is a permutation of the input (same multiset).
    assert sorted(result) == sorted(arr)

    # Property 2: once the first non-negative value appears, no negative
    # value may follow it.
    first_non_negative = next(
        (pos for pos, value in enumerate(result) if not value < 0),
        len(result),
    )
    assert all(not value < 0 for value in result[first_non_negative:])

In [None]:
!pytest -q test_properties.py

In [None]:
import json
import pytest

# Run each suite separately and capture its exit code. `--maxfail=1` stops a
# suite at its first failing test, which is all that is needed to know
# whether the suite detects the bug.
# NOTE(review): pytest.main runs inside this kernel, so modules that were
# already imported (e.g. `buggy`) are presumably reused from the import
# cache; restart the kernel after editing them — confirm against pytest docs.
llm_exit = pytest.main(["-q", "test_llm_generated.py", "--maxfail=1"])
human_exit = pytest.main(["-q", "test_properties.py", "--maxfail=1"])

# Only OK (0) and TESTS_FAILED (1) say anything about the bug. Any other
# exit code (usage error, no tests collected, internal error, interrupted)
# means the run itself went wrong and must not be recorded as a result.
for suite, code in (
    ("test_llm_generated.py", llm_exit),
    ("test_properties.py", human_exit),
):
    if code not in (pytest.ExitCode.OK, pytest.ExitCode.TESTS_FAILED):
        raise RuntimeError(
            f"pytest could not run {suite} (exit code {int(code)})"
        )

# A suite "finds" the bug exactly when at least one of its tests fails.
results = {
    "found_by_llm": llm_exit == pytest.ExitCode.TESTS_FAILED,
    "found_by_human": human_exit == pytest.ExitCode.TESTS_FAILED,
}
with open("results.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2)

# Last expression of the cell: displays the dict in the notebook output.
results

Bug Dossier:  
The buggy implementation in `buggy.py` iterates with `range(0, n - 1)`,
so it never examines the last element of the array. If that last element
is negative and at least one non-negative element precedes it, the
negative value stays behind the non-negative ones and the output is wrong.

Results:  
- LLM example tests: (fill from `results["found_by_llm"]`)  
- Human Hypothesis tests: (fill from `results["found_by_human"]`)

Why:  
Example-based tests often miss boundary cases (like “only the last
element is negative”), so the LLM may or may not find this bug depending
on its generated examples: an input such as `[1, 2, 3, -1]` exposes it,
while any input whose last element is non-negative cannot. Hypothesis,
by generating many random lists, is more likely to hit cases where the
last element is negative and thus expose the off-by-one error.