In [1]:
import delta_debugging
from delta_debugging.DD_mod import DDMods
import json

In [2]:

class TestDD(DDMods):
    def __init__(self):
        DDMods.__init__(self)
        self.debug_dd = 0
        self.verbose = 0

    def _test(self, deltas):
        print('Testing case {:11}. '.format(
            '"' + "".join([x[1] for x in deltas])))
        if deltas == []:
            print('Test passed: empty string')
            return self.PASS
        try:
            # Attempt to load the JSON
            parsed_json = json.loads(deltas_to_str(deltas))

            for key in parsed_json:
                # Check if value is a list
                if isinstance(parsed_json[key], list):
                    print(f"Test failed: {key} is a list.")
                    return self.FAIL
            return self.PASS

        except json.JSONDecodeError as e:
            # If there's a decoding error, print the error message
            print(f"Test unresolved: {e}")
            return self.UNRESOLVED


def str_to_deltas(test_input):
    deltas = list(map(lambda x: (x, test_input[x]), range(len(test_input))))
    return deltas


def deltas_to_str(deltas):
    return "".join([x[1] for x in deltas])


## Min fail input

In [15]:
mydd = TestDD()
string1 = '{"baz": 7, "baaa": [1, 2]}'
test_input = string1
deltas = list(map(lambda x: (x, test_input[x]), range(len(test_input))))
c = mydd.ddmin(deltas)              # Invoke DDMIN
minimal = "".join([x[1] for x in c])

Testing case "          . 
Test passed: empty string
Testing case "{"baz": 7, "baaa": [1, 2]}. 
Test failed: baaa is a list.
Testing case "aaa": [1, 2]}. 
Test unresolved: Expecting value: line 1 column 1 (char 0)
Testing case "{"baz": 7, "b. 
Test unresolved: Unterminated string starting at: line 1 column 12 (char 11)
Testing case ": 7, "baaa": [1, 2]}. 
Test unresolved: Expecting value: line 1 column 1 (char 0)
Testing case "{"baz"baaa": [1, 2]}. 
Test unresolved: Expecting ':' delimiter: line 1 column 7 (char 6)
Testing case "{"baz": 7, "[1, 2]}. 
Test unresolved: Unterminated string starting at: line 1 column 12 (char 11)
Testing case "{"baz": 7, "baaa": . 
Test unresolved: Expecting value: line 1 column 20 (char 19)
Testing case "az": 7, "baaa": [1, 2]}. 
Test unresolved: Expecting value: line 1 column 1 (char 0)
Testing case "{"b: 7, "baaa": [1, 2]}. 
Test unresolved: Expecting ':' delimiter: line 1 column 10 (char 9)
Testing case "{"baz", "baaa": [1, 2]}. 
Test unresolved: Expec

In [16]:
print('Found minimal test case: "{}"'.format(minimal))

Found minimal test case: "{"":7,"":[]}"


## Max fail input

In [3]:
mydd = TestDD()
string1 = '{"baz": 7, "baaa": [1, 2]}'
string1 = '{"":7,"":[]}'
string1 = '{ "foo": 7,"bar" :[]}'
# string1 = minimal # '{"":7,"":[]}'
string2 = '{ "foo": "bar" }'
mods = mydd.get_mods(string1, string2)
c1 = str_to_deltas(string1)
c2 = str_to_deltas(string2)
c = mods

In [10]:
mods

[(9, '7', 'REMOVE'),
 (10, ',', 'REMOVE'),
 (17, ':', 'REMOVE'),
 (18, '[', 'REMOVE'),
 (19, ']', 'REMOVE')]

In [11]:
print("Expanding failure input ", mydd.pretty(c1), " to ", mydd.pretty(c2))

Expanding failure input  { "foo": 7,"bar" :[]}  to  { "foo": "bar" }


In [12]:
(c, c1, c2) = mydd.dddiff_mods(c1, c2, mods)

Testing case "{ "foo": "bar" :[]}. 
Test unresolved: Expecting ',' delimiter: line 1 column 16 (char 15)
Testing case "{ "foo": 7,"bar" }. 
Test unresolved: Expecting ':' delimiter: line 1 column 18 (char 17)
Testing case "{ "foo": ,"bar" :[]}. 
Test unresolved: Expecting value: line 1 column 10 (char 9)
Testing case "{ "foo": 7"bar" :[]}. 
Test unresolved: Expecting ',' delimiter: line 1 column 11 (char 10)
Testing case "{ "foo": 7,"bar" []}. 
Test unresolved: Expecting ':' delimiter: line 1 column 18 (char 17)
Testing case "{ "foo": 7,"bar" :}. 
Test unresolved: Expecting value: line 1 column 19 (char 18)
Testing case "{ "foo": 7,"bar" :]}. 
Test unresolved: Expecting value: line 1 column 19 (char 18)
Testing case "{ "foo": 7,"bar" :[}. 
Test unresolved: Expecting value: line 1 column 20 (char 19)


In [13]:
print("The minimal failure to ", mydd.pretty(c2), " is ", mydd.pretty(c1))
print("The difference is ", c)

The minimal failure to  { "foo": "bar" }  is  { "foo": 7,"bar" :[]}
The difference is  [(9, '7', 'REMOVE'), (10, ',', 'REMOVE'), (17, ':', 'REMOVE'), (18, '[', 'REMOVE'), (19, ']', 'REMOVE')]


### Verify input conditions

In [38]:
mydd._test(c1)
mydd._test(c2)

Testing case "{"":7,"":[]}. 
Test failed:  is a list.
Testing case "{ "foo": "bar" }. 


'PASS'

In [43]:
mydd.pretty(c1), mydd.pretty(c2)

('{"baz": 7, "baaa": [1, 2]}', '{ "foo": "bar" }')

In [46]:
cs = mydd.split(mods, 2)
temp_c1 = mydd._DDMods__modsapply(c1, cs[0])
mydd.pretty(temp_c1), mydd.pretty(c1), mydd.pretty(c2)
# temp_mods = cs[1]
# prepend, inserted, removed = mydd._DDMods__split_mods(temp_mods)
# mydd._DDMods__apply_remove(temp_c1, removed)
# # mydd._DDMods__apply_insert(temp_c1, inserted)
# # print(deltas_to_str(mydd._DDMods__modsapply(temp_c1, )))

('{ "oofbaz":: 7, rab""baaa": [1, 2]}',
 '{"baz": 7, "baaa": [1, 2]}',
 '{ "foo": "bar" }')

## Different get_mods

### Jsondiff

In [29]:
from jsondiff import diff

In [30]:
obj1 = json.loads(string1)
obj2 = json.loads(string2)
changes = diff(obj1, obj2)

In [33]:
obj1, obj2, changes

({'foo': 7, 'bar': []}, {'foo': 'bar'}, {'foo': 'bar', delete: ['bar']})

In [36]:
def process_mods(changes, path=""):
    mods = []
    if isinstance(changes, dict):  # If it's a dictionary, check for add/remove/modify
        for key, value in changes.items():
            new_path = f"{path}/{key}" if path else key
            if value is None:  # Indicates deletion
                mods.append((new_path, 'REMOVE'))
            elif isinstance(value, dict):  # Substructure changed
                process_mods(value, new_path)
            else:  # Added or modified
                mods.append((new_path, 'ADD'))
    elif isinstance(changes, list):  # If it's a list, handle additions/removals
        for idx, value in enumerate(changes):
            new_path = f"{path}[{idx}]"
            mods.append((new_path, 'ADD'))
    return mods

In [37]:
mods = process_mods(changes)
mods

[('foo', 'ADD'), (delete, 'ADD')]

### Old diff

In [21]:
import difflib 
mods = []
s = difflib.SequenceMatcher()
s.set_seqs(string1, string2)
matching_blocks = s.get_matching_blocks()

In [25]:
string1, string2

('{ "foo": 7,"bar" :[]}', '{ "foo": "bar" }')

In [24]:
s.get_opcodes()

[('equal', 0, 9, 0, 9),
 ('delete', 9, 11, 9, 9),
 ('equal', 11, 17, 9, 15),
 ('delete', 17, 20, 15, 15),
 ('equal', 20, 21, 15, 16)]

In [22]:
# Traverse the matching blocks and identify insertions
for i, block in enumerate(matching_blocks):
    # Check fore insertion before first match
    if i == 0 and block.b > 0:
        insert_str = string2[:block.b]
        mods.append((-1, insert_str, mydd.ADD))
    # Check for insertions between matches
    if i < len(matching_blocks) - 1:
        next_block = matching_blocks[i + 1]
        insert_str = string2[(block.b+1):next_block.b]
        mods.append((block.a, insert_str, mydd.ADD))

In [23]:
mods

[(0, ' "foo": ', 'ADD'), (11, 'bar" ', 'ADD'), (20, '', 'ADD')]

### Byte mods

In [4]:
bytes1 = b"abcdef"
bytes2 = b"abXcdeYf"
modifications = mydd.get_mods(bytes1, bytes2)

In [6]:
import difflib 
list(difflib.ndiff(bytes1, bytes2))

['  97', '  98', '+ 88', '  99', '  100', '  101', '+ 89', '  102']

In [7]:
modifications

[(1, '88', 'ADD'), (4, '89', 'ADD')]

## Debug utility

In [97]:
mydd = TestDD()
string1 = '{"baz": 7, "baaa": [1, 2]}'
string1 = '{"":7,"":[]}'
# string1 = minimal # '{"":7,"":[]}'
string2 = '{"foo":"bar" }'
mods = mydd.get_mods(string1, string2)
c1 = str_to_deltas(string1)
c2 = str_to_deltas(string2)
c = mods

### break down

In [98]:
c = mods
c

[(1, 'f', 'ADD'),
 (1, 'o', 'ADD'),
 (1, 'o', 'ADD'),
 (4, '7', 'REMOVE'),
 (5, ',', 'REMOVE'),
 (6, 'b', 'ADD'),
 (6, 'a', 'ADD'),
 (6, 'r', 'ADD'),
 (7, ' ', 'ADD'),
 (8, ':', 'REMOVE'),
 (9, '[', 'REMOVE'),
 (10, ']', 'REMOVE')]

In [99]:
print(mydd.pretty(c1))
cs = mydd.split(c, 2)
cs

{"":7,"":[]}


[[(1, 'f', 'ADD'),
  (1, 'o', 'ADD'),
  (1, 'o', 'ADD'),
  (4, '7', 'REMOVE'),
  (5, ',', 'REMOVE'),
  (6, 'b', 'ADD')],
 [(6, 'a', 'ADD'),
  (6, 'r', 'ADD'),
  (7, ' ', 'ADD'),
  (8, ':', 'REMOVE'),
  (9, '[', 'REMOVE'),
  (10, ']', 'REMOVE')]]

In [62]:
c1, cs[0]

([(0, '{'),
  (1, '"'),
  (2, '"'),
  (3, ':'),
  (4, '7'),
  (5, ','),
  (6, '"'),
  (7, '"'),
  (8, ':'),
  (9, '['),
  (10, ']'),
  (11, '}')],
 [(1, 'f', 'ADD'),
  (1, 'o', 'ADD'),
  (1, 'o', 'ADD'),
  (4, '7', 'REMOVE'),
  (5, ',', 'REMOVE'),
  (6, 'b', 'ADD')])

In [75]:
cs[0][:4]

[(1, 'f', 'ADD'), (1, 'o', 'ADD'), (1, 'o', 'ADD'), (4, '7', 'REMOVE')]

In [100]:
# mydd.test_mods_and_resolve(c1, cs[0], c2, mydd.REMOVE)
prepend, inserted, removed = mydd._DDMods__split_mods(cs[0])
# mydd._DDMods__apply_insert(c1, inserted)

In [101]:
insertions = {}
for insert_idx, char in inserted:
    if insert_idx in insertions:
        insertions[insert_idx].append(char)
    else:
        insertions[insert_idx] = [char]
insertions

{1: ['f', 'o', 'o'], 6: ['b']}

In [102]:
mydd.pretty(c1)

'{"":7,"":[]}'

In [119]:
deltas = c1.copy()
for insert_idx in sorted(insertions.keys(), reverse=True):
    chars = insertions[insert_idx]
    for i in range(len(deltas)-1, -1, -1):
        idx, _ = deltas[i]
        if idx == insert_idx:
            # Insert chars in order after the found index
            for char in reversed(chars):
                deltas.insert(i + 1, (idx, char))  # Insert after the found index
            break

In [126]:
# deltas = c1.copy()
for insert_idx in sorted(insertions.keys(), reverse=True):
    chars = insertions[insert_idx]
    # Find the insert index in deltas in reversed order
    for i, (idx, _) in enumerate(reversed(deltas)):
        if idx == insert_idx:
            # Insert chars in reversed order
            for char in (chars):
                deltas.insert(len(deltas) - i, (idx, char))
            break

In [127]:
# deltas = c1.copy()
print(mydd.pretty(deltas))

{"foofoo":7,"bb":[]}


In [None]:
i = 0
(t, csub) = mydd.test_mods_and_resolve(c1, cs[i], c2, mydd.REMOVE)
print(t)
print(mydd.pretty(cs[i]))
print(mydd.pretty(csub))

Testing case "{"":""b:[]foo}. 
Test unresolved: Expecting ',' delimiter: line 1 column 7 (char 6)
UNRESOLVED
foo7,b
{"":""b:[]foo}


In [25]:
mydd._DDMods__modsminus(c, cs[0])

[(1, 'o', 'ADD'),
 (3, ' ', 'ADD'),
 (4, '7', 'REMOVE'),
 (5, ',', 'REMOVE'),
 (6, 'b', 'ADD'),
 (6, 'a', 'ADD'),
 (6, 'r', 'ADD'),
 (7, ' ', 'ADD'),
 (8, ':', 'REMOVE'),
 (9, '[', 'REMOVE'),
 (10, ']', 'REMOVE')]

In [13]:
print(cs[i])
c1 = csub
c = mydd._DDMods__modsminus(c, cs[i])
print(c)
n = 2
print(n)

[(5, ',', 'REMOVE'), (6, 'b', 'ADD'), (6, 'a', 'ADD'), (6, 'r', 'ADD')]
[(0, ' ', 'ADD'), (1, 'f', 'ADD'), (1, 'o', 'ADD'), (1, 'o', 'ADD'), (3, ' ', 'ADD'), (4, '7', 'REMOVE'), (7, ' ', 'ADD'), (8, ':', 'REMOVE'), (9, '[', 'REMOVE'), (10, ']', 'REMOVE')]
2


In [47]:
mydd.pretty(c1), mydd.pretty(c2)

('{"":7,"":[]}', '{ "foo": "bar" }')

In [48]:
cs

[[(0, ' ', 'ADD'),
  (1, 'f', 'ADD'),
  (1, 'o', 'ADD'),
  (1, 'o', 'ADD'),
  (3, ' ', 'ADD'),
  (4, '7', 'REMOVE'),
  (5, ',', 'REMOVE')],
 [(6, 'b', 'ADD'),
  (6, 'a', 'ADD'),
  (6, 'r', 'ADD'),
  (7, ' ', 'ADD'),
  (8, ':', 'REMOVE'),
  (9, '[', 'REMOVE'),
  (10, ']', 'REMOVE')]]

## Debug functions

#### insertion

In [4]:
cs = mydd.split(mods, 2)
temp_c1 = mydd._DDMods__modsapply(c1, cs[0])
mydd.pretty(c1), mydd.pretty(c2), mydd.pretty(temp_c1), 

('{"baz": 7, "baaa": [1, 2]}',
 '{ "foo": "bar" }',
 '{ "bazfoo": 7, "baaa": [1, 2]}')

In [7]:
prepend, inserted, removed = mydd._DDMods__split_mods(cs[0])

In [8]:
inserted

[(0, ' '), (4, 'f'), (4, 'o'), (4, 'o')]

### Fix apply_remove

In [None]:
delta_dict = {idx: char for idx, char in removed}
delta_dict

{2: 'b', 3: 'a', 4: 'z', 7: ' ', 8: '7', 9: ','}

In [16]:
deltas = c1
deltas

[(0, '{'),
 (1, '"'),
 (2, 'b'),
 (3, 'a'),
 (4, 'z'),
 (5, '"'),
 (6, ':'),
 (7, ' '),
 (8, '7'),
 (9, ','),
 (10, ' '),
 (11, '"'),
 (12, 'b'),
 (13, 'a'),
 (14, 'a'),
 (15, 'a'),
 (16, '"'),
 (17, ':'),
 (18, ' '),
 (19, '['),
 (20, '1'),
 (21, ','),
 (22, ' '),
 (23, '2'),
 (24, ']'),
 (25, '}')]

In [18]:
temp_c1 = [(idx, char) for idx, char in deltas if idx not in delta_dict]

In [19]:
mydd.pretty(temp_c1)

'{"": "baaa": [1, 2]}'