# Compare

We can compare the results, but we need to be careful! They may not be identical, since the hash tracking could pick any one of a set of duplicate files while the diff tracking only looks at the file itself.

As such, we will compare the HASHES of the source files (directly computed) as well as the expected state at 1.

In [None]:
import json, shutil, subprocess
from pathlib import Path
import gzip as gz


In [None]:
import os
import sys

# Put the parent directory at the front of sys.path (only once) before
# importing rirb.
abspath = os.path.abspath('../')
if abspath not in sys.path:
    sys.path.insert(0, abspath)
import rirb

In [None]:
# Method A result: the JSON file holds a list of pairs (used as (src, dst)
# in the comparisons below); keep them as a set of tuples for set algebra.
with open('transfer_A_hashes.json') as f:
    mA = {(src, dst) for src, dst in json.load(f)}

In [None]:
# Method B result: the JSON file holds a list of pairs (used as (src, dst)
# in the comparisons below); keep them as a set of tuples for set algebra.
with open('transfer_B_tracking.json') as f:
    mB = {(src, dst) for src, dst in json.load(f)}

In [None]:
# Size of each mapping, then the number of (src, dst) pairs both methods share.
# (The size line used to be printed twice; once is enough.)
print(f'{len(mA) = }, {len(mB) = }')
print(f'{len(mA.intersection(mB)) = }')

In [None]:
# Do both methods produce the same set of destination paths?
# The bare expression on the last line is what the notebook displays.
dsts_A = {pair[1] for pair in mA}
dsts_B = {pair[1] for pair in mB}
dsts_A == dsts_B

In [None]:
# (src, dst) pairs present in A but not in B
print(mA.difference(mB))

In [None]:
# (src, dst) pairs present in B but not in A
print(mB.difference(mA))

## Theoretical Hashes

Compute the theoretical hashes of the resulting transfer

In [None]:
dst = '../tests/testdirs/restore_poc/dst/' # Your rclone remote including : if needed
loclogs = Path('DEST/logs') # Should be LOCAL

# Start from an empty local logs directory. Only a missing directory is an
# expected condition here; any other removal failure (permissions, symlink, ...)
# is raised as-is instead of resurfacing as a FileExistsError from mkdir below.
try:
    shutil.rmtree(loclogs)
except FileNotFoundError:
    pass
loclogs.mkdir(parents=True, exist_ok=False)
(loclogs / '.ignore').touch()

In [None]:
# Copy the remote's logs directory into the local one, leaving out log.log.
# The return code of rclone is the cell's displayed value.
remote_logs = rirb.utils.pathjoin(dst, 'logs')
cmd = ['rclone', 'copy', remote_logs, loclogs, '--exclude', 'log.log']
subprocess.call(cmd)

In [None]:
# Sub-directories of the local logs directory, in sorted path order
states = [entry for entry in loclogs.iterdir() if entry.is_dir()]
states.sort()

In [None]:
# Build a lookup of path -> sha1 from the downloaded logs.
# Note that this is reverse of Method A code

# The last state's curr.json.gz supplies the entries keyed under curr/
state = states[-1]
with gz.open(state / 'curr.json.gz') as fobj:
    current_files = json.load(fobj)
hashes = {
    os.path.join('curr', name): meta['Hashes']['sha1']
    for name, meta in current_files.items()
}

# Every state that has a backed_up_files.json.gz supplies entries keyed
# under back/<state name>/
for state in reversed(states):
    backfile = state / 'backed_up_files.json.gz'
    if backfile.exists():
        backpath = Path('back') / state.name
        with gz.open(backfile) as fobj:
            backed_up = json.load(fobj)
        hashes.update(
            (str(backpath / name), meta['Hashes']['sha1'])
            for name, meta in backed_up.items()
        )
    

In [None]:
# Compare the two methods by the hash of each source rather than by path
hashes_A = {hashes[src] for src, _ in mA}
hashes_B = {hashes[src] for src, _ in mB}
res = hashes_A == hashes_B
print(f'A == B {res}')

## Compare to truth

In [None]:
# Load the reference state; its values are compared against the hashes below
truth = json.loads(Path('states/01.json').read_text())

In [None]:
# The set of hashes transferred by Method A must equal the set of reference values
hashes_A = {hashes[src] for src, _ in mA}
res = hashes_A == set(truth.values())  # Transitive property also == mB
print(f'A == Truth (and == B) {res}')