In [None]:
import tqdm
import fs.zipfs
import fs.multifs
import os
import fs.copy
import sys
sys.path.append("/scr/ig_pipeline")
import b1k_pipeline.utils

# Path of the merged archive that main() writes.
OUT_FILENAME = "/scr/rc7.zip"

# Input archives, listed in ascending priority: main() passes each entry's list
# index as its MultiFS priority, and MultiFS resolves a path that exists in
# several archives from the highest-priority one. Later entries therefore
# override earlier ones (here the patch archive overrides rc6).
PARALLELS = [
    "/scr/rc6.zip",
    "/scr/rc7_patch.zip",
]

# Directory names to delete from the output after the copy. main() requires
# each name to match exactly one "objects/*/<name>" path in the output archive.
OBJECTS_TO_REMOVE = [
    "uuuien",
    "ccnvxc",
]

def main():
    """Merge the PARALLELS archives into OUT_FILENAME, then prune unwanted objects.

    Every zip listed in PARALLELS is added to a single MultiFS view, using its
    list index as the MultiFS priority. The merged view is copied into a new
    archive at OUT_FILENAME, and finally each directory named in
    OBJECTS_TO_REMOVE is removed from the output's "objects" directory.

    Raises:
        ValueError: If a name in OBJECTS_TO_REMOVE does not match exactly one
            "*/<name>" path under the output's "objects" directory.
    """
    # Get a multi-FS view over all of the parallel filesystems.
    # Using the MultiFS as a context manager closes it on exit; closing a
    # MultiFS also closes the filesystems added to it (auto_close defaults to
    # True), so the input zips are released even if a later step fails.
    with fs.multifs.MultiFS() as multi_fs:
        for priority, parallel_zip_name in enumerate(PARALLELS):
            print("Adding", parallel_zip_name)
            multi_fs.add_fs(
                os.path.basename(parallel_zip_name),
                fs.zipfs.ZipFS(parallel_zip_name),
                priority=priority,
            )

        # Copy all the files to the output zip filesystem.
        print("Copying files")
        # Walk once up front purely so the progress bar knows its total.
        total_files = sum(1 for _ in multi_fs.walk.files())
        with b1k_pipeline.utils.WriteOnly7ZipFS(OUT_FILENAME) as out_fs:
            with tqdm.tqdm(total=total_files) as pbar:
                fs.copy.copy_fs(multi_fs, out_fs, on_copy=lambda *args: pbar.update(1))

            print("Removing some objects")
            objects_dir = out_fs.opendir("objects")
            for obj_to_remove in OBJECTS_TO_REMOVE:
                objdir_glob = [x.path for x in objects_dir.glob(f"*/{obj_to_remove}")]
                # Explicit check instead of `assert`: asserts are stripped under
                # `python -O`, which would let a zero/multi match slip through.
                if len(objdir_glob) != 1:
                    raise ValueError(
                        f"Needed exactly one dir for {obj_to_remove}, got {objdir_glob}"
                    )
                objdir = objdir_glob[0]
                print("Removing", objdir)
                objects_dir.removetree(objdir)

# Run the merge as soon as this cell/script is executed (there is no
# __main__ guard, so importing this file would also trigger it).
main()