# Notebook to check all outputs are as expected

In [None]:
import json
import os
import pandas as pd
import tqdm

In [None]:
# Region whose outputs are checked; it is interpolated into the keys
# directory path and passed to check_file() in the cells below.
region = "thailand"

In [None]:
# Directory containing the keys files for this region.
keys_dir = f"/home/users/katriona/amber-inferences/keys/{region}_final/"

# os.listdir() returns entries in arbitrary order, so sort for a reproducible
# run. Keep regular files only: a sub-directory (e.g. a notebook checkpoint
# folder) would make check_file() fail when it tries to open it.
keys_files = sorted(
    path
    for path in (os.path.join(keys_dir, name) for name in os.listdir(keys_dir))
    if os.path.isfile(path)
)
print(keys_files)

In [None]:
def check_file(keys_file, region):
    """Report images listed in a keys file that are absent from the inference CSVs.

    The keys file is a JSON object mapping a date string to a list of image
    paths. For each date, the CSV ``{dep}_{date}.csv`` in the deployment's
    inference directory is read, and the basenames found in its
    ``image_path`` column are compared with the basenames listed in the keys
    file. Images without a matching CSV entry are collected per date and, if
    any exist, written to ``{region}_final_missing_keys/{dep}.json`` using the
    same ``date -> [paths]`` layout.

    Dates whose CSV is absent or cannot be read are reported on stdout and
    skipped; they do not appear in the missing-keys file.

    Args:
        keys_file: Path to the JSON keys file. The deployment id ``dep`` is
            the file's basename up to the first ``.``.
        region: Region name used to build the inference and output paths.

    Returns:
        None. Results are printed and, when images are missing, written to disk.
    """
    # Load the keys file
    with open(keys_file, "r", encoding="utf-8") as f:
        keys = json.load(f)

    dep = os.path.basename(keys_file).split(".")[0]
    print(f"🎥 Checking {dep}...")

    # Directory expected to hold one CSV per date for this deployment
    csv_dir = f"/gws/nopw/j04/ceh_generic/kgoldmann/{region}_inferences_tracking/{dep}"

    all_missing_keys = {}

    for date, image_paths in tqdm.tqdm(keys.items()):
        jpgs = [os.path.basename(p) for p in image_paths]

        csv_path = os.path.join(csv_dir, f"{dep}_{date}.csv")
        if not os.path.exists(csv_path):
            print(f" ⚠️ CSV for {date} not found: {csv_path}")
            continue

        # Best-effort: an unreadable CSV (or one without an image_path
        # column) is reported and skipped rather than aborting the whole run.
        try:
            df = pd.read_csv(csv_path, low_memory=False, encoding='utf-8')
            # A set gives O(1) membership tests below; non-string cells
            # (e.g. NaN from empty rows) are ignored.
            analysed_images = {
                os.path.basename(x) for x in df["image_path"] if isinstance(x, str)
            }
        except Exception as e:
            print(f" ⚠️ Error processing {csv_path}: {e}")
            continue

        # jpgs already holds basenames; keep the keys-file order and duplicates.
        missing = [jpg for jpg in jpgs if jpg not in analysed_images]
        if not missing:
            continue

        print(f"- ❌ {len(missing)}/{len(jpgs)} missing jpgs for {os.path.basename(csv_path)}")

        # Record the missing images in the same layout as the keys file
        all_missing_keys[date] = [os.path.join(dep, "snapshot_images", x) for x in missing]

    if all_missing_keys:
        # Write all_missing_keys to a file named after the deployment
        missing_keys_file = f"/home/users/katriona/amber-inferences/keys/{region}_final_missing_keys/{dep}.json"
        print(f"❗️ Writing missing keys for {dep} to {missing_keys_file}")
        os.makedirs(os.path.dirname(missing_keys_file), exist_ok=True)

        with open(missing_keys_file, "w", encoding="utf-8") as f:
            json.dump(all_missing_keys, f, indent=4)


In [None]:
# Run the check for every keys file collected above; each call prints its
# findings and may write a missing-keys JSON file.
for keys_file in keys_files:
    check_file(keys_file, region)