In [None]:
import boto3
from boto3.dynamodb.conditions import Key, Attr
from datetime import datetime
import concurrent.futures
import json
from tqdm import tqdm

: 

In [None]:
# Module-level DynamoDB handles, created once and reused by query_all_items.
# boto3.resource() is called with no arguments, so region and credentials come
# from boto3's default configuration.
dynamodb = boto3.resource("dynamodb")
# Handle for the "TradeOrders" table (queried by SupplierID in query_all_items).
orders_table = dynamodb.Table("TradeOrders")


def query_all_items(partition_key, filter_expression):
    """Return every item of one SupplierID partition that passes a filter.

    Runs ``orders_table.query`` with ``SupplierID == partition_key`` and the
    given filter expression, following ``LastEvaluatedKey`` from page to page
    until the response no longer carries one. Progress is printed after each
    page.

    Args:
        partition_key: Value compared against the ``SupplierID`` key.
        filter_expression: Condition passed through as ``FilterExpression``.

    Returns:
        A list with the ``Items`` of all pages, in the order received.
    """
    query_args = dict(
        KeyConditionExpression=Key("SupplierID").eq(partition_key),
        FilterExpression=filter_expression,
    )
    collected = []

    print(f"[INFO] Starting query for partition_key={partition_key}", flush=True)
    more_pages = True
    while more_pages:
        page = orders_table.query(**query_args)
        collected += page["Items"]
        print(f"[PROGRESS] Retrieved {len(collected)} items so far...", flush=True)

        # A page without LastEvaluatedKey is the final one.
        more_pages = "LastEvaluatedKey" in page
        if more_pages:
            query_args["ExclusiveStartKey"] = page["LastEvaluatedKey"]

    print(f"[INFO] Finished query, total items retrieved: {len(collected)}", flush=True)
    return collected


In [None]:
def snitch_fast(supplierid, output_file="unmatched_orders.json"):
    """Save the PO-required orders that no PayorOrderNumb refers to as JSON.

    The supplier's partition is queried twice: once for orders with
    ``UserField06 == "yes"`` (PO-required) and once for orders carrying a
    non-empty ``PayorOrderNumb``. A PO-required order counts as matched when
    its ``OrderID`` (as a string) occurs as a substring of any
    ``PayorOrderNumb``. The remaining orders are sorted by ``CreateDate``,
    newest first, written to *output_file* and returned.

    Args:
        supplierid: ``SupplierID`` partition key. A falsy value or
            ``"SAGSNITCH"`` (any letter case) is replaced by ``"SAGREETINGS"``.
        output_file: Path of the JSON file to write; an existing file is
            overwritten.

    Returns:
        The list of unmatched order items, sorted by ``CreateDate`` descending.

    Raises:
        TypeError: If an item holds a value that cannot be represented in
            JSON (anything other than plain JSON types, ``Decimal`` and sets).
    """
    # Local imports keep this cell self-contained; the file-level imports
    # only bring in ``datetime`` itself.
    from datetime import timezone
    from decimal import Decimal

    def parse_date(value):
        """Parse an ISO-8601 string into a naive datetime usable as sort key."""
        try:
            parsed = datetime.fromisoformat(value)
        except (TypeError, ValueError):
            # Missing, non-string or malformed dates sort as the oldest.
            return datetime.min
        if parsed.tzinfo is not None:
            # Aware and naive datetimes cannot be compared, so normalise
            # aware values to naive UTC.
            # NOTE(review): naive CreateDate values are thereby treated as
            # UTC -- confirm against how the table is populated.
            parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
        return parsed

    def to_jsonable(value):
        """``json.dump`` fallback for the Python types boto3 hands back."""
        if isinstance(value, Decimal):
            # boto3 returns every DynamoDB number as Decimal. Whole numbers
            # stay exact as int; fractions become float (may lose precision).
            if value == value.to_integral_value():
                return int(value)
            return float(value)
        if isinstance(value, (set, frozenset)):
            # DynamoDB set attributes arrive as Python sets.
            return list(value)
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    # Handle override logic
    if not supplierid or supplierid.upper() == "SAGSNITCH":
        supplierid = "SAGREETINGS"

    print(f"[INFO] Processing supplier: {supplierid}", flush=True)

    # --------------------------
    # 1. Query PO-required orders
    # --------------------------
    print("[INFO] Querying PO-required orders...", flush=True)
    temp_orders = query_all_items(
        supplierid,
        Attr("UserField06").eq("yes")
    )
    print(f"[INFO] Found {len(temp_orders)} PO-required orders.", flush=True)

    # --------------------------
    # 2. Query orders with PayorOrderNumb
    # --------------------------
    print("[INFO] Querying orders with PayorOrderNumb...", flush=True)
    temp_po = query_all_items(
        supplierid,
        Attr("PayorOrderNumb").exists() & Attr("PayorOrderNumb").ne("")
    )
    print(f"[INFO] Found {len(temp_po)} orders with PayorOrderNumb.", flush=True)

    # De-duplicated payor numbers. Matching is a substring test, so these are
    # scanned rather than looked up by key.
    payor_numbers = {str(o["PayorOrderNumb"]) for o in temp_po}

    # --------------------------
    # 3. Matching
    # --------------------------
    # The substring checks are pure-Python CPU work, which threads cannot run
    # in parallel under the GIL, so a plain loop is used instead of a pool.
    print("[INFO] Matching orders...", flush=True)
    matched_ids = set()
    for order in tqdm(temp_orders, total=len(temp_orders), desc="Matching orders"):
        order_id = str(order["OrderID"])
        # An empty OrderID would be a substring of everything; it is never
        # treated as matched.
        if order_id and any(order_id in payor for payor in payor_numbers):
            matched_ids.add(order_id)

    print(f"[INFO] Total matched orders: {len(matched_ids)}", flush=True)

    # --------------------------
    # 4. Compute unmatched orders
    # --------------------------
    print("[INFO] Computing unmatched orders...", flush=True)
    unmatched_orders = [
        o for o in temp_orders
        if str(o["OrderID"]) not in matched_ids
    ]
    print(f"[INFO] Found {len(unmatched_orders)} unmatched orders.", flush=True)

    # Sort by CreateDate descending
    unmatched_orders.sort(
        key=lambda x: parse_date(x.get("CreateDate", "")),
        reverse=True,
    )

    # --------------------------
    # 5. Save results to JSON
    # --------------------------
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(
            unmatched_orders,
            f,
            indent=4,
            ensure_ascii=False,
            default=to_jsonable,
        )

    print(f"[INFO] Unmatched orders saved to {output_file}", flush=True)
    print(f"[RESULT] Total unmatched orders: {len(unmatched_orders)}", flush=True)

    return unmatched_orders
