In [None]:
#!/usr/bin/env python3
import csv
import re

# --- CONFIG ---
# Paths of the two input CSVs and of the CSV to write; fill in before running.
CERTIFICATES_FILE = r""
RECOMMENDATIONS_FILE = r""
OUTPUT_FILE = r""

# Keywords (case-insensitive regex). Each entry becomes one alternative of the
# combined pattern below. Every entry ends with a comma so that enabling or
# adding a line can never silently concatenate two adjacent string literals
# into a single (wrong) regex.
KEYWORDS = [
    r"computer\s*centre",
    r"computer\s*center",
    r"data\s*centre",
    r"data\s*center",
    r"data\s*hall",
    # Optional: uncomment only if server/comms rooms are wanted.
    # r"\bserver\b",
]
# One compiled alternation of all keywords; compiled once at import time.
pattern = re.compile("|".join(KEYWORDS), re.IGNORECASE)

def row_matches(row):
    """Return True when at least one non-empty field of *row* contains a keyword.

    Each value is converted with ``str`` before being searched with the
    module-level ``pattern``; falsy values (``None``, ``""``) are skipped.
    """
    return any(
        bool(cell) and pattern.search(str(cell)) is not None
        for cell in row.values()
    )

def read_csv(file_path):
    """Read a CSV file into a list of dicts keyed by the header row.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list with one dict per data row, as produced by ``csv.DictReader``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading byte-order
    # mark. With plain "utf-8" a BOM would stay glued to the first header name
    # (e.g. "\ufeffLMK_KEY"), so lookups by that column name would miss.
    with open(file_path, encoding="utf-8-sig", newline="") as f:
        return list(csv.DictReader(f))

def filter_rows(rows):
    """Keep the rows for which ``row_matches`` is true, preserving their order."""
    return list(filter(row_matches, rows))

def merge_cert_reco(cert_rows, reco_rows):
    """Merge both tables, preferring certificate entries for duplicate LMK_KEY.

    Rows are indexed by their whitespace-stripped ``LMK_KEY`` value, so at most
    one row per key survives: a later row replaces an earlier one with the same
    key, and any certificate row replaces a recommendation row. Rows whose
    ``LMK_KEY`` is missing, ``None`` or blank are dropped.

    Args:
        cert_rows: Iterable of certificate row dicts.
        reco_rows: Iterable of recommendation row dicts.

    Returns:
        A list of the surviving row dicts, ordered by the first appearance of
        each key (recommendations are scanned before certificates).
    """
    merged = {}
    # Recommendations go in first so that certificates overwrite them.
    for rows in (reco_rows, cert_rows):
        for row in rows:
            # `or ""` covers a key that is present with value None (what
            # csv.DictReader yields for short rows); .get's default alone
            # only covers an absent key.
            key = (row.get("LMK_KEY") or "").strip()
            if key:
                merged[key] = row
    return list(merged.values())

def main():
    """Run the whole pipeline: load, filter, merge and write.

    Reads CERTIFICATES_FILE and RECOMMENDATIONS_FILE, keeps the rows that match
    a keyword, merges them by LMK_KEY and writes the result to OUTPUT_FILE,
    printing progress counts along the way. No output file is written when
    nothing matched.
    """
    print("Loading data...")
    cert_rows = read_csv(CERTIFICATES_FILE)
    reco_rows = read_csv(RECOMMENDATIONS_FILE)

    print(f"Certificates loaded: {len(cert_rows)}")
    print(f"Recommendations loaded: {len(reco_rows)}")

    print("Filtering for data centre keywords...")
    cert_filtered = filter_rows(cert_rows)
    reco_filtered = filter_rows(reco_rows)

    print(f"Matches in certificates: {len(cert_filtered)}")
    print(f"Matches in recommendations: {len(reco_filtered)}")

    print("Merging datasets...")
    combined = merge_cert_reco(cert_filtered, reco_filtered)

    print(f"Unique LMK_KEYs after merge: {len(combined)}")

    print(f"Writing to {OUTPUT_FILE} ...")
    if combined:
        # The two tables may have different columns, so the header is the
        # union of all keys. A dict (not a set) keeps first-seen order, which
        # makes the column order identical from run to run.
        ordered_fields = {}
        for row in combined:
            ordered_fields.update(dict.fromkeys(row))

        with open(OUTPUT_FILE, "w", newline="", encoding="utf-8") as out:
            writer = csv.DictWriter(out, fieldnames=list(ordered_fields))
            writer.writeheader()
            writer.writerows(combined)
    else:
        # Make it explicit that the "Writing to ..." line produced no file.
        print("No matching rows found; no output file written.")

    print("Finished!")


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
