# Evidence Ledger Builder

**Goal:** Build a signed ledger (`03_evidence/ledger.csv`) of SHA-256 hashes for audit evidence, without moving the files.

In [1]:
import csv
import hashlib
from datetime import datetime, timezone
from pathlib import Path

import pandas as pd

# Project base: one directory above the current working directory.
BASE = Path("..").resolve()
print(BASE)

# Evidence folder and the ledger file stored directly inside it.
ROOT = BASE.joinpath("03_evidence")
LEDGER = ROOT.joinpath("ledger.csv")
for location in (ROOT, LEDGER):
    print(location)


C:\Project\SafePay
C:\Project\SafePay\03_evidence
C:\Project\SafePay\03_evidence\ledger.csv


In [2]:
# Identity written to the "signer" column of every new ledger row.
# Kept free of leading/trailing whitespace so the CSV field is clean.
SIGNER = "G.Arjona / GRC"
# Value written to the "environment" column of every new ledger row.
ENVIRONMENT = "non-prod"
# Value written to the "pii_flag" column of every new ledger row.
PII_FLAG_DEFAULT = "synthetic"

def ensure_header(csv_path: Path, header):
    """Create the CSV file with its header row if it is missing or empty.

    Args:
        csv_path: Location of the CSV file; missing parent directories
            are created.
        header: Sequence of column names written as the first row.

    A file that already has content is left untouched.
    """
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    # A zero-byte file has no header either, so treat it like a missing
    # file; otherwise later appends would yield a CSV without column names.
    if csv_path.exists() and csv_path.stat().st_size > 0:
        return
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        csv.writer(f).writerow(header)

def sha256sum(p: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``p``.

    The file is read in 8192-byte blocks, so it is never loaded into
    memory as a whole.
    """
    digest = hashlib.sha256()
    with open(p, "rb") as stream:
        while True:
            block = stream.read(8192)
            if not block:
                # An empty read signals end of file.
                break
            digest.update(block)
    return digest.hexdigest()

def artifact_type_from_path(p: Path) -> str:
    """Derive an artifact type label from the file extension of ``p``.

    Returns the lower-cased final suffix without its leading dot, or
    ``"file"`` when the path has no extension.
    """
    suffix = p.suffix.lower().lstrip(".")
    return suffix if suffix else "file"

def to_path(s: str) -> Path:
    """Convert ``s`` to a ``Path``, anchoring relative input at ``ROOT.parent``.

    Absolute input is returned as given; relative input is joined onto
    ``ROOT.parent`` and resolved.
    """
    candidate = Path(s)
    if candidate.is_absolute():
        return candidate
    return (ROOT.parent / candidate).resolve()


# Ledger columns, in the order they are written to the CSV.
HEADER = [
    "artifact_path",
    "artifact_type",
    "control_id",
    "run_id",
    "timestamp_utc",
    "sha256",
    "signer",
    "environment",
    "pii_flag",
    "notes",
]
# Create the ledger file with its header row when it does not exist yet.
ensure_header(LEDGER, HEADER)


# Artifacts to record in the ledger, given relative to the parent of ROOT.
ARTIFACTS = [
    "01_Case/A_Payment_Dataflow/outputs/tokenized_payments.csv",
    "01_Case/A_Payment_Dataflow/outputs/validation_report.csv",
    "01_Case/B_PCI_Controls_and_Audit/outputs/pci_checks_detail.json",
    "01_Case/B_PCI_Controls_and_Audit/outputs/pci_evidence.csv",
    "01_Case/C_SQL_Audit_and_Analytics/outputs/DSAR_customer_250_information.csv",
    "01_Case/C_SQL_Audit_and_Analytics/outputs/retention_candidates.csv",
]

# Turn every entry into a filesystem path.
files = list(map(to_path, ARTIFACTS))


# Collect the (artifact_path, sha256) pairs already present in the ledger so
# that unchanged artifacts are not recorded a second time.
existing = set()
if LEDGER.exists():
    # newline="" leaves line-ending handling to the csv module, which keeps
    # quoted fields that contain line breaks intact.
    with open(LEDGER, "r", newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            existing.add((row.get("artifact_path", ""), row.get("sha256", "")))


# One identifier for the whole run, so every row appended now shares it.
# The trailing "Z" denotes UTC, so the clock is read in UTC, not local time.
run_id = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")

# Build one ledger row per artifact that exists on disk and whose
# (path, hash) pair is not in the ledger yet.
rows_to_append = []
for p in files:
    if not p.exists():
        print(f"[WARN] Not found: {p}")
        continue
    # Paths are stored relative to the parent of ROOT.
    rel = str(p.relative_to(ROOT.parent))
    digest = sha256sum(p)
    if (rel, digest) in existing:
        print(f"[SKIP] Already recorded: {rel}  SHA256={digest[:8]}...")
        continue

    artifact_type = artifact_type_from_path(p)
    timestamp_utc = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    row = {
        "artifact_path": rel,
        "artifact_type": artifact_type,
        "control_id": "",
        "run_id": run_id,
        "timestamp_utc": timestamp_utc,
        "sha256": digest,
        "signer": SIGNER,
        "environment": ENVIRONMENT,
        "pii_flag": PII_FLAG_DEFAULT,
        "notes": "",
    }
    rows_to_append.append(row)
    # Remember the pair so a path listed twice is only added once per run.
    existing.add((rel, digest))
    print(f"[ADD] {rel}  {artifact_type}  SHA256={digest[:8]}...")


# Append the collected rows to the ledger, or report that nothing changed.
if not rows_to_append:
    print("\nNo new artifacts to append.")
else:
    with open(LEDGER, "a", newline="", encoding="utf-8") as ledger_file:
        csv.DictWriter(ledger_file, fieldnames=HEADER).writerows(rows_to_append)
    print(f"\n{len(rows_to_append)} entries appended to ledger: {LEDGER}")


[ADD] 01_Case\A_Payment_Dataflow\outputs\tokenized_payments.csv  csv  SHA256=b898f638...
[ADD] 01_Case\A_Payment_Dataflow\outputs\validation_report.csv  csv  SHA256=ccd27d0d...
[ADD] 01_Case\B_PCI_Controls_and_Audit\outputs\pci_checks_detail.json  json  SHA256=4123156f...
[ADD] 01_Case\B_PCI_Controls_and_Audit\outputs\pci_evidence.csv  csv  SHA256=dcf758b3...
[ADD] 01_Case\C_SQL_Audit_and_Analytics\outputs\DSAR_customer_250_information.csv  csv  SHA256=9f041b7f...
[ADD] 01_Case\C_SQL_Audit_and_Analytics\outputs\retention_candidates.csv  csv  SHA256=1697449f...

6 entries appended to ledger: C:\Project\SafePay\03_evidence\ledger.csv


### Security note (public portfolio)
- Publish hashes only for **synthetic/non-sensitive** artifacts in public repos.
- Never publish secrets/PII.
