# Evidence Ledger Builder

**Goal:** Build a hash ledger (`03_Evidence/hashes/ledger.csv`) that records a SHA-256 digest and a named signer for each piece of audit evidence, without moving the files.

In [8]:
import csv
import hashlib
from datetime import datetime, timezone
from pathlib import Path

import pandas as pd


# All locations are derived from the kernel's working directory: the ledger file
# sits directly in it, and the project root is taken to be two levels above it.
CWD = Path.cwd()
LEDGER = CWD.joinpath("ledger.csv")
ROOT = CWD.parents[1]


def sha256sum(p: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``p``.

    The file is opened in binary mode and consumed in 8192-byte reads, so the
    whole content is never held in memory at once.
    """
    digest = hashlib.sha256()
    with open(p, "rb") as stream:
        while True:
            block = stream.read(8192)
            if not block:  # empty bytes object marks end of file
                break
            digest.update(block)
    return digest.hexdigest()

def ensure_header(csv_path: Path, header):
    """Make sure the CSV at ``csv_path`` starts with ``header``.

    Parent directories are created as needed. The header row is written when the
    file does not exist yet, or when it exists as a zero-byte file. A file that
    already has content is left untouched.

    Args:
        csv_path: Location of the CSV file.
        header: Iterable of column names written as the first row.
    """
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    # A zero-byte file has no header either; without this check, rows appended
    # later would be read back with the first data row taken as column names.
    is_empty_file = csv_path.is_file() and csv_path.stat().st_size == 0
    if csv_path.exists() and not is_empty_file:
        return
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        csv.writer(f).writerow(header)

# Column order of the ledger; the row lists appended later in the notebook
# supply their values in exactly this order.
HEADER = [
    "artifact_path",
    "artifact_type",
    "control_id",
    "run_id",
    "timestamp_utc",
    "sha256",
    "signer",
    "environment",
    "pii_flag",
    "notes",
]

# Create the ledger with this header row when it has not been created yet.
ensure_header(LEDGER, HEADER)

In [5]:
# Evidence files to record in the ledger, grouped by case folder.
ARTIFACTS = [
    # Case A: access review
    "01_Case/A_Access_Review/outputs/findings.csv",
    # Case B: backup / restore test
    "01_Case/B_Backup_Restore_test/data/restored_orders.csv",
    "01_Case/B_Backup_Restore_test/logs/backup_log.txt",
    "01_Case/B_Backup_Restore_test/logs/restore_log.txt",
    # Case C: data retention
    "01_Case/C_Data_Retention/data/customers_labels.csv",
    "01_Case/C_Data_Retention/data/customers_after_purge.csv",
    "01_Case/C_Data_Retention/logs/purge_log.txt",
]


def to_path(s: str) -> Path:
    """Turn an artifact string into a resolved ``Path``.

    Absolute inputs are used as given; anything else is anchored at ``ROOT``
    first. The result is passed through ``Path.resolve()`` in both cases.
    """
    candidate = Path(s)
    anchored = candidate if candidate.is_absolute() else ROOT / candidate
    return anchored.resolve()

# Resolve every configured artifact (relative entries are anchored at ROOT).
files = [to_path(s) for s in ARTIFACTS]

# (artifact_path, sha256) pairs already present in the ledger, used to skip
# artifacts that were recorded before with the same content.
# newline="" hands line-ending handling to the csv module, as its docs require
# for any file object passed to a reader.
with open(LEDGER, "r", newline="", encoding="utf-8") as f:
    existing = {(row["artifact_path"], row["sha256"]) for row in csv.DictReader(f)}
        

In [9]:


def meta_for(path: Path):
    """Pick ledger metadata for an artifact from keywords in its path.

    The lower-cased path string is checked against keyword groups in a fixed
    order (access, then backup/restore, then retention/purge/labels); the first
    group with a hit wins.

    Returns:
        A 4-tuple ``(control_id, run_prefix, signer, note)``; a generic tuple is
        returned when no keyword matches.
    """
    # NOTE(review): the match runs over the whole path string, so a keyword in a
    # parent directory name also counts - confirm this is intended.
    haystack = str(path).lower()
    rules = (
        (("access",),
         ("C-AC-REV", "AR", "GRC Lead", "Access orphan & admin review")),
        (("backup", "restore"),
         ("C-BR", "BR", "ISMS", "Backup/Restore evidence")),
        (("retention", "purge", "labels"),
         ("C-RET", "RET", "Data Owner", "Retention labels/purge evidence")),
    )
    for keywords, meta in rules:
        if any(word in haystack for word in keywords):
            return meta
    # generic default
    return ("C-UNK", "RUN", "Owner", "Evidence")

# Take ONE timezone-aware UTC reading and derive both strings from it, so the
# "timestamp_utc" value (with its literal "Z" suffix) really is UTC and the
# run-id date always refers to the same instant in the same zone.
run_started = datetime.now(timezone.utc)
now_utc = run_started.strftime("%Y-%m-%dT%H:%M:%SZ")
date_tag = run_started.strftime("%Y-%m-%d")
rows_to_write = []
queued = set()  # (artifact_path, sha256) pairs collected during this run

for p in files:
    if not p.exists():
        print("Not found:", p)
        continue

    # Ledger paths are stored relative to ROOT with forward slashes.
    rel = p.relative_to(ROOT).as_posix()

    # .csv -> "csv", .txt -> "log", anything else -> its extension without the dot.
    suffix = p.suffix.lower()
    if suffix == ".csv":
        art_type = "csv"
    elif suffix == ".txt":
        art_type = "log"
    else:
        art_type = suffix.lstrip(".")

    ctrl, run_prefix, signer, note = meta_for(p)
    run_id = f"{run_prefix}-{date_tag}"
    digest = sha256sum(p)

    key = (rel, digest)
    if key in existing:
        print("Already recorded:", rel)
        continue
    if key in queued:
        # Same file listed twice in this run: record it only once.
        print("Duplicate in this run:", rel)
        continue
    queued.add(key)

    # Value order follows the ledger header; environment and pii_flag are fixed
    # to "non-prod" / "synthetic" for every row written by this notebook.
    rows_to_write.append([rel, art_type, ctrl, run_id, now_utc, digest,
                          signer, "non-prod", "synthetic", note])

if rows_to_write:
    with open(LEDGER, "a", newline="", encoding="utf-8") as f:
        csv.writer(f).writerows(rows_to_write)
    # Remember what was just written so that re-running this cell on its own
    # (without rebuilding `existing`) does not append the same rows again.
    existing.update(queued)
    print(f"Appended {len(rows_to_write)} row(s) to {LEDGER}")
else:
    print("Nothing to append. Ledger is up to date.")

# Preview the file that was just written (same path object as the append above).
ledger = pd.read_csv(LEDGER)
ledger.head()

Appended 7 row(s) to c:\Project\GRC 1\03_Evidence\hashes\ledger.csv


Unnamed: 0,artifact_path,artifact_type,control_id,run_id,timestamp_utc,sha256,signer,environment,pii_flag,notes
0,01_Case/A_Access_Review/outputs/findings.csv,csv,C-AC-REV,AR-2025-09-12,2025-09-12T22:42:43Z,426790cb24b149c700ff591363a73f9bcaad7272c1c9d2...,GRC Lead,non-prod,synthetic,Access orphan & admin review
1,01_Case/B_Backup_Restore_test/data/restored_or...,csv,C-BR,BR-2025-09-12,2025-09-12T22:42:43Z,6c1ead514c83d437153b1a52fe9799f807f55daeab1add...,ISMS,non-prod,synthetic,Backup/Restore evidence
2,01_Case/B_Backup_Restore_test/logs/backup_log.txt,log,C-BR,BR-2025-09-12,2025-09-12T22:42:43Z,ab171b9b035de1f0b895b1a923739030fdeb4bb4e1d18a...,ISMS,non-prod,synthetic,Backup/Restore evidence
3,01_Case/B_Backup_Restore_test/logs/restore_log...,log,C-BR,BR-2025-09-12,2025-09-12T22:42:43Z,21a37d034ad2334e4d8490fd58dcb8e5d1cda4e8f0ea98...,ISMS,non-prod,synthetic,Backup/Restore evidence
4,01_Case/C_Data_Retention/data/customers_labels...,csv,C-RET,RET-2025-09-12,2025-09-12T22:42:43Z,b93700977bb9facc63f11e03b1012f3d19fa7c710fe42b...,Data Owner,non-prod,synthetic,Retention labels/purge evidence


### Security note (public portfolio)
- Publish hashes only for **synthetic/non-sensitive** artifacts in public repos.
- Never publish secrets/PII.
