<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/secure_experiment_logger_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# secure_experiment_logger.py
# pip install cryptography torch tensorboard pandas

import os
import json
import base64
import secrets
import hashlib
import datetime
import pathlib
import tarfile
import shutil
from typing import Optional, Iterable, Dict, Any, List

import pandas as pd
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
from torch.utils.tensorboard import SummaryWriter


# ----------------------------
# Configuration and constants
# ----------------------------
# Algorithm label written to each run's manifest under "algorithm".
ALG = "AES-256-GCM"
ENV_KEY = "EXPLOG_KEY_B64"        # Base64-encoded 32-byte key
KEYFILE_NAME = "key.b64"          # Stored inside each run directory
# Per-run file that receives one encrypted JSON record per logged episode.
ENC_EPISODES = "episodes.jsonl.enc"
# Per-run file holding the encrypted tar.gz of the TensorBoard event directory.
ENC_EVENTS = "events.tgz.enc"
# Per-run JSON file describing algorithm, creation time and key fingerprint.
MANIFEST = "manifest.json"
TB_PLAIN_DIR = "tb_events_plain"  # Temporary plaintext TB events before encryption


# ----------------------------
# Key management
# ----------------------------
def _decode_key(b64key: str) -> bytes:
    """Decode a Base64 key string and insist on exactly 32 raw bytes.

    Raises:
        ValueError: if the decoded key is not 32 bytes long.
    """
    key_bytes = base64.b64decode(b64key)
    if len(key_bytes) == 32:
        return key_bytes
    raise ValueError("Key must be Base64(32 bytes) for AES-256.")

def _encode_key(raw: bytes) -> str:
    """Render raw key bytes as an ASCII Base64 string."""
    encoded = base64.b64encode(raw)
    return str(encoded, "ascii")

def generate_key() -> str:
    """Create a fresh random 32-byte key and hand it back Base64-encoded."""
    fresh = secrets.token_bytes(32)
    return _encode_key(fresh)

def load_or_create_key(logdir: pathlib.Path) -> bytes:
    """
    Return the raw 32-byte key for a run directory, creating one if needed.

    Precedence:
      1) ENV VAR EXPLOG_KEY_B64
      2) logdir/key.b64
      3) generate, save to logdir/key.b64

    Args:
        logdir: Run directory that holds (or will hold) the key file. It is
            created if a new key has to be saved and it does not exist yet.

    Returns:
        The decoded 32-byte key.

    Raises:
        ValueError: if the environment variable or the key file does not
            decode to exactly 32 bytes.
    """
    b64 = os.getenv(ENV_KEY)
    if b64:
        return _decode_key(b64)

    keyfile = logdir / KEYFILE_NAME
    if keyfile.exists():
        return _decode_key(keyfile.read_text().strip())

    b64 = generate_key()
    # The directory may be missing when a caller hands in its own path.
    logdir.mkdir(parents=True, exist_ok=True)
    # Create the key file with owner-only permissions from the start instead
    # of inheriting whatever the process umask allows (often world-readable).
    fd = os.open(str(keyfile), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w", encoding="ascii") as fh:
        fh.write(b64)
    print(f"[secure-logger] Generated new key at: {keyfile}")
    # NOTE(review): this echoes the secret key to stdout (and therefore into
    # saved notebook output / CI logs). Kept for backward compatibility.
    print(f"[secure-logger] Reuse via: {ENV_KEY}={b64}")
    return _decode_key(b64)

def key_fingerprint(raw_key: bytes) -> str:
    """Short identifier for a key: the first 16 hex characters of its SHA-256."""
    digest = hashlib.sha256(raw_key).digest()
    # 8 digest bytes render as exactly 16 hex characters.
    return digest[:8].hex()


# ----------------------------
# Encryption helpers
# ----------------------------
def encrypt_bytes(raw_key: bytes, data: bytes) -> Dict[str, Any]:
    """Seal *data* with AES-GCM under *raw_key*.

    A fresh random 12-byte nonce is drawn on every call and no associated
    data is used. The returned dict is JSON-serializable:
    ``"v"`` is the record version (1), ``"n"`` the Base64 nonce and ``"c"``
    the Base64 output of ``AESGCM.encrypt``.
    """
    def _b64(blob: bytes) -> str:
        return base64.b64encode(blob).decode("ascii")

    cipher = AESGCM(raw_key)
    nonce = secrets.token_bytes(12)  # 96-bit nonce
    sealed = cipher.encrypt(nonce, data, None)
    return {"v": 1, "n": _b64(nonce), "c": _b64(sealed)}

def decrypt_bytes(raw_key: bytes, rec: Dict[str, Any]) -> bytes:
    """Open a record shaped like ``encrypt_bytes`` output and return the plaintext.

    Reads the Base64 nonce from ``rec["n"]`` and the Base64 ciphertext from
    ``rec["c"]``; no associated data is supplied.
    """
    cipher = AESGCM(raw_key)
    nonce, sealed = (base64.b64decode(rec[field]) for field in ("n", "c"))
    return cipher.decrypt(nonce, sealed, None)


# ----------------------------
# Utilities
# ----------------------------
def create_run_dir(root: pathlib.Path = pathlib.Path("runs"),
                   name: Optional[str] = None) -> pathlib.Path:
    """Make (or reuse) ``root/<name>`` and return its path.

    When *name* is empty or None, the directory is named after the current
    local time formatted as ``YYYYmmdd-HHMMSS``. Missing parents are created
    and an already existing directory is not an error.
    """
    stamp = f"{datetime.datetime.now():%Y%m%d-%H%M%S}"
    leaf = name if name else stamp
    target = root.joinpath(leaf)
    target.mkdir(parents=True, exist_ok=True)
    return target

def _tar_gz_directory(src_dir: pathlib.Path, out_tgz: pathlib.Path) -> None:
    """Pack *src_dir* into a gzip-compressed tarball written to *out_tgz*.

    Archive members are stored under the directory's own base name.
    """
    top_level = src_dir.name
    with tarfile.open(name=out_tgz, mode="w:gz") as archive:
        archive.add(src_dir, arcname=top_level)


# ----------------------------
# Secure logger
# ----------------------------
class SecureRun:
    """
    End-to-end encrypted experiment run.
    - Episode logs: appended as encrypted JSONL records
    - TensorBoard: written to a plaintext temp dir, then encrypted+removed on finalize

    Can be used as a context manager; leaving the ``with`` block calls
    ``finalize()`` and never suppresses exceptions.
    """
    def __init__(
        self,
        run_dir: Optional[pathlib.Path] = None,
        root: pathlib.Path = pathlib.Path("runs"),
        run_name: Optional[str] = None,
        enable_tensorboard: bool = True,
    ):
        """
        Prepare the run directory, key, manifest and optional TensorBoard writer.

        Args:
            run_dir: Directory to log into. When omitted, one is created
                under *root* via ``create_run_dir``.
            root: Parent directory, used only when *run_dir* is not given.
            run_name: Sub-directory name, used only when *run_dir* is not given.
            enable_tensorboard: Whether to also write TensorBoard scalars.
        """
        self.logdir = run_dir or create_run_dir(root, run_name)
        # A caller-supplied run_dir may not exist yet; the key file and the
        # manifest written below both need the directory to be present.
        self.logdir.mkdir(parents=True, exist_ok=True)
        self.key = load_or_create_key(self.logdir)
        self.key_fp = key_fingerprint(self.key)

        # Manifest
        manifest = {
            "algorithm": ALG,
            "created": datetime.datetime.now().isoformat(timespec="seconds"),
            "key_fingerprint": self.key_fp,
            "enc_files": [ENC_EPISODES, ENC_EVENTS],
            "tb_plain_dir": TB_PLAIN_DIR if enable_tensorboard else None,
        }
        (self.logdir / MANIFEST).write_text(json.dumps(manifest, indent=2))

        # Episode encrypted file path
        self.enc_episodes_path = self.logdir / ENC_EPISODES

        # In-memory cache for quick analysis during the run
        self._episodes: List[Dict[str, Any]] = []

        # TensorBoard setup (plaintext temp dir, purged on finalize)
        self.enable_tb = enable_tensorboard
        self.tb_writer: Optional[SummaryWriter] = None
        self.tb_plain_dir = self.logdir / TB_PLAIN_DIR
        if self.enable_tb:
            self.tb_plain_dir.mkdir(parents=True, exist_ok=True)
            self.tb_writer = SummaryWriter(log_dir=self.tb_plain_dir.as_posix())

    # --- context manager sugar ---
    def __enter__(self):
        """Return the run itself so it can be bound with ``as``."""
        return self

    def __exit__(self, exc_type, exc, tb):
        """Finalize the run on leaving the ``with`` block."""
        self.finalize()
        # Do not suppress exceptions
        return False

    # --- logging API ---
    def log_episode(self, ep_idx: int, reward: float, length: int, safety_cost: float):
        """
        Record one episode.

        The row is sent to TensorBoard (reward and safety scalars, if a
        writer is active), appended to the encrypted JSONL file as one
        record per line, and kept in memory for ``get_episode_df``.
        """
        row = {
            "episode": ep_idx,
            "reward": reward,
            "length": length,
            "safety": safety_cost,
            "ts": datetime.datetime.now().isoformat(timespec="seconds"),
        }

        # TensorBoard (temp plaintext)
        if self.tb_writer is not None:
            self.tb_writer.add_scalar("reward/episode", reward, ep_idx)
            self.tb_writer.add_scalar("safety/episode", safety_cost, ep_idx)
            self.tb_writer.flush()

        # Encrypted append to JSONL.enc
        rec = encrypt_bytes(self.key, json.dumps(row, separators=(",", ":")).encode("utf-8"))
        with open(self.enc_episodes_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(rec) + "\n")

        # In-memory
        self._episodes.append(row)

    def get_episode_df(self) -> pd.DataFrame:
        """Return the episodes logged through this instance as a DataFrame."""
        return pd.DataFrame(self._episodes)

    def save_episode_csv_encrypted(self, filename: str = "episodes.csv.enc"):
        """Encrypt the in-memory episode table as CSV into ``logdir/filename``."""
        df = self.get_episode_df()
        csv_bytes = df.to_csv(index=False).encode("utf-8")
        rec = encrypt_bytes(self.key, csv_bytes)
        (self.logdir / filename).write_text(json.dumps(rec))
        print(f"[secure-logger] Encrypted CSV saved: {self.logdir / filename}")

    def finalize(self):
        """
        Close TB, encrypt TB event files to events.tgz.enc, remove plaintext.
        Safe to call multiple times.

        If tarring or encryption fails, the exception propagates, the
        intermediate plaintext tarball is still removed, and the plaintext
        TensorBoard directory is left in place so a later call can retry.
        """
        # Close writer and drop the reference: episodes logged after
        # finalize must not write new plaintext event files once the
        # plaintext directory has been purged.
        if self.tb_writer is not None:
            self.tb_writer.flush()
            self.tb_writer.close()
            self.tb_writer = None

        # Encrypt TB directory if present and non-empty
        if self.enable_tb and self.tb_plain_dir.exists():
            has_files = any(self.tb_plain_dir.iterdir())
            if has_files:
                tmp_tgz = self.logdir / "events.tgz"
                try:
                    _tar_gz_directory(self.tb_plain_dir, tmp_tgz)
                    # Encrypt tarball
                    enc = encrypt_bytes(self.key, tmp_tgz.read_bytes())
                    (self.logdir / ENC_EVENTS).write_text(json.dumps(enc))
                finally:
                    # Never leave the plaintext tarball behind, even when
                    # one of the steps above raised.
                    if tmp_tgz.exists():
                        tmp_tgz.unlink()
            # Remove plaintext TB directory
            shutil.rmtree(self.tb_plain_dir, ignore_errors=True)
        print(f"[secure-logger] Finalized run at {self.logdir} (key fp: {self.key_fp})")


# ----------------------------
# Decryption utilities
# ----------------------------
def iter_decrypted_jsonl(logdir: pathlib.Path, key: Optional[bytes] = None) -> Iterable[Dict[str, Any]]:
    """
    Yield decrypted JSON dicts from episodes.jsonl.enc in the given run dir.

    Args:
        logdir: Run directory containing the encrypted episode file.
        key: Raw 32-byte key. When omitted, the key is taken from the
            EXPLOG_KEY_B64 environment variable or from logdir/key.b64.

    Raises:
        FileNotFoundError: if no key is passed and neither the environment
            variable nor the key file is available. Blank lines are skipped.
    """
    if key is None:
        # Reading must never mint a key: a freshly generated one cannot
        # decrypt anything and would leave a bogus key file in the run dir.
        if not os.getenv(ENV_KEY) and not (logdir / KEYFILE_NAME).exists():
            raise FileNotFoundError(
                f"No decryption key available: set {ENV_KEY} or provide {logdir / KEYFILE_NAME}"
            )
        key = load_or_create_key(logdir)
    enc_path = logdir / ENC_EPISODES
    with open(enc_path, "r", encoding="utf-8") as f:
        for line in f:
            s = line.strip()
            if not s:
                continue
            rec = json.loads(s)
            plain = decrypt_bytes(key, rec)
            yield json.loads(plain.decode("utf-8"))

def load_decrypted_df(logdir: pathlib.Path, key: Optional[bytes] = None) -> pd.DataFrame:
    """Decrypt every episode record of a run directory into one DataFrame."""
    rows = list(iter_decrypted_jsonl(logdir, key))
    return pd.DataFrame(rows)

def decrypt_csv_file(enc_csv_path: pathlib.Path, key: Optional[bytes] = None) -> pd.DataFrame:
    """
    Decrypt a CSV encrypted via save_episode_csv_encrypted and return a DataFrame.

    Args:
        enc_csv_path: Path of the encrypted CSV record. Its parent directory
            is treated as the run directory when the key has to be looked up.
        key: Raw 32-byte key. When omitted, the key is taken from the
            EXPLOG_KEY_B64 environment variable or from key.b64 next to the file.

    Raises:
        FileNotFoundError: if no key is passed and neither the environment
            variable nor the key file is available.
    """
    from io import StringIO

    logdir = enc_csv_path.parent
    if key is None:
        # Reading must never mint a key: a freshly generated one cannot
        # decrypt anything and would leave a bogus key file in the run dir.
        if not os.getenv(ENV_KEY) and not (logdir / KEYFILE_NAME).exists():
            raise FileNotFoundError(
                f"No decryption key available: set {ENV_KEY} or provide {logdir / KEYFILE_NAME}"
            )
        key = load_or_create_key(logdir)
    rec = json.loads(enc_csv_path.read_text())
    csv_bytes = decrypt_bytes(key, rec)
    return pd.read_csv(StringIO(csv_bytes.decode("utf-8")))

def decrypt_events_to_dir(logdir: pathlib.Path, out_dir: pathlib.Path, key: Optional[bytes] = None):
    """
    Decrypt events.tgz.enc and extract into out_dir for local TensorBoard viewing.

    The decrypted tarball is unpacked straight from memory, so no plaintext
    archive is written to the run directory.

    Args:
        logdir: Run directory containing events.tgz.enc.
        out_dir: Destination directory; created if missing.
        key: Raw 32-byte key. When omitted, the key is taken from the
            EXPLOG_KEY_B64 environment variable or from logdir/key.b64.

    Raises:
        FileNotFoundError: if no key is passed and neither the environment
            variable nor the key file is available.
    """
    import io

    if key is None:
        # Reading must never mint a key: a freshly generated one cannot
        # decrypt anything and would leave a bogus key file in the run dir.
        if not os.getenv(ENV_KEY) and not (logdir / KEYFILE_NAME).exists():
            raise FileNotFoundError(
                f"No decryption key available: set {ENV_KEY} or provide {logdir / KEYFILE_NAME}"
            )
        key = load_or_create_key(logdir)
    enc_path = logdir / ENC_EVENTS
    rec = json.loads(enc_path.read_text())
    tgz_bytes = decrypt_bytes(key, rec)
    out_dir.mkdir(parents=True, exist_ok=True)
    with tarfile.open(fileobj=io.BytesIO(tgz_bytes), mode="r:gz") as tar:
        if hasattr(tarfile, "data_filter"):
            # Extraction filters reject members that would escape out_dir;
            # only available on Python versions that ship them.
            tar.extractall(path=out_dir, filter="data")
        else:
            tar.extractall(path=out_dir)
    print(f"[secure-logger] Decrypted TensorBoard events extracted to: {out_dir}")


# ----------------------------
# Example direct usage
# ----------------------------
if __name__ == "__main__":
    # A fixed key may be supplied through the environment before running:
    # os.environ["EXPLOG_KEY_B64"] = "<Base64 32-byte key>"

    # Log five synthetic episodes, then store an encrypted CSV snapshot.
    # Leaving the with-block finalizes the run.
    with SecureRun(enable_tensorboard=True) as run:
        for episode in range(5):
            run.log_episode(
                ep_idx=episode,
                reward=episode * 1.2,
                length=100 + episode,
                safety_cost=episode * 0.05,
            )
        run.save_episode_csv_encrypted()

    # Reading back (also works elsewhere, given the same key):
    df = load_decrypted_df(run.logdir)
    print(df.tail())

    # TensorBoard needs plaintext events, so decrypt them into a viewing folder:
    tb_out = run.logdir.joinpath("tb_events_view")
    decrypt_events_to_dir(run.logdir, tb_out)
    print("Point TensorBoard at:", tb_out)