In [23]:
from pathlib import Path
import pandas as pd
import numpy as np
import hashlib
import json
import time
import random

# --- Filesystem layout ------------------------------------------------------
# All locations hang off the parent of the current working directory.
PROJECT_ROOT = Path("..").resolve()
DATA_ROOT = PROJECT_ROOT.joinpath("data_csv")
CURIEUS_ROOT = DATA_ROOT.joinpath("curieus")
CORE_ROOT = PROJECT_ROOT.joinpath("artifact", "core7")

# The output folder is created up front so the CSV writes cannot fail on a
# missing directory; an already-existing folder is accepted as-is.
CORE_ROOT.mkdir(parents=True, exist_ok=True)

OUT_EVENT_LOG = CORE_ROOT.joinpath("core7_governance_event_log.csv")
OUT_DIAG = CORE_ROOT.joinpath("core7_run_diagnostics.csv")

# --- Experiment design ------------------------------------------------------
ANTIBODIES = [
    "antibody_A",
    "antibody_B",
    "antibody_C",
]
# Case id -> configuration; "governed" switches the governance policy on/off.
CASES = dict(
    A_ALWAYS_ALLOW=dict(governed=False),
    B_GOVERNED=dict(governed=True),
)

# --- Reproducibility --------------------------------------------------------
# Both global generators (stdlib `random` and NumPy) are seeded with one value.
GLOBAL_SEED = 42
np.random.seed(GLOBAL_SEED)
random.seed(GLOBAL_SEED)

In [24]:
def load_baseline_risk(ab_key: str, root=None):
    """Build the per-axis baseline risk profile for one antibody.

    Every axis starts at "MEDIUM". The VISCOSITY axis is raised to "HIGH"
    when the antibody's folder contains a ``*DeepViscosity*.csv`` file that
    parses to at least one data row. Only the presence of rows is checked;
    the values inside the file are not inspected.

    Parameters
    ----------
    ab_key : str
        Name of the antibody's sub-folder under ``root``.
    root : path-like, optional
        Directory holding one sub-folder per antibody. When omitted, the
        notebook-level ``CURIEUS_ROOT`` is used.

    Returns
    -------
    dict
        Axis name -> risk level string, with the keys IMMUNO, VISCOSITY,
        CHARGE and PROCESS.
    """
    base = Path(CURIEUS_ROOT if root is None else root) / ab_key

    risk = {
        "IMMUNO": "MEDIUM",
        "VISCOSITY": "MEDIUM",
        "CHARGE": "MEDIUM",
        "PROCESS": "MEDIUM",
    }

    # glob() yields matches in arbitrary, filesystem-dependent order. Sorting
    # makes the file that gets consulted the same on every machine and re-run.
    visco_files = sorted(base.glob("*DeepViscosity*.csv"))
    if visco_files:
        try:
            df = pd.read_csv(visco_files[0])
        except pd.errors.EmptyDataError:
            # A zero-byte file cannot be parsed at all; it carries no rows,
            # so it is handled exactly like a header-only file.
            df = pd.DataFrame()
        if not df.empty:
            risk["VISCOSITY"] = "HIGH"

    return risk

# One baseline risk profile per antibody, keyed by antibody id and built in
# the order the ids are listed in ANTIBODIES.
BASELINE_RISK = dict(zip(ANTIBODIES, map(load_baseline_risk, ANTIBODIES)))
BASELINE_RISK

{'antibody_A': {'IMMUNO': 'MEDIUM',
  'VISCOSITY': 'MEDIUM',
  'CHARGE': 'MEDIUM',
  'PROCESS': 'MEDIUM'},
 'antibody_B': {'IMMUNO': 'MEDIUM',
  'VISCOSITY': 'MEDIUM',
  'CHARGE': 'MEDIUM',
  'PROCESS': 'MEDIUM'},
 'antibody_C': {'IMMUNO': 'MEDIUM',
  'VISCOSITY': 'MEDIUM',
  'CHARGE': 'MEDIUM',
  'PROCESS': 'MEDIUM'}}

In [25]:
# Axes a candidate mutation can be aimed at; these match the keys of the
# baseline risk profiles.
INTENDED_AXES = ["IMMUNO", "VISCOSITY", "CHARGE", "PROCESS"]


def build_mutation_pool(n=20):
    """Create ``n`` synthetic candidate mutations.

    Candidate ``i`` (0-based) gets the id ``M`` followed by ``i`` zero-padded
    to three digits, the description ``CDR_mut_<i>``, and an intended axis
    picked with the global ``random`` generator. One ``random.choice`` draw
    is made per candidate, in index order.

    Returns
    -------
    list of dict
        Each with the keys ``candidate_id``, ``mutation_desc`` and
        ``intended_axis``.
    """
    pool = []
    for idx in range(n):
        pool.append(
            dict(
                candidate_id=f"M{idx:03d}",
                mutation_desc=f"CDR_mut_{idx}",
                intended_axis=random.choice(INTENDED_AXES),
            )
        )
    return pool


MUTATION_POOL = build_mutation_pool(n=20)


In [26]:
def generate_event(baseline_risk, intended_axis):
    """Simulate the outcome of one mutation attempt on ``intended_axis``.

    The conflict probability follows the baseline level of that axis:
    0.6 for "HIGH", 0.3 for "MEDIUM", 0.1 for anything else. A conflicting
    event is "HIGH" or "MEDIUM" severity with equal odds; a non-conflicting
    event is always "LOW".

    Randomness comes from NumPy's global generator: one draw decides the
    conflict, and a second draw is consumed only when a conflict occurred.

    Returns
    -------
    dict
        ``event_type`` (the axis), ``severity`` and ``conflict_flag``.
    """
    level = baseline_risk[intended_axis]
    conflict_prob = 0.6 if level == "HIGH" else (0.3 if level == "MEDIUM" else 0.1)

    conflict = np.random.rand() < conflict_prob

    if not conflict:
        severity = "LOW"
    elif np.random.rand() < 0.5:
        severity = "HIGH"
    else:
        severity = "MEDIUM"

    return dict(
        event_type=intended_axis,
        severity=severity,
        conflict_flag=conflict,
    )

def update_state(prev_state, event, mutation_count, governed):
    """Return the state that follows ``prev_state`` after ``event``.

    ``SoD`` always grows by the severity weight of the event (LOW 0.3,
    MEDIUM 0.7, HIGH 1.2) plus 0.2 per mutation counted so far.
    ``SoMS`` only moves when ``governed`` is true: +0.6 on a conflict,
    otherwise -0.3 with a floor at 0. ``prev_state`` is not modified.

    Returns
    -------
    dict
        New state with the keys ``SoD`` and ``SoMS``.
    """
    drift, stress = prev_state["SoD"], prev_state["SoMS"]

    severity_weights = {"LOW": 0.3, "MEDIUM": 0.7, "HIGH": 1.2}
    step_cost = severity_weights[event["severity"]] + 0.2 * mutation_count
    drift = drift + step_cost

    if governed:
        stress = stress + 0.6 if event["conflict_flag"] else max(0, stress - 0.3)

    return dict(SoD=drift, SoMS=stress)

In [27]:
# Thresholds used by governance_decision. The key order is kept stable because
# the whole mapping is serialised into the run diagnostics.
POLICY = dict(
    VETO_SOMS=4.0,
    FREEZE_SOMS=7.0,
    VETO_CONFLICT_STREAK=3,
    FREEZE_VETO_STREAK=3,
    FORCED_VETO_STEP=10,
)


def governance_decision(state, conflict_streak, veto_streak, step, case_id):
    """Pick the governance action for the current step.

    Rules are tried in this order and the first match wins:

    1. forced VETO when the case is "B_GOVERNED" and ``step`` equals
       ``POLICY["FORCED_VETO_STEP"]``;
    2. FREEZE once ``veto_streak`` reaches ``FREEZE_VETO_STREAK``;
    3. FREEZE once ``state["SoMS"]`` reaches ``FREEZE_SOMS``;
    4. VETO once ``state["SoMS"]`` reaches ``VETO_SOMS``;
    5. VETO once ``conflict_streak`` reaches ``VETO_CONFLICT_STREAK``;
    6. otherwise ALLOW with an empty reason code.

    Returns
    -------
    tuple of (str, str)
        ``(action, reason_code)``.
    """
    if case_id == "B_GOVERNED" and step == POLICY["FORCED_VETO_STEP"]:
        verdict = ("VETO", "REASON_FORCED_GOVERNANCE_TEST")
    elif veto_streak >= POLICY["FREEZE_VETO_STREAK"]:
        verdict = ("FREEZE", "REASON_VETO_STREAK_TO_FREEZE")
    else:
        soms = state["SoMS"]
        if soms >= POLICY["FREEZE_SOMS"]:
            verdict = ("FREEZE", "REASON_SOMS_CUMSUM")
        elif soms >= POLICY["VETO_SOMS"]:
            verdict = ("VETO", "REASON_SOMS_ACCUMULATION")
        elif conflict_streak >= POLICY["VETO_CONFLICT_STREAK"]:
            verdict = ("VETO", "REASON_CONFLICT_ESCALATION")
        else:
            verdict = ("ALLOW", "")
    return verdict

In [28]:
# Identifier stamped on every log row. It is derived from the wall clock, so
# it differs between launches even though the simulated draws are seeded.
RUN_ID = f"core7_{int(time.time())}"
event_logs = []
diag_logs = []

N_STEPS = 30

# One independent trajectory per (case, antibody) pair. Trajectories share the
# global random generators, so they are simulated in a fixed nesting order.
for case_id, case_cfg in CASES.items():
    for ab in ANTIBODIES:
        baseline = BASELINE_RISK[ab]
        state = {"SoD": 0.0, "SoMS": 0.0}
        mutation_count = 0
        conflict_streak = 0
        veto_streak = 0
        # Counted explicitly instead of reading the loop variable after the
        # loop: with N_STEPS == 0 `step` would be undefined (or, in a notebook,
        # a stale value left behind by an earlier trajectory or cell run).
        steps_run = 0

        for step in range(N_STEPS):
            steps_run += 1

            mutation = random.choice(MUTATION_POOL)
            event = generate_event(baseline, mutation["intended_axis"])

            # The state absorbs the event before the decision is taken, so the
            # decision sees the post-event SoMS.
            state = update_state(
                state,
                event,
                mutation_count,
                governed=case_cfg["governed"],
            )

            conflict_streak = conflict_streak + 1 if event["conflict_flag"] else 0

            if case_cfg["governed"]:
                # veto_streak still holds the count from the previous steps here.
                action, reason = governance_decision(
                    state, conflict_streak, veto_streak, step, case_id
                )
            else:
                action, reason = "ALLOW", ""

            veto_streak = veto_streak + 1 if action == "VETO" else 0

            # Only allowed attempts count as applied mutations.
            blocked = action != "ALLOW"
            if action == "ALLOW":
                mutation_count += 1

            event_logs.append({
                "run_id": RUN_ID,
                "case_id": case_id,
                "antibody_id": ab,
                "step": step,
                "attempted": True,
                "action": action,
                "reason_code": reason,
                "SoD": round(state["SoD"], 3),
                "SoMS": round(state["SoMS"], 3),
                "conflict_flag": event["conflict_flag"],
                "mutation_desc": mutation["mutation_desc"],
                "intended_axis": mutation["intended_axis"],
                "blocked": blocked,
            })

            # A FREEZE ends the trajectory; the freezing step itself is logged.
            if action == "FREEZE":
                break

        diag_logs.append({
            "run_id": RUN_ID,
            "case_id": case_id,
            "antibody_id": ab,
            "n_steps": steps_run,
            "seed": GLOBAL_SEED,
            "policy": json.dumps(POLICY),
            "mutation_pool_size": len(MUTATION_POOL),
        })

In [29]:
# Materialise the collected records as tables, one row per logged dict.
event_df, diag_df = pd.DataFrame(event_logs), pd.DataFrame(diag_logs)

# Persist both tables without the positional index column.
for frame, destination in ((event_df, OUT_EVENT_LOG), (diag_df, OUT_DIAG)):
    frame.to_csv(destination, index=False)

# Show where the files were written.
OUT_EVENT_LOG, OUT_DIAG

(PosixPath('/Users/mac/Desktop/De/Developability_Data/core/artifact/core7/core7_governance_event_log.csv'),
 PosixPath('/Users/mac/Desktop/De/Developability_Data/core/artifact/core7/core7_run_diagnostics.csv'))