<a href="https://colab.research.google.com/github/MatiasNazareth1993-coder/Virtual-cell/blob/main/Virtual_cell.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import pandas as pd
import os
from datetime import datetime

np.random.seed(42)

# Simulation parameters
N_cells = 50
T = 100  # timesteps
alpha = 0.8   # telomerase efficacy coefficient
delta_rep_mean = 30.0  # mean loss per division (base pair units)
delta_rep_sd = 5.0
sigma_noise = 10.0  # measurement noise
L_sen = 3000.0  # senescence threshold (bp)
L_crit = 1000.0
initial_L_mean = 8000.0
initial_L_sd = 200.0

# Controller thresholds
monitor_low = 4000.0
monitor_high = 7000.0
max_action = 1.0
min_action = 0.0

# Risk model params (simple hazard)
beta_action = 0.5
beta_mut = 1.2

# Initialize state arrays
L = np.random.normal(initial_L_mean, initial_L_sd, size=N_cells)  # telomere lengths
mut_count = np.zeros(N_cells)  # accumulated mutations proxy
action_history = np.zeros((T, N_cells))
observations = np.zeros((T, N_cells))
risk_scores = np.zeros((T, N_cells))
state_labels = np.zeros((T, N_cells), dtype=int)  # 0=healthy,1=senescent,2=critical

# Simple rule-based controller function
def controller_rule(obs_L, history_actions, mut, t):
    """
    Rule:
    - If observed L < monitor_low and mut small -> activate telomerase (action proportional)
    - If observed L between monitor_low and monitor_high -> keep low action
    - If observed L > monitor_high -> action=0 (avoid unnecessary activation)
    - If mut_count high or risk predicted high -> block activation (action=0)
    """
    # Estimate simple mutational risk threshold
    mut_threshold = 5
    if mut > mut_threshold:
        return 0.0
    if obs_L < monitor_low:
        # proportional control to how far below threshold
        return float(min(max_action, (monitor_low - obs_L) / monitor_low))
    elif obs_L < monitor_high:
        return 0.2 * (1 - (obs_L - monitor_low) / (monitor_high - monitor_low))
    else:
        return 0.0

# Run simulation
records = []
for t in range(T):
    # Simulate measurement noise and sensor sampling
    measured_L = L + np.random.normal(0, sigma_noise, size=N_cells)
    observations[t] = measured_L
    for i in range(N_cells):
        # Decide action based on controller rule and history
        hist = action_history[max(0, t-10):t, i] if t>0 else np.array([])
        act = controller_rule(measured_L[i], hist, mut_count[i], t)
        action_history[t, i] = act

        # Update telomere based on mechanistic eq: L_{t+1} = L_t - delta_rep + alpha*A + noise
        delta_rep = max(0.0, np.random.normal(delta_rep_mean, delta_rep_sd))
        L[i] = L[i] - delta_rep + alpha * act * 100.0 + np.random.normal(0, 2.0)  # action scaled
        # Ensure lengths non-negative
        L[i] = max(0.0, L[i])

        # Update mutation count proxy: increases with action and low telomere length
        mut_increment = np.random.poisson(0.02 + 0.0001 * max(0, (monitor_low - L[i])))
        # action increases risk slightly
        mut_increment += int(np.random.poisson(0.01 * act))
        mut_count[i] += mut_increment

        # Risk score (simple)
        risk = np.exp(beta_action * np.sum(action_history[max(0,t-20):t+1, i]) / (t+1 + 1e-6) + beta_mut * mut_count[i])
        risk_scores[t, i] = risk

        # State labeling
        if L[i] < L_crit:
            state = 2  # critical
        elif L[i] < L_sen:
            state = 1  # senescent-prone
        else:
            state = 0  # healthy
        state_labels[t, i] = state

        # Record
        records.append({
            "t": t,
            "cell_id": i,
            "measured_L": measured_L[i],
            "true_L": L[i],
            "action": act,
            "mut_count": mut_count[i],
            "risk": risk,
            "state": state
        })

# Save to CSV
df = pd.DataFrame.from_records(records)
out_path = "/mnt/data/ooc_telomere_simulation.csv"
# Create the directory if it does not exist
os.makedirs(os.path.dirname(out_path), exist_ok=True)
df.to_csv(out_path, index=False)

# Display summary dataframe (last timestep snapshot)
snapshot = df[df['t']==(T-1)].sort_values('cell_id').reset_index(drop=True)

# Replaced caas_jupyter_tools display with a direct print of the dataframe head
print("Organ-on-Chip Telomere Simulation Snapshot (t={})".format(T-1))
print(snapshot.head())

# Provide info about saved file
out_path, df.shape

Organ-on-Chip Telomere Simulation Snapshot (t=99)
    t  cell_id   measured_L       true_L    action  mut_count       risk  \
0  99        0  5382.674459  5357.254875  0.107822        3.0  36.961110   
1  99        1  5333.592560  5321.943773  0.111094        3.0  36.964120   
2  99        2  5417.334106  5402.686217  0.105511        3.0  36.947836   
3  99        3  5535.250110  5519.640509  0.097650        2.0  11.116666   
4  99        4  5375.144311  5353.507147  0.108324        2.0  11.134162   

   state  
0      0  
1      0  
2      0  
3      0  
4      0  


('/mnt/data/ooc_telomere_simulation.csv', (5000, 8))

In [4]:
{
  "timestamp": "2025-11-18T16:00:00Z",
  "chip_id": "OOC-01",
  "cells": [
    {"cell_id": 0, "measured_L": 5120.4, "biomarkers": {"p53": 0.1, "γH2AX": 0.02}},
    ...
  ]
}


{'timestamp': '2025-11-18T16:00:00Z',
 'chip_id': 'OOC-01',
 'cells': [{'cell_id': 0,
   'measured_L': 5120.4,
   'biomarkers': {'p53': 0.1, 'γH2AX': 0.02}},
  Ellipsis]}

In [6]:
import json

json_string = '''
{
  "actions": [{"cell_id":0,"action":0.2,"explain":"L below target by 0.14x","blocked":false}],
  "global_alert": false
}
'''

# Parse the JSON string into a Python object
data = json.loads(json_string)

# You can now work with the 'data' dictionary
# For example, print it to see the structure
print(data)

{'actions': [{'cell_id': 0, 'action': 0.2, 'explain': 'L below target by 0.14x', 'blocked': False}], 'global_alert': False}


In [8]:
import json

json_string = '''
{
  "actions": [{"cell_id":0,"action":0.2,"explain":"L below target by 0.14x","blocked":false}],
  "global_alert": false
}
'''

# Parse the JSON string into a Python object
data = json.loads(json_string)

# You can now work with the 'data' dictionary
# For example, print it to see the structure
print(data)

{'actions': [{'cell_id': 0, 'action': 0.2, 'explain': 'L below target by 0.14x', 'blocked': False}], 'global_alert': False}


In [9]:
# Writing an RL-ready training script (PPO) for the Organ-on-Chip telomere environment.
# The script will be saved to /mnt/data/ppo_ooc.py. It defines a Gym-like env and a training harness.
# The harness will try to use stable-baselines3 (recommended). If not installed, it will fallback to
# a minimal random-rollout tester and print instructions for installing stable-baselines3 + PyTorch.
#
# This cell only writes the script file; it does not perform heavy training here.

script = r'''
"""
ppo_ooc.py

PPO training harness for the Organ-on-Chip telomere environment.

Usage:
    - Recommended: install PyTorch and stable-baselines3
        pip install torch stable-baselines3[extra] gymnasium

    - Then run:
        python ppo_ooc.py --train --timesteps 200000 --model_path ./ppo_ooc_model.zip

This script:
 - Defines a Gym-compatible environment `OOCTelomereEnv`
 - Provides a training harness using stable-baselines3.PPO if available
 - Implements evaluation and model saving
 - Includes configurable parameters for the mechanistic model

Notes:
 - The environment exposes continuous action in [0,1] per cell, but for simplicity the action returned
   by the agent is a single scalar applied to all cells (this can be extended to per-cell actions).
 - The script is intentionally self-contained and suitable for running on a workstation.
 - No wet-lab instructions or DNA sequences are included.
"""

import argparse
import os
import numpy as np
import gymnasium as gym
from gymnasium import spaces
import json
import random

# ------------------------
# Environment definition
# ------------------------
class OOCTelomereEnv(gym.Env):
    """
    A Gym-like environment simulating multiple virtual cells in an organ-on-chip.
    Observation: vector containing summary statistics and a sample of cell telomere lengths.
    Action: continuous scalar in [0,1] representing global telomerase activation level.
    Reward: composite reward encouraging maintenance of telomere lengths without increasing risk.
    """

    metadata = {"render_modes": ["human"], "render_fps": 10}

    def __init__(self, n_cells=32, seed=None):
        super().__init__()
        self.n_cells = n_cells
        self.rng = np.random.RandomState(seed)
        # Mechanistic params (can be tuned)
        self.alpha = 0.8
        self.delta_rep_mean = 30.0
        self.delta_rep_sd = 5.0
        self.sigma_noise = 10.0
        self.L_sen = 3000.0
        self.L_crit = 1000.0
        self.monitor_low = 4000.0
        self.monitor_high = 7000.0

        # State
        self.L = None
        self.mut_count = None
        self.t = 0
        self.max_steps = 200

        # Observation space: we'll provide a vector of length 4 + n_sample
        # [mean_L, median_L, min_L, max_L, sample_telomeres...]
        self.n_sample = min(8, self.n_cells)
        obs_dim = 4 + self.n_sample
        self.observation_space = spaces.Box(low=0.0, high=1e5, shape=(obs_dim,), dtype=np.float32)

        # Action space: single scalar in [0,1]
        self.action_space = spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32)

    def reset(self, seed=None, options=None):
        if seed is not None:
            self.rng.seed(seed)
        # Initialize telomere lengths and mutation counts
        initial_L_mean = 8000.0
        initial_L_sd = 200.0
        self.L = self.rng.normal(initial_L_mean, initial_L_sd, size=self.n_cells)
        self.L = np.clip(self.L, 100.0, None)
        self.mut_count = np.zeros(self.n_cells, dtype=np.float32)
        self.t = 0
        return self._get_obs(), {}

    def _get_obs(self):
        # compute summary stats + sample telomeres
        mean_L = float(np.mean(self.L))
        median_L = float(np.median(self.L))
        min_L = float(np.min(self.L))
        max_L = float(np.max(self.L))
        # sample first n_sample telomeres (or random sample for variety)
        sample = self.L[:self.n_sample]
        obs = np.concatenate([[mean_L, median_L, min_L, max_L], sample])
        return obs.astype(np.float32)

    def step(self, action):
        # action is a scalar in [0,1]; apply to all cells (global control)
        a = float(np.clip(np.asarray(action).squeeze(), 0.0, 1.0))
        # mechanistic update
        for i in range(self.n_cells):
            delta_rep = max(0.0, self.rng.normal(self.delta_rep_mean, self.delta_rep_sd))
            self.L[i] = self.L[i] - delta_rep + self.alpha * a * 100.0 + self.rng.normal(0, 2.0)
            self.L[i] = max(0.0, self.L[i])
            # mutation increment proxy
            mut_inc = self.rng.poisson(0.02 + 0.0001 * max(0, (self.monitor_low - self.L[i])))
            mut_inc += int(self.rng.poisson(0.01 * a))
            self.mut_count[i] += mut_inc

        # compute risk proxy
        beta_action = 0.5
        beta_mut = 1.2
        risk = np.exp(beta_action * a + beta_mut * np.mean(self.mut_count))

        # reward design:
        # - penalize cells below senescence threshold
        # - penalize high risk
        # - reward keeping mean_L near target (e.g., 6500)
        mean_L = float(np.mean(self.L))
        target = 6500.0
        reward_homeostasis = -abs(mean_L - target) / target
        penalty_senescent = -np.sum(self.L < self.L_sen) / float(self.n_cells)
        penalty_risk = -0.1 * (risk / (1.0 + risk))
        reward = 1.0 * reward_homeostasis + 2.0 * penalty_senescent + 1.0 * penalty_risk

        self.t += 1
        done = bool(self.t >= self.max_steps)
        info = {"mean_L": mean_L, "risk": float(risk), "mut_mean": float(np.mean(self.mut_count))}
        return self._get_obs(), float(reward), done, False, info

    def render(self, mode="human"):
        print(f"t={self.t} mean_L={np.mean(self.L):.1f} mut_mean={np.mean(self.mut_count):.2f}")

# ------------------------
# Training harness
# ------------------------
def train_with_sb3(env_id=None, timesteps=100000, model_path="./ppo_ooc_model.zip"):
    try:
        # import stable-baselines3 and train PPO
        from stable_baselines3 import PPO
        from stable_baselines3.common.env_util import make_vec_env
        from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback

        # Create vectorized env
        def make_env():
            return OOCTelomereEnv(n_cells=32)
        venv = make_vec_env(lambda: OOCTelomereEnv(n_cells=32), n_envs=4)

        model = PPO('MlpPolicy', venv, verbose=1, batch_size=256)
        eval_env = OOCTelomereEnv(n_cells=32)
        eval_callback = EvalCallback(eval_env, best_model_save_path='./logs/',
                                     log_path='./logs/', eval_freq=5000, n_eval_episodes=5)
        checkpoint_cb = CheckpointCallback(save_freq=5000, save_path='./logs/',
                                           name_prefix='ppo_ooc_ckpt')
        model.learn(total_timesteps=timesteps, callback=[eval_callback, checkpoint_cb])
        model.save(model_path)
        print("Training complete. Model saved to:", model_path)
    except Exception as e:
        print("stable-baselines3 training failed or not available:", str(e))
        print("Please install stable-baselines3 and a supported PyTorch version to run PPO training.")
        print("Fallback: you can run random rollouts or implement a simple policy gradient.")
        return

def evaluate_model(model_path=None, n_episodes=10):
    try:
        from stable_baselines3 import PPO
        model = PPO.load(model_path)
        env = OOCTelomereEnv(n_cells=32)
        for ep in range(n_episodes):
            obs, _ = env.reset()
            done = False
            total_reward = 0.0
            while not done:
                action, _ = model.predict(obs, deterministic=True)
                obs, reward, done, trunc, info = env.step(action)
                total_reward += reward
            print(f"Episode {ep} reward: {total_reward:.3f} info={info}")
    except Exception as e:
        print("Evaluation failed (stable-baselines3 may be missing):", str(e))

# ------------------------
# CLI
# ------------------------
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--train", action="store_true", help="Train PPO model (requires stable-baselines3)")
    parser.add_argument("--timesteps", type=int, default=200000, help="Total training timesteps")
    parser.add_argument("--model_path", type=str, default="./ppo_ooc_model.zip", help="Model save path")
    parser.add_argument("--eval", action="store_true", help="Run a short evaluation using saved model")
    args = parser.parse_args()

    if args.train:
        print("Starting PPO training (attempting to use stable-baselines3)...")
        train_with_sb3(timesteps=args.timesteps, model_path=args.model_path)
    if args.eval:
        print("Evaluating model:", args.model_path)
        evaluate_model(model_path=args.model_path)

if __name__ == "__main__":
    main()
'''

out_path = "/mnt/data/ppo_ooc.py"
with open(out_path, "w") as f:
    f.write(script)

out_path



'/mnt/data/ppo_ooc.py'

In [11]:
!pip install torch stable-baselines3[extra] gymnasium

!python /mnt/data/ppo_ooc.py --train --timesteps 200000 --model_path ./ppo_ooc_model.zip

!python /mnt/data/ppo_ooc.py --eval --model_path ./ppo_ooc_model.zip

Collecting stable-baselines3[extra]
  Downloading stable_baselines3-2.7.0-py3-none-any.whl.metadata (4.8 kB)
Downloading stable_baselines3-2.7.0-py3-none-any.whl (187 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m187.2/187.2 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: stable-baselines3
Successfully installed stable-baselines3-2.7.0
Starting PPO training (attempting to use stable-baselines3)...
2025-11-18 21:25:28.714480: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763501128.753091    1120 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763501128.765192    1120 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has alrea

In [12]:
# Write updated environment script with per-cell action space
script = r'''
"""
ooc_telomere_env_cell_actions.py

Organ-on-chip telomere RL environment with per-cell telomerase control.
"""

import numpy as np
import gymnasium as gym
from gymnasium import spaces

class OOCTelomereCellActionEnv(gym.Env):
    metadata = {"render_modes": ["human"], "render_fps": 10}

    def __init__(self, n_cells=32, seed=None):
        super().__init__()
        self.n_cells = n_cells
        self.rng = np.random.RandomState(seed)

        # parameters
        self.delta_rep_mean = 30.0
        self.delta_rep_sd = 5.0
        self.alpha = 0.8
        self.L_sen = 3000.0
        self.target = 6500.0
        self.max_steps = 200
        self.t = 0

        # state
        self.L = None
        self.mut = None

        # actions: per-cell telomerase activation [0,1]^n_cells
        self.action_space = spaces.Box(
            low=0.0, high=1.0, shape=(self.n_cells,), dtype=np.float32
        )

        # observation: summary + all telomeres (can be reduced)
        obs_dim = 4 + self.n_cells
        self.observation_space = spaces.Box(
            low=0.0, high=1e5, shape=(obs_dim,), dtype=np.float32
        )

    def reset(self, seed=None, options=None):
        if seed is not None:
            self.rng.seed(seed)
        self.L = self.rng.normal(8000.0, 200.0, self.n_cells).clip(100, None)
        self.mut = np.zeros(self.n_cells, dtype=np.float32)
        self.t = 0
        return self._obs(), {}

    def _obs(self):
        mean_L = float(self.L.mean())
        median_L = float(np.median(self.L))
        mn = float(self.L.min())
        mx = float(self.L.max())
        return np.concatenate([[mean_L, median_L, mn, mx], self.L]).astype(np.float32)

    def step(self, action):
        a = np.clip(action, 0.0, 1.0)

        # per-cell update
        delta_rep = np.maximum(
            0.0, self.rng.normal(self.delta_rep_mean, self.delta_rep_sd, self.n_cells)
        )
        self.L = (
            self.L
            - delta_rep
            + self.alpha * a * 100.0
            + self.rng.normal(0, 2.0, self.n_cells)
        )
        self.L = np.clip(self.L, 0.0, None)

        # mutation proxy
        self.mut += self.rng.poisson(0.02 + 0.0001 * np.maximum(0, 4000 - self.L)))
        self.mut += self.rng.poisson(0.01 * a)

        # rewards
        r_homeo = -abs(self.L.mean() - self.target) / self.target
        r_sen = -np.mean(self.L < self.L_sen)
        r_mut = -0.1 * np.mean(self.mut)

        reward = r_homeo + 2.0 * r_sen + r_mut

        self.t += 1
        done = self.t >= self.max_steps
        info = dict(mean_L=float(self.L.mean()), mut=float(self.mut.mean()))
        return self._obs(), float(reward), done, False, info

    def render(self, mode="human"):
        print(f"t={self.t} mean_L={self.L.mean():.1f} mut={self.mut.mean():.2f}")
'''

path = "/mnt/data/ooc_telomere_env_cell_actions.py"
with open(path, "w") as f:
    f.write(script)

path


'/mnt/data/ooc_telomere_env_cell_actions.py'

In [13]:
# Generating a multi-organ (multi-chip) environment script for Organ-on-Chip telomere control.
# The script will be saved to /mnt/data/multi_ooc_network.py
# It defines:
#  - OOCTelomereCellActionEnv (imported or redefined inline)
#  - MultiOOCTopologyEnv: composes multiple chips and models inter-chip signaling (simple diffusion)
#  - Example usage: reset and step through the multi-chip env
#
# This is purely in-silico simulation code for RL/control research.

script = r'''
"""
multi_ooc_network.py

Multi-organ (multi-chip) Organ-on-Chip telomere environment.

Defines:
 - OOCTelomereCellActionEnv: per-chip environment (redefined inline for standalone use)
 - MultiOOCTopologyEnv: composes multiple chips and models simple inter-chip signaling
 - Example usage at the bottom.

Notes:
 - Actions: concatenated per-chip per-cell actions (shape = sum(n_cells_i))
 - Observations: concatenated summaries per chip
 - Inter-chip signaling: modeled as cytokine field values that diffuse between chips and affect mutation rates
 - Intended as simulation backbone for RL/control experiments across connected organ-chips
"""

import numpy as np
import gymnasium as gym
from gymnasium import spaces

# -----------------------
# Simple per-chip env (standalone)
# -----------------------
class OOCTelomereCellActionEnv:
    def __init__(self, n_cells=16, seed=None, chip_id=0):
        self.n_cells = n_cells
        self.chip_id = chip_id
        self.rng = np.random.RandomState(seed)
        self.delta_rep_mean = 30.0
        self.delta_rep_sd = 5.0
        self.alpha = 0.8
        self.L_sen = 3000.0
        self.target = 6500.0
        self.max_steps = 200
        self.t = 0
        self.L = None
        self.mut = None
        self.cytokine = 0.0  # local cytokine level affecting stress/mutation

    def reset(self):
        self.L = self.rng.normal(8000.0, 200.0, self.n_cells).clip(100, None)
        self.mut = np.zeros(self.n_cells, dtype=np.float32)
        self.t = 0
        self.cytokine = 0.0
        return self._obs()

    def _obs(self):
        mean_L = float(self.L.mean())
        median_L = float(np.median(self.L))
        mn = float(self.L.min())
        mx = float(self.L.max())
        # include cytokine in observation
        return np.concatenate([[mean_L, median_L, mn, mx, self.cytokine], self.L]).astype(np.float32)

    def step(self, actions):
        # actions: vector len n_cells [0,1]
        a = np.clip(actions, 0.0, 1.0)
        delta_rep = np.maximum(0.0, self.rng.normal(self.delta_rep_mean, self.delta_rep_sd, self.n_cells))
        self.L = (
            self.L
            - delta_rep
            + self.alpha * a * 100.0
            + self.rng.normal(0, 2.0, self.n_cells)
        )
        self.L = np.clip(self.L, 0.0, None)

        # mutation proxy increases with cytokine (stress) and low telomere
        baseline = 0.02 + 0.0001 * np.maximum(0, 4000 - self.L)
        cytokine_effect = 0.005 * self.cytokine  # cytokine increases mutation rate
        self.mut += self.rng.poisson(baseline + cytokine_effect)
        self.mut += self.rng.poisson(0.01 * a)

        # rewards per chip
        r_homeo = -abs(self.L.mean() - self.target) / self.target
        r_sen = -np.mean(self.L < self.L_sen)
        r_mut = -0.1 * np.mean(self.mut)
        reward = r_homeo + 2.0 * r_sen + r_mut

        self.t += 1
        done = self.t >= self.max_steps
        info = {"mean_L": float(self.L.mean()), "mut_mean": float(self.mut.mean()), "cytokine": float(self.cytokine)}
        return self._obs(), float(reward), done, False, info

# -----------------------
# Multi-chip topology environment
# -----------------------
class MultiOOCTopologyEnv(gym.Env):
    """
    Composes multiple OOCTelomereCellActionEnv instances into a connected topology.
    Inter-chip signaling: each chip has a cytokine variable; after each step cytokine diffuses
    between connected chips via adjacency matrix and decays over time.
    """
    metadata = {"render_modes": ["human"]}

    def __init__(self, chip_config=[16,16], adjacency=None, seed=None):
        """
        chip_config: list of ints, number of cells per chip
        adjacency: square matrix NxN specifying diffusion weights (if None, fully connected small weight)
        """
        super().__init__()
        self.n_chips = len(chip_config)
        self.chips = [OOCTelomereCellActionEnv(n_cells=n, seed=(seed or 0)+i, chip_id=i) for i,n in enumerate(chip_config)]
        self.chip_config = chip_config
        self.total_cells = sum(chip_config)
        self.seed = seed
        # adjacency matrix
        if adjacency is None:
            # small all-to-all coupling
            self.adjacency = np.full((self.n_chips, self.n_chips), 0.1)
            np.fill_diagonal(self.adjacency, 0.0)
        else:
            self.adjacency = np.array(adjacency, dtype=float)
        # cytokine decay
        self.decay = 0.9

        # gym spaces
        # action: concatenated per-chip per-cell actions
        self.action_space = spaces.Box(low=0.0, high=1.0, shape=(self.total_cells,), dtype=np.float32)
        # observation: concatenated observations per chip (each chip obs length = 5 + n_cells)
        obs_len_per_chip = [5 + n for n in chip_config]
        self.obs_slices = []
        total_obs = 0
        for l in obs_len_per_chip:
            self.obs_slices.append((total_obs, total_obs + l))
            total_obs += l
        self.observation_space = spaces.Box(low=0.0, high=1e6, shape=(total_obs,), dtype=np.float32)

    def reset(self, seed=None, options=None):
        if seed is not None:
            np.random.seed(seed)
        obs_parts = []
        for chip in self.chips:
            obs_parts.append(chip.reset())
        self._assemble_obs(obs_parts)
        return self._get_obs(), {}

    def _assemble_obs(self, obs_parts):
        # store individual chip obs in self.last_obs_parts
        self.last_obs_parts = obs_parts

    def _get_obs(self):
        return np.concatenate(self.last_obs_parts).astype(np.float32)

    def step(self, action):
        # action: full concatenated vector length total_cells
        assert len(action) == self.total_cells, "Action length mismatch"
        rewards = []
        infos = []
        dones = []
        obs_parts = []
        idx = 0
        # apply per-chip slices
        for chip_i, chip in enumerate(self.chips):
            n = chip.n_cells
            act_slice = action[idx: idx + n]
            obs, r, done, trunc, info = chip.step(act_slice)
            rewards.append(r)
            infos.append(info)
            dones.append(done)
            obs_parts.append(obs)
            idx += n

        # update cytokine diffusion between chips
        cytokines = np.array([chip.cytokine for chip in self.chips])
        # simple diffusion: new = decay*old + adjacency.dot(old) * small_factor
        cytokines = cytokines * self.decay + self.adjacency.dot(cytokines) * 0.05
        # write back
        for i, chip in enumerate(self.chips):
            chip.cytokine = float(cytokines[i])

        self._assemble_obs(obs_parts)
        total_reward = float(np.sum(rewards))
        done = all(dones)
        info = {"per_chip": infos}
        return self._get_obs(), total_reward, done, False, info

    def render(self, mode="human"):
        for i, chip in enumerate(self.chips):
            print(f"Chip {i}: mean_L={chip.L.mean():.1f}, mut_mean={chip.mut.mean():.2f}, cytokine={chip.cytokine:.3f}")

# -----------------------
# Example usage
# -----------------------
if __name__ == "__main__":
    # create a small network of 3 chips with heterogenous sizes
    chip_config = [12, 20, 16]
    # adjacency: chain topology (0<->1<->2)
    adjacency = np.array([[0.0, 0.2, 0.0],
                          [0.2, 0.0, 0.2],
                          [0.0, 0.2, 0.0]])
    env = MultiOOCTopologyEnv(chip_config=chip_config, adjacency=adjacency, seed=123)
    obs, _ = env.reset()
    print("Initial obs length:", len(obs))
    # random action example
    action = np.random.rand(env.total_cells).astype(np.float32)
    obs, reward, done, trunc, info = env.step(action)
    print("Step reward:", reward)
    env.render()
'''

path = "/mnt/data/multi_ooc_network.py"
with open(path, "w") as f:
    f.write(script)

path


'/mnt/data/multi_ooc_network.py'

In [14]:
"""
multi_ooc_network.py

Multi-organ (multi-chip) Organ-on-Chip telomere environment.

Defines:
 - OOCTelomereCellActionEnv: per-chip environment (redefined inline for standalone use)
 - MultiOOCTopologyEnv: composes multiple chips and models simple inter-chip signaling
 - Example usage at the bottom.

Notes:
 - Actions: concatenated per-chip per-cell actions (shape = sum(n_cells_i))
 - Observations: concatenated summaries per chip
 - Inter-chip signaling: modeled as cytokine field values that diffuse between chips and affect mutation rates
 - Intended as simulation backbone for RL/control experiments across connected organ-chips
"""

import numpy as np
import gymnasium as gym
from gymnasium import spaces

# -----------------------
# Simple per-chip env (standalone)
# -----------------------
class OOCTelomereCellActionEnv:
    def __init__(self, n_cells=16, seed=None, chip_id=0):
        self.n_cells = n_cells
        self.chip_id = chip_id
        self.rng = np.random.RandomState(seed)
        self.delta_rep_mean = 30.0
        self.delta_rep_sd = 5.0
        self.alpha = 0.8
        self.L_sen = 3000.0
        self.target = 6500.0
        self.max_steps = 200
        self.t = 0
        self.L = None
        self.mut = None
        self.cytokine = 0.0  # local cytokine level affecting stress/mutation

    def reset(self):
        self.L = self.rng.normal(8000.0, 200.0, self.n_cells).clip(100, None)
        self.mut = np.zeros(self.n_cells, dtype=np.float32)
        self.t = 0
        self.cytokine = 0.0
        return self._obs()

    def _obs(self):
        mean_L = float(self.L.mean())
        median_L = float(np.median(self.L))
        mn = float(self.L.min())
        mx = float(self.L.max())
        # include cytokine in observation
        return np.concatenate([[mean_L, median_L, mn, mx, self.cytokine], self.L]).astype(np.float32)

    def step(self, actions):
        # actions: vector len n_cells [0,1]
        a = np.clip(actions, 0.0, 1.0)
        delta_rep = np.maximum(0.0, self.rng.normal(self.delta_rep_mean, self.delta_rep_sd, self.n_cells))
        self.L = (
            self.L
            - delta_rep
            + self.alpha * a * 100.0
            + self.rng.normal(0, 2.0, self.n_cells)
        )
        self.L = np.clip(self.L, 0.0, None)

        # mutation proxy increases with cytokine (stress) and low telomere
        baseline = 0.02 + 0.0001 * np.maximum(0, 4000 - self.L)
        cytokine_effect = 0.005 * self.cytokine  # cytokine increases mutation rate
        self.mut += self.rng.poisson(baseline + cytokine_effect)
        self.mut += self.rng.poisson(0.01 * a)

        # rewards per chip
        r_homeo = -abs(self.L.mean() - self.target) / self.target
        r_sen = -np.mean(self.L < self.L_sen)
        r_mut = -0.1 * np.mean(self.mut)
        reward = r_homeo + 2.0 * r_sen + r_mut

        self.t += 1
        done = self.t >= self.max_steps
        info = {"mean_L": float(self.L.mean()), "mut_mean": float(self.mut.mean()), "cytokine": float(self.cytokine)}
        return self._obs(), float(reward), done, False, info

# -----------------------
# Multi-chip topology environment
# -----------------------
class MultiOOCTopologyEnv(gym.Env):
    """
    Composes multiple OOCTelomereCellActionEnv instances into a connected topology.
    Inter-chip signaling: each chip has a cytokine variable; after each step cytokine diffuses
    between connected chips via adjacency matrix and decays over time.
    """
    metadata = {"render_modes": ["human"]}

    def __init__(self, chip_config=[16,16], adjacency=None, seed=None):
        """
        chip_config: list of ints, number of cells per chip
        adjacency: square matrix NxN specifying diffusion weights (if None, fully connected small weight)
        """
        super().__init__()
        self.n_chips = len(chip_config)
        self.chips = [OOCTelomereCellActionEnv(n_cells=n, seed=(seed or 0)+i, chip_id=i) for i,n in enumerate(chip_config)]
        self.chip_config = chip_config
        self.total_cells = sum(chip_config)
        self.seed = seed
        # adjacency matrix
        if adjacency is None:
            # small all-to-all coupling
            self.adjacency = np.full((self.n_chips, self.n_chips), 0.1)
            np.fill_diagonal(self.adjacency, 0.0)
        else:
            self.adjacency = np.array(adjacency, dtype=float)
        # cytokine decay
        self.decay = 0.9

        # gym spaces
        # action: concatenated per-chip per-cell actions
        self.action_space = spaces.Box(low=0.0, high=1.0, shape=(self.total_cells,), dtype=np.float32)
        # observation: concatenated observations per chip (each chip obs length = 5 + n_cells)
        obs_len_per_chip = [5 + n for n in chip_config]
        self.obs_slices = []
        total_obs = 0
        for l in obs_len_per_chip:
            self.obs_slices.append((total_obs, total_obs + l))
            total_obs += l
        self.observation_space = spaces.Box(low=0.0, high=1e6, shape=(total_obs,), dtype=np.float32)

    def reset(self, seed=None, options=None):
        if seed is not None:
            np.random.seed(seed)
        obs_parts = []
        for chip in self.chips:
            obs_parts.append(chip.reset())
        self._assemble_obs(obs_parts)
        return self._get_obs(), {}

    def _assemble_obs(self, obs_parts):
        # store individual chip obs in self.last_obs_parts
        self.last_obs_parts = obs_parts

    def _get_obs(self):
        return np.concatenate(self.last_obs_parts).astype(np.float32)

    def step(self, action):
        # action: full concatenated vector length total_cells
        assert len(action) == self.total_cells, "Action length mismatch"
        rewards = []
        infos = []
        dones = []
        obs_parts = []
        idx = 0
        # apply per-chip slices
        for chip_i, chip in enumerate(self.chips):
            n = chip.n_cells
            act_slice = action[idx: idx + n]
            obs, r, done, trunc, info = chip.step(act_slice)
            rewards.append(r)
            infos.append(info)
            dones.append(done)
            obs_parts.append(obs)
            idx += n

        # update cytokine diffusion between chips
        cytokines = np.array([chip.cytokine for chip in self.chips])
        # simple diffusion: new = decay*old + adjacency.dot(old) * small_factor
        cytokines = cytokines * self.decay + self.adjacency.dot(cytokines) * 0.05
        # write back
        for i, chip in enumerate(self.chips):
            chip.cytokine = float(cytokines[i])

        self._assemble_obs(obs_parts)
        total_reward = float(np.sum(rewards))
        done = all(dones)
        info = {"per_chip": infos}
        return self._get_obs(), total_reward, done, False, info

    def render(self, mode="human"):
        for i, chip in enumerate(self.chips):
            print(f"Chip {i}: mean_L={chip.L.mean():.1f}, mut_mean={chip.mut.mean():.2f}, cytokine={chip.cytokine:.3f}")

# -----------------------
# Example usage
# -----------------------
if __name__ == "__main__":
    # create a small network of 3 chips with heterogenous sizes
    chip_config = [12, 20, 16]
    # adjacency: chain topology (0<->1<->2)
    adjacency = np.array([[0.0, 0.2, 0.0],
                          [0.2, 0.0, 0.2],
                          [0.0, 0.2, 0.0]])
    env = MultiOOCTopologyEnv(chip_config=chip_config, adjacency=adjacency, seed=123)
    obs, _ = env.reset()
    print("Initial obs length:", len(obs))
    # random action example
    action = np.random.rand(env.total_cells).astype(np.float32)
    obs, reward, done, trunc, info = env.step(action)
    print("Step reward:", reward)
    env.render()


Initial obs length: 63
Step reward: -0.6983940468148878
Chip 0: mean_L=7950.2, mut_mean=0.08, cytokine=0.000
Chip 1: mean_L=7985.0, mut_mean=0.00, cytokine=0.000
Chip 2: mean_L=8009.6, mut_mean=0.06, cytokine=0.000


In [15]:
# Writing a decentralized multi-agent script for Organ-on-Chip environments.
# The script will be saved to /mnt/data/decentralized_ooc_agents.py
# It trains one agent per chip on standalone per-chip environments, then demonstrates deployment
# by loading the trained agents and running them jointly in the MultiOOCTopologyEnv.
#
# This cell writes the script file but does not execute heavy training here.

script = r'''
"""
decentralized_ooc_agents.py

Decentralized multi-agent approach for Organ-on-Chip control.

Workflow:
 1) Train one agent per chip on a standalone per-chip environment (OOCTelomereCellActionEnv).
 2) Save each agent's model to disk.
 3) Deploy agents jointly in the MultiOOCTopologyEnv by having each agent control its chip
    using only local observations (decentralized execution).

Notes:
 - Training uses stable-baselines3.PPO if installed. If not available, the script will print instructions.
 - Training is independent per chip (decentralized) which simplifies credit assignment and allows
   training on different hardware/resources.
 - Deployment composes saved agents into the multi-chip environment and runs episodes to evaluate joint behavior.
"""

import os
import numpy as np

# ---- Per-chip environment (standalone) ----
class OOCTelomereCellActionEnv:
    def __init__(self, n_cells=16, seed=None, chip_id=0, max_steps=200):
        self.n_cells = n_cells
        self.chip_id = chip_id
        self.rng = np.random.RandomState(seed)
        self.delta_rep_mean = 30.0
        self.delta_rep_sd = 5.0
        self.alpha = 0.8
        self.L_sen = 3000.0
        self.target = 6500.0
        self.max_steps = max_steps
        self.t = 0
        self.L = None
        self.mut = None
        self.cytokine = 0.0  # for standalone training can be 0

    def reset(self):
        self.L = self.rng.normal(8000.0, 200.0, self.n_cells).clip(100, None)
        self.mut = np.zeros(self.n_cells, dtype=np.float32)
        self.t = 0
        self.cytokine = 0.0
        return self._obs()

    def _obs(self):
        mean_L = float(self.L.mean())
        median_L = float(np.median(self.L))
        mn = float(self.L.min())
        mx = float(self.L.max())
        return np.concatenate([[mean_L, median_L, mn, mx, self.cytokine], self.L]).astype(np.float32)

    def step(self, actions):
        a = np.clip(actions, 0.0, 1.0)
        delta_rep = np.maximum(0.0, self.rng.normal(self.delta_rep_mean, self.delta_rep_sd, self.n_cells))
        self.L = (
            self.L
            - delta_rep
            + self.alpha * a * 100.0
            + self.rng.normal(0, 2.0, self.n_cells)
        )
        self.L = np.clip(self.L, 0.0, None)
        baseline = 0.02 + 0.0001 * np.maximum(0, 4000 - self.L)
        self.mut += self.rng.poisson(baseline)
        self.mut += self.rng.poisson(0.01 * a)
        r_homeo = -abs(self.L.mean() - self.target) / self.target
        r_sen = -np.mean(self.L < self.L_sen)
        r_mut = -0.1 * np.mean(self.mut)
        reward = r_homeo + 2.0 * r_sen + r_mut
        self.t += 1
        done = self.t >= self.max_steps
        info = {"mean_L": float(self.L.mean()), "mut_mean": float(self.mut.mean())}
        return self._obs(), float(reward), done, False, info

# ---- Multi-chip topology environment (imported from multi_ooc_network if available) ----
try:
    from multi_ooc_network import MultiOOCTopologyEnv
except Exception:
    # Provide a minimal fallback implementation (subset of functionality)
    import gymnasium as gym
    from gymnasium import spaces

    class MultiOOCTopologyEnv(gym.Env):
        def __init__(self, chip_config=[16,16], adjacency=None, seed=None):
            super().__init__()
            self.chip_config = list(chip_config)
            self.n_chips = len(self.chip_config)
            self.chips = [OOCTelomereCellActionEnv(n_cells=n, seed=(seed or 0)+i, chip_id=i) for i,n in enumerate(self.chip_config)]
            self.total_cells = sum(self.chip_config)
            self.action_space = spaces.Box(low=0.0, high=1.0, shape=(self.total_cells,), dtype=np.float32)
            obs_len_per_chip = [5 + n for n in self.chip_config]
            total_obs = sum(obs_len_per_chip)
            self.observation_space = spaces.Box(low=0.0, high=1e6, shape=(total_obs,), dtype=np.float32)
            # simple adjacency
            if adjacency is None:
                self.adjacency = np.full((self.n_chips, self.n_chips), 0.1)
                np.fill_diagonal(self.adjacency, 0.0)
            else:
                self.adjacency = np.array(adjacency, dtype=float)
            self.decay = 0.9

        def reset(self, seed=None, options=None):
            obs_parts = []
            for chip in self.chips:
                obs_parts.append(chip.reset())
            self.last_obs_parts = obs_parts
            return self._get_obs(), {}

        def _get_obs(self):
            return np.concatenate(self.last_obs_parts).astype(np.float32)

        def step(self, action):
            rewards = []
            obs_parts = []
            idx = 0
            for chip in self.chips:
                n = chip.n_cells
                act_slice = action[idx: idx + n]
                obs, r, done, trunc, info = chip.step(act_slice)
                rewards.append(r)
                obs_parts.append(obs)
                idx += n
            # simple cytokine diffusion (not used during decentralized training)
            cytokines = np.array([chip.cytokine for chip in self.chips])
            cytokines = cytokines * self.decay + self.adjacency.dot(cytokines) * 0.05
            for i, chip in enumerate(self.chips):
                chip.cytokine = float(cytokines[i])
            self.last_obs_parts = obs_parts
            return self._get_obs(), float(np.sum(rewards)), all([chip.t >= chip.max_steps for chip in self.chips]), False, {"per_chip": [{"mean_L": c.L.mean(), "mut_mean": c.mut.mean()} for c in self.chips]}

# ---- Training harness per-chip (decentralized training) ----
def train_per_chip_agents(chip_config=[16,16], timesteps_per_agent=100000, save_dir="./agents"):
    """
    Train one PPO agent per chip on standalone OOCTelomereCellActionEnv instances.
    Each agent controls only its chip (per-cell actions).
    """
    os.makedirs(save_dir, exist_ok=True)
    try:
        from stable_baselines3 import PPO
        from stable_baselines3.common.env_checker import check_env
        from stable_baselines3.common.vec_env import DummyVecEnv
        import gymnasium as gym
        print("stable-baselines3 detected. Training agents...")

        for i, n_cells in enumerate(chip_config):
            print(f"Preparing training for agent_{i} (n_cells={n_cells})...")
            # wrap standalone per-chip env for SB3
            def make_env(seed=0, n=n_cells, chip_id=i):
                def _thunk():
                    env = PerChipGymWrapper(n, seed=seed, chip_id=chip_id)
                    return env
                return _thunk

            vec_env = DummyVecEnv([make_env(seed=42+i) for _ in range(4)])  # 4 parallel envs
            model = PPO("MlpPolicy", vec_env, verbose=1)
            model.learn(total_timesteps=timesteps_per_agent)
            model_path = os.path.join(save_dir, f"agent_chip_{i}.zip")
            model.save(model_path)
            print(f"Saved agent {i} to {model_path}")
        print("All agents trained and saved.")
    except Exception as e:
        print("stable-baselines3 not available or training failed:", str(e))
        print("Fallback: create random policy placeholders (no training).")
        # create placeholder random policies (numpy files with seed)
        for i, n_cells in enumerate(chip_config):
            placeholder = {"n_cells": n_cells, "seed": int(42+i)}
            np.save(os.path.join(save_dir, f"agent_chip_{i}_placeholder.npy"), placeholder)
        print("Saved placeholder policies to", save_dir)

# ---- Minimal Gym wrapper for standalone per-chip env for SB3 compatibility ----
import gymnasium as gym
from gymnasium import spaces
class PerChipGymWrapper(gym.Env):
    def __init__(self, n_cells=16, seed=None, chip_id=0):
        super().__init__()
        self.inner = OOCTelomereCellActionEnv(n_cells=n_cells, seed=seed, chip_id=chip_id)
        self.action_space = spaces.Box(low=0.0, high=1.0, shape=(n_cells,), dtype=np.float32)
        obs_len = 5 + n_cells
        self.observation_space = spaces.Box(low=0.0, high=1e6, shape=(obs_len,), dtype=np.float32)

    def reset(self, seed=None, options=None):
        obs = self.inner.reset()
        return obs, {}

    def step(self, action):
        obs, reward, done, trunc, info = self.inner.step(action)
        return obs, reward, done, trunc, info

# ---- Deployment: load per-chip agents and run in MultiOOCTopologyEnv ----
def deploy_agents_in_multi_env(chip_config=[16,16], agents_dir="./agents", episodes=5):
    """
    Load saved agents (SB3 models or placeholders) and run episodes in the multi-chip env.
    Each agent receives local observation (its chip's subvector) and outputs per-cell actions.
    """
    # create multi env
    env = MultiOOCTopologyEnv(chip_config=chip_config, seed=123)
    total_cells = env.total_cells

    # load agents
    agents = []
    for i in range(len(chip_config)):
        model_path = os.path.join(agents_dir, f"agent_chip_{i}.zip")
        if os.path.exists(model_path):
            try:
                from stable_baselines3 import PPO
                model = PPO.load(model_path)
                agents.append(("sb3", model))
                print(f"Loaded SB3 agent for chip {i}")
                continue
            except Exception as e:
                print("Failed to load SB3 model:", e)
        # fallback: try placeholder
        placeholder_path = os.path.join(agents_dir, f"agent_chip_{i}_placeholder.npy")
        if os.path.exists(placeholder_path):
            placeholder = np.load(placeholder_path, allow_pickle=True).item()
            agents.append(("placeholder", placeholder))
            print(f"Loaded placeholder for chip {i}")
        else:
            agents.append(("random", {"n_cells": chip_config[i]}))
            print(f"No agent found for chip {i}; using random policy")

    # run episodes
    for ep in range(episodes):
        obs, _ = env.reset()
        done = False
        ep_reward = 0.0
        while not done:
            # split observations per chip
            actions = []
            idx = 0
            for i, n in enumerate(chip_config):
                obs_start = sum([5 + x for x in chip_config[:i]])
                obs_end = obs_start + 5 + n
                local_obs = obs[obs_start:obs_end]
                agent_type, agent_obj = agents[i]
                if agent_type == "sb3":
                    # SB3 models expect flattened observations; we pass local_obs
                    action, _ = agent_obj.predict(local_obs, deterministic=True)
                elif agent_type == "placeholder":
                    seed = int(agent_obj["seed"])
                    rng = np.random.RandomState(seed)
                    action = rng.rand(n).astype(np.float32) * 0.1  # low random activation as placeholder
                else:
                    action = np.random.rand(n).astype(np.float32) * 0.1
                actions.append(action)
            # concatenate and step
            full_action = np.concatenate(actions)
            obs, reward, done, trunc, info = env.step(full_action)
            ep_reward += reward
        print(f"Episode {ep} total reward: {ep_reward:.3f}")

# ---- CLI ----
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--train", action="store_true", help="Train per-chip agents (requires stable-baselines3)")
    parser.add_argument("--timesteps", type=int, default=100000, help="Timesteps per agent")
    parser.add_argument("--deploy", action="store_true", help="Deploy agents in multi-chip env")
    parser.add_argument("--agents_dir", type=str, default="./agents", help="Directory to save/load agents")
    args = parser.parse_args()

    chip_config = [12, 20, 16]  # example heterogenous chips
    if args.train:
        train_per_chip_agents(chip_config=chip_config, timesteps_per_agent=args.timesteps, save_dir=args.agents_dir)
    if args.deploy:
        deploy_agents_in_multi_env(chip_config=chip_config, agents_dir=args.agents_dir, episodes=3)
'''

path = "/mnt/data/decentralized_ooc_agents.py"
with open(path, "w") as f:
    f.write(script)

path


'/mnt/data/decentralized_ooc_agents.py'