# Morphogenetic System Lineage Analysis

This notebook analyzes the telemetry data produced by the adversarial evolution harness, focusing on lineage dynamics, fitness, and population diversity over generations.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json
from pathlib import Path

sns.set_theme(style="whitegrid")

## 1. Load Data

First, we load the run summary data. The `run_evolution_smoke_test.sh` script produces a `summary.json` for each generation; we collect these files into a single DataFrame with one row per summary file.

In [None]:
def load_summaries(run_dir: Path) -> pd.DataFrame:
    """Load every ``summary.json`` below *run_dir* into a single DataFrame.

    Each summary file contributes one row. The generation number is read
    from a directory component named ``gen<digits>`` (for example
    ``gen003`` -> 3). Components between *run_dir* and the file are checked
    first, outermost first; if none matches, the components of *run_dir*
    itself are checked, innermost first. Rows without such a component get
    ``generation == -1``.

    Args:
        run_dir: Root directory that is searched recursively.

    Returns:
        A DataFrame sorted by ``generation`` with a fresh integer index.
        When no summary file is found the frame is empty but still carries
        every column, so later cells can reference columns safely.

    Raises:
        json.JSONDecodeError: If a summary file does not contain valid JSON.
    """
    columns = [
        'generation',
        'scenario_name',
        'fitness_score',
        'lineage_pressure',
        'lineage_component',
        'breach_observed',
        'final_cell_count',
        'total_replications',
        'total_signals',
    ]

    def generation_of(summary_path: Path) -> int:
        """Return the generation encoded in the path, or -1 if there is none."""
        inside = summary_path.relative_to(run_dir).parts[:-1]
        outside = reversed(run_dir.parts)
        for part in (*inside, *outside):
            suffix = part[3:]
            # Require "gen" followed only by ASCII digits so that unrelated
            # names such as "generated" are skipped instead of forcing -1.
            if part.startswith('gen') and suffix.isascii() and suffix.isdigit():
                return int(suffix)
        return -1

    def section(data: dict, name: str) -> dict:
        """Return ``data[name]`` when it is a mapping, else an empty dict."""
        value = data.get(name)
        # A JSON ``null`` (or any non-object) section is treated as missing.
        return value if isinstance(value, dict) else {}

    summaries = []
    for summary_path in sorted(run_dir.glob('**/summary.json')):
        with open(summary_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        run_metadata = section(data, 'run_metadata')
        annotations = section(data, 'annotations')
        stats = section(data, 'stats')

        summaries.append({
            'generation': generation_of(summary_path),
            'scenario_name': run_metadata.get('scenario_name'),
            'fitness_score': annotations.get('fitness_score'),
            'lineage_pressure': annotations.get('lineage_pressure'),
            'lineage_component': annotations.get('lineage_component'),
            'breach_observed': annotations.get('breach_observed'),
            'final_cell_count': run_metadata.get('final_cell_count'),
            'total_replications': stats.get('total_replications'),
            'total_signals': stats.get('total_signals'),
        })

    # Passing ``columns`` keeps the schema even for an empty run directory;
    # without it sort_values('generation') raises KeyError on an empty frame.
    frame = pd.DataFrame(summaries, columns=columns)
    # A stable sort keeps the path order of summaries within one generation.
    return frame.sort_values('generation', kind='stable').reset_index(drop=True)

# --- Set this to the output directory of the evolution run to analyze ---
RUN_DIRECTORY = Path("target/pitch_demo/runs")
# -----------------------------------------------------------------------

if not RUN_DIRECTORY.exists():
    # Fall back to an empty frame so the cells below can still run.
    df_summary = pd.DataFrame()
    print(f"Run directory {RUN_DIRECTORY.as_posix()} not found. Please run an evolution first.")
else:
    df_summary = load_summaries(RUN_DIRECTORY)
    print(f"Loaded {len(df_summary)} summaries from {RUN_DIRECTORY.as_posix()}")
    display(df_summary.head())


## 2. Analyze Fitness and Lineage Pressure Over Generations

In [None]:
if df_summary.empty:
    print("No summary data to plot.")
else:
    # One figure, two y-axes over a shared generation axis:
    # fitness on the left (blue), lineage pressure on the right (red).
    fig, ax1 = plt.subplots(figsize=(12, 6))

    sns.lineplot(
        x='generation',
        y='fitness_score',
        data=df_summary,
        label='Fitness Score',
        marker='o',
        color='b',
        ax=ax1,
    )
    ax1.set(xlabel='Generation')
    ax1.tick_params(axis='y', labelcolor='b')
    ax1.set_ylabel('Fitness Score', color='b')

    # Second y-axis that shares the x-axis with ax1.
    ax2 = ax1.twinx()
    sns.lineplot(
        x='generation',
        y='lineage_pressure',
        data=df_summary,
        label='Lineage Pressure',
        marker='x',
        color='r',
        ax=ax2,
    )
    ax2.tick_params(axis='y', labelcolor='r')
    ax2.set_ylabel('Lineage Pressure', color='r')

    fig.suptitle('Fitness Score and Lineage Pressure Over Generations')
    # The layout rectangle stops at 0.95 of the figure height, presumably to
    # leave room for the suptitle above the axes.
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()

### Analysis
This plot helps us understand the relationship between the evolutionary pressure we apply (lineage pressure) and the resulting fitness of the attack candidates. We expect to see fitness increase as the system explores more effective attack vectors.

## 3. Lineage Diversity Analysis

Now we'll load the detailed lineage data to analyze how the population of cell lineages changes over the course of a single run. For this, we need to pick one of the generated `lineage_long_form.csv` files.

In [None]:
def load_lineage_data(run_dir: Path, generation: int = 0, candidate_id: str = "seed-0") -> pd.DataFrame:
    """Load one ``lineage_long_form.csv`` from a generation directory.

    The generation directory is ``run_dir / 'gen<NNN>'`` with the generation
    zero-padded to three digits. All ``lineage_long_form.csv`` files below it
    are collected in sorted path order so the choice is reproducible.

    Args:
        run_dir: Root directory of the evolution run.
        generation: Generation number used to build the directory name.
        candidate_id: Preferred candidate. A file is preferred when one of
            its directory components equals this value.
            NOTE(review): this assumes each candidate writes into a directory
            named after its id -- confirm against the harness output layout.
            When no file matches, the first available file is used instead.

    Returns:
        The parsed CSV as a DataFrame, or an empty DataFrame when the
        generation contains no lineage file.
    """
    gen_dir = run_dir / f'gen{generation:03}'
    # Sort so the selected file does not depend on filesystem listing order.
    lineage_files = sorted(gen_dir.glob('**/lineage_long_form.csv'))
    if not lineage_files:
        return pd.DataFrame()

    matching = [
        path for path in lineage_files
        if candidate_id in path.relative_to(gen_dir).parts[:-1]
    ]
    # Fall back to the first file so runs without a per-candidate directory
    # still load something, as before.
    chosen = (matching or lineage_files)[0]
    return pd.read_csv(chosen)

if not RUN_DIRECTORY.exists():
    print(f"Run directory {RUN_DIRECTORY.as_posix()} not found.")
    # Empty frame so the plotting cell below can still run.
    df_lineage = pd.DataFrame()
else:
    # Generation 1 serves as the worked example.
    df_lineage = load_lineage_data(RUN_DIRECTORY, generation=1)
    if df_lineage.empty:
        print("No lineage data found for the selected run.")
    else:
        print(f"Loaded {len(df_lineage)} lineage records.")
        display(df_lineage.head())

In [None]:
if df_lineage.empty:
    print("No lineage data to plot.")
else:
    # One line per lineage: cell count against simulation step.
    plt.figure(figsize=(14, 7))
    sns.lineplot(
        x='step',
        y='count',
        hue='lineage',
        data=df_lineage,
        marker='.',
    )
    plt.xlabel('Simulation Step')
    plt.ylabel('Number of Cells')
    plt.title('Lineage Population Over Time')
    # Anchor the legend just outside the right edge of the axes.
    plt.legend(title='Lineage Type', loc='upper left', bbox_to_anchor=(1.05, 1))
    plt.tight_layout()
    plt.show()

### Analysis
This plot shows which cell lineages dominate at different points in the simulation. A successful attack might be one that quickly suppresses defensive lineages or promotes the growth of a specific adversarial lineage.