In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import warnings
# Install an "ignore" filter for FutureWarning so those messages do not
# clutter cell output; other warning categories are unaffected by this filter.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Project-local helpers. The two visualization functions are called in the
# plotting cells of this notebook.
# NOTE(review): simulate_model is imported but not called in the cells visible
# here — presumably it belongs to the dataset-generation step; confirm.
from osdr_validation.tissue_simulation import simulate_model
from osdr_validation.visualization import plot_average_neighbourhoods, neighbourhoods_over_time

## Generate 100-Tissue Dataset

This will:
1. Initialize 100 random tissues (with 250-9750 cells per type)
2. Simulate stochastic proliferation for **n=1000** time steps
3. Record tissue states every **t=100** time steps
4. Save initial and post-proliferation datasets to CSV

**Note:** This process takes approximately **2 hours** on a multi-core machine using parallelization.

## Generate 100-Tissue Dataset

This will:
1. Initialize 100 random tissues (with 250-9750 cells per type)
2. Simulate stochastic proliferation for **n=1000** time steps
3. Record tissue states every **t=100** time steps
4. Save initial and post-proliferation datasets to CSV

**Note:** This process takes approximately **2 hours** on a multi-core machine using parallelization.

## Load Existing Dataset

Load previously generated simulation data.

In [None]:
# Load the saved simulation data from CSV: one frame for the initial tissues
# and one for the post-proliferation records.
initial_df = pd.read_csv('../data/simulated_tissues_alt.csv')
post_df = pd.read_csv('../data/simulated_tissues_post_alt.csv')

# Quick summary of what was loaded (row counts, number of distinct tissues,
# and which time steps are present in the post-proliferation frame).
print("Loaded datasets:")
print(f"  Initial tissues: {len(initial_df)} cells across {initial_df['Tissue_ID'].nunique()} tissues")
print(f"  Post-proliferation: {len(post_df)} cells")
# .tolist() converts the NumPy scalars returned by .unique() into built-in
# Python numbers; without it, NumPy >= 2 renders the list as
# [np.int64(100), np.int64(200), ...] instead of [100, 200, ...].
print(f"  Time steps recorded: {sorted(post_df['Time_Step'].unique().tolist())}")

## Visualize Tissue Statistics

### Cell Counts and Neighbourhood Densities Across All Tissues

In [None]:
# Plot cell counts and average neighbourhood densities across all 100 tissues,
# passing both the initial and the post-proliferation frames loaded earlier.
# NOTE(review): the "100 tissues" figure comes from the notebook text; the
# actual count is whatever initial_df['Tissue_ID'].nunique() reports in the
# load cell — confirm they agree.
plot_average_neighbourhoods(initial_df, post_df)

### Time Series Analysis for Individual Tissues

Examine how cell counts and neighbourhood densities evolve over time for specific tissues.

In [None]:
# Plot time series for tissue 0, using the initial and post-proliferation
# frames loaded earlier.
# NOTE(review): presumably tissue_id is matched against the 'Tissue_ID'
# column — confirm in osdr_validation.visualization.
neighbourhoods_over_time(initial_df, post_df, tissue_id=0)

In [None]:
# Examine another tissue (e.g., tissue 7) with the same time-series plot, for
# comparison against tissue 0; change tissue_id to inspect a different tissue.
# NOTE(review): presumably tissue_id must be a value present in the
# 'Tissue_ID' column — confirm in osdr_validation.visualization.
neighbourhoods_over_time(initial_df, post_df, tissue_id=7)

## Summary

The simulation dataset has been generated and validated:

- **100 tissue replicates** with 250-9750 cells per type at initialization
- **Stochastic proliferation** for 1000 time steps with known dynamical model
- **Recorded states** at t=100, 200, ..., 1000 for temporal analysis
- **Neighbourhood dynamics** biased toward a steady state at X=16 cells

Key observations:
- Tissues converge toward steady state over time
- Average neighbourhood densities stabilize around log₂(16) = 4
- Cell counts remain relatively stable after initial transient dynamics

**Next step:** Perform model inference on this simulated data in notebook 3 to validate the OSDR method.