# Drossel-Schwab Forest Fire Model

Four experiments:
1. **f/p Ratio Sweep** - Effect of fire-to-growth ratio
2. **p Parameter Sweep** - Effect of growth probability
3. **Grid Size Effects (RQ1)** - Scaling with system size
4. **f Parameter Sweep** - Effect of fire/lightning probability

In [None]:
import results
from utils import (
    create_experiment_dir, get_latest_experiment_dir,
    run_parallel_simulations, save_summary,
    load_experiment_data, load_summary_map,
    plot_fire_size_distribution, plot_density_timeseries, plot_cluster_size_distribution,
)
from simulations.drosselschwab import *
import matplotlib.pyplot as plt

## Test

In [None]:
# Suppression sweep: grid size, step count, growth probability p and lightning
# probability f are held fixed; only the `suppress` setting changes between
# parameter sets.
EXP1_NAME = "suppression_test"
L = 512
steps = 2000
runs_per_param = 3
p = 0.1
f = 1e-4

suppresions = [0, 1, 5, 10, 50, 100, 500, 1000, 5000]

# One dict per (suppress level, repeat). `param_id` is 1-based and shared by
# all repeats of the same level; `run_id` counts the repeats from 0.
exp1_param_list = []
param_idx = 0  # stays 0 when there is nothing to sweep
for param_idx, sup in enumerate(suppresions, start=1):
    for run_idx in range(runs_per_param):
        exp1_param_list.append(dict(
            L=L, p=p, f=f, steps=steps,
            param_id=param_idx, run_id=run_idx, suppress=sup,
        ))

print(f"Experiment 1: {len(exp1_param_list)} simulations, {param_idx} parameter sets")

In [None]:
# Run the suppression sweep.
# NOTE: these lines are live code, not commented out - executing this cell
# launches every simulation described in exp1_param_list.
# Steps: obtain an output location for EXP1_NAME via create_experiment_dir
# (presumably a fresh directory - confirm in utils), run the parameter list
# through run_parallel_simulations, then hand the results to save_summary.
exp1_outdir = create_experiment_dir(EXP1_NAME)
exp1_results = run_parallel_simulations(exp1_param_list, exp1_outdir)
save_summary(exp1_results, exp1_outdir)

In [None]:
# Analyze the suppression sweep.
# Loads the most recent experiment directory for EXP1_NAME and draws the
# fire-size distribution and the tree-density time series.
# The titles and output file names are specific to the suppression test: the
# previous "Exp 1 ... f/p Ratio" labels and exp1_*.png names were copied from
# the f/p sweep, mislabelled these plots, and made both cells write to the
# same image files.
try:
    exp1_dir = get_latest_experiment_dir(EXP1_NAME)
    exp1_data = load_experiment_data(exp1_dir)
    exp1_summary = load_summary_map(exp1_dir)
    plot_fire_size_distribution(exp1_data, exp1_summary, "Suppression Test: Fire Size by Suppression Level",
                                 results.path("suppression_fire_size_dist.png"))
    plot_density_timeseries(exp1_data, exp1_summary, "Suppression Test: Tree Density Over Time",
                            results.path("suppression_density_timeseries.png"))
except FileNotFoundError as e:
    # Nothing has been simulated yet (or the data files are missing): report and move on.
    print(f"No data: {e}")

In [None]:
# python
"""
Analyze the suppression paradox:
- Group raw fire sizes by suppress value (from debug jsons)
- Define megafire as absolute threshold or top quantile
- Compute CCDFs, tail probs, bootstrapped CIs, KS tests vs baseline
- Run logistic regression predicting whether a run had >=1 megafire
- Plot CCDFs and megafire probability by suppress
"""
import os
import glob
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.api as sm

def load_experiment_fires(exp_dir):
    """Load each run's parameters and fire sizes from an experiment directory.

    Every ``debug_param*.json`` file in *exp_dir* describes one run through its
    ``"params"`` dict (``param_id`` and ``run_id`` are read from it). Fire sizes
    come from ``fires_param*.csv`` files: the ``fire_size`` column when present,
    otherwise the first numeric column. The ids of a CSV file are parsed from
    ``param<N>`` / ``run<N>`` tokens in its file name.

    A run is paired with a CSV file as follows:

    1. the first file whose parsed param id equals the run's ``param_id`` and
       whose parsed run id equals ``run_id`` (any run id is accepted when the
       debug file carries no ``run_id``);
    2. otherwise the first file with the same param id, ignoring the run id
       (covers files that are written per parameter set rather than per run).

    Ids are compared as integers, so ``param1`` never matches ``param10``.

    Args:
        exp_dir: Directory containing the debug JSON and fires CSV files.

    Returns:
        list[dict]: One ``{"params": dict, "fires": np.ndarray}`` entry per
        readable debug file, in sorted file-name order. ``fires`` is a float
        array and is empty when no CSV matched or the CSV could not be read.

    Unreadable files are skipped (debug JSON) or treated as empty (CSV); a
    warning is emitted in both cases instead of failing silently.
    """
    import re
    import warnings

    def id_from_name(name, tag):
        # Digits directly after `tag`, with the whole token delimited by
        # non-alphanumerics, e.g. "fires_param10_run0.csv" -> 10 for "param".
        hit = re.search(rf"(?<![A-Za-z0-9]){tag}(\d+)(?![A-Za-z0-9])", name)
        return int(hit.group(1)) if hit else None

    def as_int(value):
        # Ids stored as strings or floats in the JSON still compare correctly.
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    # (param_id, run_id, fire sizes) for every CSV, in sorted file-name order.
    fire_files = []
    for path in sorted(glob.glob(os.path.join(exp_dir, "fires_param*.csv"))):
        base = os.path.basename(path)
        try:
            df = pd.read_csv(path)
            if "fire_size" in df.columns:
                arr = df["fire_size"].to_numpy(dtype=float)
            else:
                # Fall back to the first numeric column.
                arr = df.select_dtypes(include=[np.number]).iloc[:, 0].to_numpy(dtype=float)
        except (OSError, ValueError, TypeError, IndexError) as exc:
            warnings.warn(f"Could not read fire sizes from {path}: {exc}")
            arr = np.array([], dtype=float)
        fire_files.append((id_from_name(base, "param"), id_from_name(base, "run"), arr))

    runs = []
    for dbg in sorted(glob.glob(os.path.join(exp_dir, "debug_param*.json"))):
        try:
            with open(dbg, encoding="utf-8") as fh:
                info = json.load(fh)
            params = info.get("params", {})
            pid = as_int(params.get("param_id"))
            rid = as_int(params.get("run_id"))
        except (OSError, ValueError, AttributeError) as exc:
            # Best effort: one broken debug file must not abort the whole load.
            warnings.warn(f"Skipping unreadable debug file {dbg}: {exc}")
            continue

        fires = None
        # 1) Same parameter set and same run (any run if the run id is unknown).
        for file_pid, file_rid, arr in fire_files:
            if file_pid == pid and (rid is None or file_rid == rid):
                fires = arr
                break
        # 2) Same parameter set, whatever the run id in the file name.
        if fires is None and pid is not None:
            for file_pid, _file_rid, arr in fire_files:
                if file_pid == pid:
                    fires = arr
                    break
        if fires is None:
            fires = np.array([], dtype=float)
        runs.append({"params": params, "fires": np.asarray(fires, dtype=float)})
    return runs

def bootstrap_ci(data, statfunc=np.mean, nboot=2000, ci=0.95, rng=None):
    """Percentile-bootstrap confidence interval for ``statfunc(data)``.

    Args:
        data: 1-D sample (anything ``len()`` and ``Generator.choice`` accept).
        statfunc: Statistic applied to the sample and to every resample.
        nboot: Number of bootstrap resamples.
        ci: Central coverage of the interval (0.95 -> 2.5th and 97.5th percentiles).
        rng: Seed or generator forwarded to ``np.random.default_rng``.

    Returns:
        tuple: ``(statistic, lower, upper)``; three NaNs when *data* is empty.
    """
    generator = np.random.default_rng(rng)
    sample_size = len(data)
    if sample_size == 0:
        return (np.nan, np.nan, np.nan)

    # Each replicate is the statistic of a same-size resample drawn with replacement.
    replicates = np.array([
        statfunc(generator.choice(data, size=sample_size, replace=True))
        for _ in range(nboot)
    ])
    lower, upper = (
        np.percentile(replicates, q)
        for q in ((1-ci)/2*100, (1+ci)/2*100)
    )
    return statfunc(data), lower, upper

def _suppress_sort_key(level):
    """Order suppress levels ascending, with a missing level (None) last."""
    return float("inf") if level is None else level


def _plot_fire_size_ccdfs(grouped):
    """Draw the empirical CCDF of positive fire sizes, one series per suppress level."""
    plt.figure(figsize=(8,6))
    for sup in sorted(grouped, key=_suppress_sort_key):
        pooled = np.concatenate(grouped[sup])
        sorted_x = np.sort(pooled[pooled > 0])
        if sorted_x.size == 0:
            continue
        # P(X >= x_i) = (n - i) / n for 0-based rank i. This runs from 1 down to
        # 1/n, so every point - including the largest fire - is strictly
        # positive and survives the logarithmic y axis.
        ccdf = 1.0 - np.arange(sorted_x.size) / sorted_x.size
        plt.loglog(sorted_x, ccdf, marker='.', linestyle='none', label=f"suppress={sup}")
    plt.xlabel("Fire size")
    plt.ylabel("CCDF")
    plt.title("CCDF of fire sizes by suppress")
    plt.legend()
    plt.grid(True, which='both', ls='--', alpha=0.4)
    plt.show()


def _plot_megafire_probability(df_stats, megafire_threshold):
    """Draw the per-level megafire probability with its bootstrap interval."""
    plt.figure(figsize=(8,4))
    xs = np.arange(len(df_stats))
    prob = df_stats["megafire_prob"].to_numpy(dtype=float)
    # Percentile bounds may land marginally on the wrong side of the point
    # estimate; error-bar lengths must not be negative, so clip them at 0.
    below = np.clip(prob - df_stats["megafire_prob_lo"].to_numpy(dtype=float), 0, None)
    above = np.clip(df_stats["megafire_prob_hi"].to_numpy(dtype=float) - prob, 0, None)
    plt.errorbar(xs, prob, yerr=[below, above], fmt='o', capsize=4)
    plt.xticks(xs, [str(x) for x in df_stats["suppress"]], rotation=45)
    plt.xlabel("suppress")
    plt.ylabel(f"P(fire >= {megafire_threshold:.3g})")
    plt.title("Megafire probability by suppress")
    plt.grid(True, ls='--', alpha=0.3)
    plt.show()


def analyze_suppression_paradox(exp_dir, megafire_threshold=None, megafire_quantile=0.99):
    """Compare fire-size statistics and megafire frequency across suppress levels.

    Runs are loaded with ``load_experiment_fires`` and grouped by the
    ``suppress`` entry of their parameters. For each level the function
    bootstraps the mean and median positive fire size and the fraction of
    fires at or above the megafire threshold, and runs a two-sample KS test of
    the positive fire sizes against the baseline level (``suppress == 0`` when
    present, otherwise the smallest level). At run level it fits a logistic
    regression of "run had at least one megafire" on ``suppress`` and a
    chi-square test over the per-level contingency table. Two figures are
    shown (CCDFs and megafire probability) and the tables are printed.

    Args:
        exp_dir: Experiment directory passed to ``load_experiment_fires``.
        megafire_threshold: Absolute fire size from which a fire counts as a
            megafire. When None it is derived from *megafire_quantile*.
        megafire_quantile: Quantile of all positive fire sizes (pooled over
            every run) used as the threshold when none is given.

    Returns:
        dict: ``{"df_stats": per-level DataFrame, "df_runs": per-run DataFrame,
        "megafire_threshold": threshold actually used}``.

    Raises:
        RuntimeError: If no runs are found, or a threshold has to be derived
            but there are no positive fire sizes.
    """
    runs = load_experiment_fires(exp_dir)
    if not runs:
        raise RuntimeError("No fire data found")

    # Per-run fire arrays, grouped by suppress level.
    grouped = {}
    for run in runs:
        grouped.setdefault(run["params"].get("suppress", None), []).append(run["fires"])

    if megafire_threshold is None:
        # Global quantile over all fires; guard the concatenate so that
        # "every run is empty" raises the intended RuntimeError.
        non_empty = [run["fires"] for run in runs if len(run["fires"]) > 0]
        if not non_empty:
            raise RuntimeError("No fire data found")
        pooled = np.concatenate(non_empty)
        pooled_positive = pooled[pooled > 0]  # ignore zeros if present
        if pooled_positive.size == 0:
            raise RuntimeError("No positive fire sizes found; cannot derive a megafire threshold")
        megafire_threshold = np.quantile(pooled_positive, megafire_quantile)

    levels = sorted(grouped, key=_suppress_sort_key)
    baseline_key = 0 if 0 in grouped else levels[0]
    baseline_all = np.concatenate(grouped[baseline_key])
    baseline_positive = baseline_all[baseline_all > 0]

    # Per-level statistics.
    stats_table = []
    for sup in levels:
        run_arrays = grouped[sup]
        all_vec = np.concatenate(run_arrays)
        positive = all_vec[all_vec > 0]
        mean, mean_lo, mean_hi = bootstrap_ci(positive, np.mean)
        median, med_lo, med_hi = bootstrap_ci(positive, np.median)
        # Mean of the 0/1 megafire indicator = fraction of fires that are megafires.
        prob, prob_lo, prob_hi = bootstrap_ci((all_vec >= megafire_threshold).astype(int), np.mean)
        if positive.size > 0 and baseline_positive.size > 0:
            ks_stat, ks_p = stats.ks_2samp(positive, baseline_positive)
        else:
            ks_stat, ks_p = np.nan, np.nan
        stats_table.append({
            "suppress": sup,
            "n_runs": len(run_arrays),
            "total_fires": int(all_vec.size),
            "megafire_prob": prob,
            "megafire_prob_lo": prob_lo,
            "megafire_prob_hi": prob_hi,
            "mean_fire": mean, "mean_lo": mean_lo, "mean_hi": mean_hi,
            "median_fire": median, "median_lo": med_lo, "median_hi": med_hi,
            "ks_stat_vs_baseline": ks_stat, "ks_p_vs_baseline": ks_p
        })

    df_stats = pd.DataFrame(stats_table)
    print("Megafire threshold =", megafire_threshold)
    print(df_stats[["suppress","n_runs","total_fires","megafire_prob","mean_fire","median_fire","ks_p_vs_baseline"]])

    # Run-level table: did a run have >= 1 megafire?
    run_rows = [
        {
            "suppress": run["params"].get("suppress", None),
            "had_megafire": int(np.any(run["fires"] >= megafire_threshold)),
            "num_megafires": int(np.sum(run["fires"] >= megafire_threshold)),
            "total_fires": int(len(run["fires"])),
        }
        for run in runs
    ]
    df_runs = pd.DataFrame(run_rows).dropna(subset=["suppress"])

    # Logistic regression (had_megafire ~ suppress). Best effort: the fit can
    # fail for non-numeric levels or degenerate outcomes, so report the reason
    # and carry on with the rest of the analysis.
    try:
        df_runs["suppress_num"] = df_runs["suppress"].astype(float)
        X = sm.add_constant(df_runs[["suppress_num"]])
        y = df_runs["had_megafire"]
        logit = sm.Logit(y, X).fit(disp=False)
        print("Logistic regression summary (had_megafire ~ suppress):")
        print(logit.summary())
    except Exception as exc:
        print("Skipping logistic regression (non-numeric suppress or insufficient data):", repr(exc))

    _plot_fire_size_ccdfs(grouped)
    _plot_megafire_probability(df_stats, megafire_threshold)

    # Chi-square over runs with / without a megafire, one row per suppress level.
    contingency = []
    for _sup, grp in df_runs.groupby("suppress"):
        n_with = grp["had_megafire"].sum()
        contingency.append([n_with, len(grp) - n_with])
    if len(contingency) >= 2:
        try:
            _chi2, pval, _dof, _expected = stats.chi2_contingency(np.array(contingency))
        except ValueError as exc:
            # Raised when an expected count is zero, e.g. every run (or no run)
            # had a megafire; the test is then undefined, not an error.
            print("Chi-square test not applicable:", exc)
        else:
            print("Chi-square across suppress groups (run-level had_megafire): p =", pval)
    else:
        print("Not enough groups for chi-square test")

    return {"df_stats": df_stats, "df_runs": df_runs, "megafire_threshold": megafire_threshold}

# Example usage:
# res = analyze_suppression_paradox(exp_dir="experiments/suppression_test", megafire_quantile=0.995)

In [None]:
# Quick comparison of fire-size distributions across suppression levels:
# one short simulation per level, drawn as a log-binned histogram normalised
# by the number of positive-size fires, all on the same log-log axes.
# NOTE: this cell is live code; running it executes the simulations.
# Do not bind a variable called `results` here: that name is the imported
# module whose `results.path(...)` the analysis cells call.
L = 512

plt.figure()

suppresions = [0,1, 5, 10, 50, 100, 1000]

for sup in suppresions:

    res = simulate_drosselschwab_record(L, p=0.1, f=0.001, steps=100, suppress=sup)

    data = np.asarray(res[0])

    # number of recorded entries before filtering (zero-size ones included)
    all_size = len(data)
    # remove non-positive values (log-scale requires >0)
    data = data[data > 0]

    if data.size == 0:
        print("No positive data to plot.")
        continue

    # number of log-spaced bin edges (adjustable)
    nbins = 50
    # create log-spaced bins between min and max
    bins = np.logspace(np.log10(data.min()), np.log10(data.max()), nbins)
    counts, edges = np.histogram(data, bins=bins)
    # geometric centers of bins
    centers = np.sqrt(edges[:-1] * edges[1:])
    # filter zero-count bins for clearer log-log plotting
    nonzero = counts > 0
    # Label each series where it is drawn, so a skipped level cannot shift
    # the legend entries onto the wrong series.
    plt.scatter(centers[nonzero], counts[nonzero]/len(data), s=20, label=f"suppress={sup}")  # dots

plt.xscale('log')
plt.yscale('log')
plt.xlabel('Fire size')
# The plotted value is count / number of positive fires, i.e. a fraction.
plt.ylabel('Fraction of fires')
plt.title('Log-Log Fire Size Distribution (dots)')
plt.grid(True, which='both', ls='--', alpha=0.5)
plt.legend()

In [None]:
# Summary of the most recent simulation held in `res`: number of recorded
# entries, and how many of them have size 0.
_recorded = np.array(res[0])
print(len(_recorded), "fires total")
print(np.sum(_recorded == 0), "fires supressed")

In [None]:
# Log-binned histogram of the fire sizes in `res[0]`, drawn on log-log axes
# with one black dot per non-empty bin.
import numpy as np

# Only strictly positive sizes can be placed on a logarithmic axis.
data = np.asarray(res[0])
data = data[data > 0]

if data.size > 0:
    # number of log-spaced bin edges (adjustable); gives nbins - 1 bins
    nbins = 50
    bins = np.logspace(np.log10(data.min()), np.log10(data.max()), nbins)
    counts, edges = np.histogram(data, bins=bins)
    # each bin is represented by the geometric mean of its two edges
    centers = np.sqrt(edges[:-1] * edges[1:])
    # empty bins have no position on a log axis, so they are left out
    nonzero = counts > 0

    plt.figure()
    plt.scatter(centers[nonzero], counts[nonzero], s=20, c='k')  # dots
    plt.xscale('log')
    plt.yscale('log')
    plt.xlabel('Fire size')
    plt.ylabel('Count')
    plt.title('Log-Log Fire Size Distribution (dots)')
    plt.grid(True, which='both', ls='--', alpha=0.5)
else:
    print("No positive data to plot.")

---
## Experiment 1: Sweep of f and p

How does the ratio of fire probability to growth probability affect fire size distributions?

In [None]:
# Experiment 1 grid: every growth probability p is combined with every f/p
# ratio, and the lightning probability is derived as f = p * ratio.
EXP1_NAME = "f_over_p"
L = 256
steps = 2000
runs_per_param = 3

f_over_p_ratios = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0]
p_values = [0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2]

# One dict per (p, ratio, repeat). `param_id` is 1-based, advances with the
# ratio fastest, and is shared by all repeats of the same (p, ratio) pair.
exp1_param_list = []
param_idx = 0  # stays 0 when either list is empty
for param_idx, (p, ratio) in enumerate(
    ((pv, rv) for pv in p_values for rv in f_over_p_ratios), start=1
):
    for run_idx in range(runs_per_param):
        exp1_param_list.append(dict(
            L=L, p=p, f=p * ratio, steps=steps,
            param_id=param_idx, run_id=run_idx,
        ))

print(f"Experiment 1: {len(exp1_param_list)} simulations, {param_idx} parameter sets")

In [None]:
# Run Experiment 1.
# NOTE: these lines are live code, not commented out - executing this cell
# launches every simulation described in exp1_param_list.
# Steps: obtain an output location for EXP1_NAME via create_experiment_dir
# (presumably a fresh directory - confirm in utils), run the parameter list
# through run_parallel_simulations, then hand the results to save_summary.
exp1_outdir = create_experiment_dir(EXP1_NAME)
exp1_results = run_parallel_simulations(exp1_param_list, exp1_outdir)
save_summary(exp1_results, exp1_outdir)

In [None]:
# Analyze Experiment 1: load the latest run of EXP1_NAME and draw each plot
# listed below into its own image file obtained from results.path(...).
try:
    exp1_dir = get_latest_experiment_dir(EXP1_NAME)
    exp1_data = load_experiment_data(exp1_dir)
    exp1_summary = load_summary_map(exp1_dir)
    for _plot, _title, _png in (
        (plot_fire_size_distribution, "Exp 1: Fire Size by f/p Ratio", "exp1_fire_size_dist.png"),
        (plot_density_timeseries, "Exp 1: Tree Density Over Time", "exp1_density_timeseries.png"),
    ):
        _plot(exp1_data, exp1_summary, _title, results.path(_png))
except FileNotFoundError as e:
    # A missing file anywhere above is reported instead of raised.
    print(f"No data: {e}")

---
## Experiment 3: Grid Size Effects (RQ1)

How does system size L affect scaling? Is log-log slope constant? How does cut-off change?

In [None]:
# Experiment 3 grid: p and f are fixed and only the grid size L varies.
EXP3_NAME = "grid_size_rq1"
steps = 2000
runs_per_param = 3
p_fixed = 0.01
f_fixed = 0.0001

# NOTE(review): 1028, 2056 and 5012 are not powers of two, unlike the smaller
# sizes - confirm that 1024 / 2048 / 4096 were not intended.
L_values = [32, 64, 128, 256, 512, 1028, 2056, 5012]

# One dict per (grid size, repeat); `param_id` is the 1-based position of the
# grid size in L_values. The loop rebinds the notebook-level name `L`.
exp3_param_list = []
for param_idx, L in enumerate(L_values, start=1):
    for run_idx in range(runs_per_param):
        exp3_param_list.append(dict(
            L=L, p=p_fixed, f=f_fixed, steps=steps,
            param_id=param_idx, run_id=run_idx,
        ))

print(f"Experiment 3: {len(exp3_param_list)} simulations, L = {L_values}")

In [None]:
# Run Experiment 3.
# NOTE: these lines are live code, not commented out - executing this cell
# launches every simulation described in exp3_param_list.
# Steps: obtain an output location for EXP3_NAME via create_experiment_dir
# (presumably a fresh directory - confirm in utils), run the parameter list
# through run_parallel_simulations, then hand the results to save_summary.
exp3_outdir = create_experiment_dir(EXP3_NAME)
exp3_results = run_parallel_simulations(exp3_param_list, exp3_outdir)
save_summary(exp3_results, exp3_outdir)

In [None]:
# Analyze Experiment 3: load the latest run of EXP3_NAME and draw each plot
# listed below into its own image file obtained from results.path(...).
try:
    exp3_dir = get_latest_experiment_dir(EXP3_NAME)
    exp3_data = load_experiment_data(exp3_dir)
    exp3_summary = load_summary_map(exp3_dir)
    for _plot, _title, _png in (
        (plot_fire_size_distribution, "Exp 3: Fire Size by Grid Size L", "exp3_fire_size_dist.png"),
        (plot_cluster_size_distribution, "Exp 3: Cluster Size by Grid Size L", "exp3_cluster_size_dist.png"),
    ):
        _plot(exp3_data, exp3_summary, _title, results.path(_png))
except FileNotFoundError as e:
    # A missing file anywhere above is reported instead of raised.
    print(f"No data: {e}")

---
## Experiment 4: f Parameter Sweep

How does fire/lightning probability alone affect system dynamics? (Complement to Exp 2)

In [None]:
# Experiment 4 grid: grid size and growth probability are fixed and only the
# lightning probability f varies.
EXP4_NAME = "f_sweep"
L = 256
steps = 5000
runs_per_param = 3
p_fixed = 0.05

f_values = [0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01]

# One dict per (f, repeat); `param_id` is the 1-based position of f in
# f_values. The loop rebinds the notebook-level name `f`.
exp4_param_list = []
for param_idx, f in enumerate(f_values, start=1):
    for run_idx in range(runs_per_param):
        exp4_param_list.append(dict(
            L=L, p=p_fixed, f=f, steps=steps,
            param_id=param_idx, run_id=run_idx,
        ))

print(f"Experiment 4: {len(exp4_param_list)} simulations")

In [None]:
# Run Experiment 4.
# NOTE: these lines are live code, not commented out - executing this cell
# launches every simulation described in exp4_param_list.
# Steps: obtain an output location for EXP4_NAME via create_experiment_dir
# (presumably a fresh directory - confirm in utils), run the parameter list
# through run_parallel_simulations, then hand the results to save_summary.
exp4_outdir = create_experiment_dir(EXP4_NAME)
exp4_results = run_parallel_simulations(exp4_param_list, exp4_outdir)
save_summary(exp4_results, exp4_outdir)

In [None]:
# Analyze Experiment 4: load the latest run of EXP4_NAME and draw each plot
# listed below into its own image file obtained from results.path(...).
try:
    exp4_dir = get_latest_experiment_dir(EXP4_NAME)
    exp4_data = load_experiment_data(exp4_dir)
    exp4_summary = load_summary_map(exp4_dir)
    for _plot, _title, _png in (
        (plot_fire_size_distribution, "Exp 4: Fire Size by f Value", "exp4_fire_size_dist.png"),
        (plot_density_timeseries, "Exp 4: Tree Density by f Value", "exp4_density_timeseries.png"),
    ):
        _plot(exp4_data, exp4_summary, _title, results.path(_png))
except FileNotFoundError as e:
    # A missing file anywhere above is reported instead of raised.
    print(f"No data: {e}")