# Model Calibration using Genetic Algorithm

This notebook implements a genetic algorithm to optimize the parameters of the forest fire spread model using real fire data from the MTBS dataset. The optimization is based on the Sørensen index (also known as the Dice coefficient), which measures the spatial agreement between simulated and observed burned areas.

In [None]:
# Import necessary libraries
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import random
import json
from scipy.stats import pearsonr
from deap import base, creator, tools, algorithms
import multiprocessing

# Add src directory to path
sys.path.append(os.path.join(os.getcwd(), 'src'))

# Import our model from the src directory
from model import CA

# Set random seed for reproducibility
np.random.seed(42)
random.seed(42)

## Finding Available Fire Data

First, we'll list the fire datasets that we use for calibration. The dataset names are hard-coded here; each name is later passed to the model's MTBS loader.

In [None]:
# Fire dataset names; the first entry is used for calibration and sensitivity
# analysis, and the whole list is used for cross-validation.
fires = ['alabama', 'arizona']

## Setting Up the Calibration Framework

We'll define functions to evaluate model performance against real fire data using the Sørensen index.

In [None]:
def init_model_from_fire_data(fire, grid_size=(100, 100)):
    """Build a CA model for one fire and load its MTBS data.

    Args:
        fire: Fire dataset identifier handed to
            ``CA.initialize_from_mtbs_data``.
        grid_size: Grid dimensions handed to the ``CA`` constructor.

    Returns:
        The ``CA`` instance when ``initialize_from_mtbs_data`` returns a
        truthy value, otherwise ``None``.
    """
    ca_model = CA(grid_size=grid_size)

    # A falsy result from the loader means the model is not usable.
    if not ca_model.initialize_from_mtbs_data(fire):
        return None

    return ca_model

def evaluate_model_performance(model, simulation_steps=20, params=None):
    """Run a simulation and score it against the observed burn area.

    The score is the Sørensen index (Dice coefficient),
    ``2*TP / (2*TP + FP + FN)``, computed cell by cell between the simulated
    burnt cells (``model.grid == 2``) and ``model.actual_burned_area``.

    Args:
        model: Object exposing ``run_simulation(steps)``, ``grid`` and
            ``actual_burned_area``. Its ``p0``, ``c1`` and ``c2`` attributes
            are overwritten when ``params`` is given.
        simulation_steps: Number of steps passed to ``model.run_simulation``.
        params: Optional dict with keys ``'p0'``, ``'c1'`` and ``'c2'``; a
            missing key falls back to 0.5. ``None`` or an empty dict leaves
            the model's current parameters untouched.

    Returns:
        float: The Sørensen index, or 0.0 when the model has no observed burn
        data or when neither map contains any burnt cell.
    """
    if params:
        # Set model parameters
        model.p0 = params.get('p0', 0.5)
        model.c1 = params.get('c1', 0.5)
        model.c2 = params.get('c2', 0.5)

    # Only the final grid is scored, so the returned history is not kept.
    model.run_simulation(simulation_steps)

    observed = model.actual_burned_area
    if observed is None:
        return 0.0  # No actual data to compare with

    simulated_burned = model.grid == 2  # Cells with state 2 are burnt
    observed_burned = observed == 1
    observed_unburned = observed == 0

    true_positives = np.sum(simulated_burned & observed_burned)
    false_positives = np.sum(simulated_burned & observed_unburned)
    false_negatives = np.sum(~simulated_burned & observed_burned)

    denominator = 2 * true_positives + false_positives + false_negatives
    if denominator <= 0:
        return 0.0

    # Always hand back a plain float, whatever NumPy scalar the division yields.
    return float(2 * true_positives / denominator)

## Implementing the Genetic Algorithm for Parameter Optimization

Now we'll implement a genetic algorithm to find the optimal parameters for our fire spread model.

In [None]:
# Define parameter ranges
PARAM_RANGES = dict(
    p0=(0.1, 0.9),  # Base ignition probability
    c1=(0.1, 1.0),  # Wind effect parameter 1
    c2=(0.1, 1.0),  # Wind effect parameter 2
)

# Define genetic algorithm fitness function
def evaluate_individual(individual, fire_folder, simulation_steps=20):
    """Compute the GA fitness of one candidate parameter set.

    Args:
        individual: Sequence whose first three items are ``p0``, ``c1`` and
            ``c2``, in that order.
        fire_folder: Fire dataset identifier used to initialize the model.
        simulation_steps: Number of simulation steps to run.

    Returns:
        A one-element tuple (the shape DEAP expects) holding the Sørensen
        index, or ``(0.0,)`` when the model could not be initialized.
    """
    # Map the positional genes onto the named model parameters.
    candidate = {
        name: individual[position]
        for position, name in enumerate(('p0', 'c1', 'c2'))
    }

    fire_model = init_model_from_fire_data(fire_folder)

    if fire_model:
        fitness = evaluate_model_performance(fire_model, simulation_steps, candidate)
    else:
        fitness = 0.0

    return (fitness,)

# Setup genetic algorithm
def setup_genetic_algorithm():
    """Build the DEAP toolbox used to calibrate ``p0``, ``c1`` and ``c2``.

    The toolbox provides ``individual`` and ``population`` factories whose
    genes are drawn uniformly from ``PARAM_RANGES``, plus ``mate`` (blend
    crossover), ``mutate`` (Gaussian mutation) and ``select`` (tournament)
    operators. Offspring produced by ``mate`` and ``mutate`` are clamped back
    into ``PARAM_RANGES``.

    Returns:
        deap.base.Toolbox: The configured toolbox. The caller still has to
        register an ``evaluate`` function.
    """
    import functools

    # creator.create() adds classes to the shared deap.creator module; only
    # create them when absent so repeated calls do not overwrite them.
    if not hasattr(creator, "FitnessMax"):
        # We want to maximize the Sørensen index
        creator.create("FitnessMax", base.Fitness, weights=(1.0,))
    if not hasattr(creator, "Individual"):
        creator.create("Individual", list, fitness=creator.FitnessMax)

    toolbox = base.Toolbox()

    # Register attribute generators
    toolbox.register("attr_p0", random.uniform, PARAM_RANGES['p0'][0], PARAM_RANGES['p0'][1])
    toolbox.register("attr_c1", random.uniform, PARAM_RANGES['c1'][0], PARAM_RANGES['c1'][1])
    toolbox.register("attr_c2", random.uniform, PARAM_RANGES['c2'][0], PARAM_RANGES['c2'][1])

    # Register individual and population creation
    toolbox.register("individual", tools.initCycle, creator.Individual,
                     (toolbox.attr_p0, toolbox.attr_c1, toolbox.attr_c2), n=1)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)

    # Register genetic operators
    toolbox.register("mate", tools.cxBlend, alpha=0.5)
    toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=0.1, indpb=0.2)
    toolbox.register("select", tools.selTournament, tournsize=3)

    # Gene order matches the order used when building an individual.
    gene_bounds = (PARAM_RANGES['p0'], PARAM_RANGES['c1'], PARAM_RANGES['c2'])

    def clamp_to_bounds(operator):
        """Wrap a variation operator so every returned gene stays in range."""
        @functools.wraps(operator)
        def bounded(*args, **kwargs):
            offspring = operator(*args, **kwargs)
            for child in offspring:
                for position, (low, high) in enumerate(gene_bounds):
                    child[position] = min(max(child[position], low), high)
            return offspring
        return bounded

    # Blend crossover and Gaussian mutation are unbounded, so without this
    # the search can drift outside the declared parameter ranges.
    toolbox.decorate("mate", clamp_to_bounds)
    toolbox.decorate("mutate", clamp_to_bounds)

    return toolbox

# Remove any FitnessMax / Individual classes already present on deap's creator
# module (presumably left over from an earlier run of this notebook).
if hasattr(creator, "FitnessMax"):
    delattr(creator, "FitnessMax")
if hasattr(creator, "Individual"):
    delattr(creator, "Individual")

## Running the Genetic Algorithm

Now we'll run the genetic algorithm to find the optimal parameters for our fire spread model.

In [None]:
def run_genetic_algorithm(fire_folder, pop_size=30, n_gen=10, simulation_steps=20):
    """Calibrate the model parameters for one fire with a genetic algorithm.

    Builds the toolbox, evolves a population with ``algorithms.eaSimple``
    (crossover probability 0.7, mutation probability 0.2), prints the best
    result and plots average and best fitness per generation.

    Args:
        fire_folder: Fire dataset identifier used for every fitness evaluation.
        pop_size: Number of individuals in the population.
        n_gen: Number of generations to evolve.
        simulation_steps: Simulation steps per fitness evaluation.

    Returns:
        tuple: ``(best_params, best_fitness, logbook)`` where ``best_params``
        is a dict with keys ``'p0'``, ``'c1'`` and ``'c2'``, ``best_fitness``
        is the best individual's fitness value, and ``logbook`` is the
        logbook returned by ``eaSimple``.
    """
    print(f"Running genetic algorithm optimization for {fire_folder}...")

    ga_toolbox = setup_genetic_algorithm()

    # Bind the fitness function to this fire dataset and step count.
    ga_toolbox.register(
        "evaluate",
        evaluate_individual,
        fire_folder=fire_folder,
        simulation_steps=simulation_steps,
    )

    population = ga_toolbox.population(n=pop_size)

    # Per-generation summary statistics over the fitness values.
    fitness_stats = tools.Statistics(lambda ind: ind.fitness.values)
    summaries = (("avg", np.mean), ("std", np.std), ("min", np.min), ("max", np.max))
    for label, reducer in summaries:
        fitness_stats.register(label, reducer)

    # The hall of fame keeps the single best individual seen during the run.
    hall_of_fame = tools.HallOfFame(1)
    population, logbook = algorithms.eaSimple(
        population,
        ga_toolbox,
        cxpb=0.7,
        mutpb=0.2,
        ngen=n_gen,
        stats=fitness_stats,
        halloffame=hall_of_fame,
        verbose=True,
    )

    champion = hall_of_fame[0]
    best_params = {
        name: champion[position]
        for position, name in enumerate(('p0', 'c1', 'c2'))
    }
    best_fitness = champion.fitness.values[0]

    print(f"\nOptimization complete.")
    print(f"Best parameters: {best_params}")
    print(f"Best Sørensen index: {best_fitness:.4f}")

    # Plot average and best fitness against generation number.
    generations = logbook.select("gen")
    average_curve = logbook.select("avg")
    best_curve = logbook.select("max")

    plt.figure(figsize=(10, 6))
    plt.plot(generations, average_curve, label="Average")
    plt.plot(generations, best_curve, label="Best")
    plt.xlabel("Generation")
    plt.ylabel("Fitness (Sørensen Index)")
    plt.title("Genetic Algorithm Optimization Progress")
    plt.legend()
    plt.grid(True)
    plt.show()

    return best_params, best_fitness, logbook

# Run GA for a single fire dataset (the first entry in `fires`)
calibration_fire = fires[0]
# Run with smaller population and fewer generations for testing
best_params, best_fitness, logbook = run_genetic_algorithm(
    fire_folder=calibration_fire,
    pop_size=20,  # Small population for demonstration
    n_gen=5,      # Few generations for demonstration
    simulation_steps=20
)

## Validating the Optimized Parameters

Let's validate our optimized parameters by comparing the model performance with default versus optimized parameters.

## Cross-Validation on Multiple Fire Datasets

To check how well our optimized parameters generalize, let's evaluate them on multiple fire datasets. Note that the list below also includes the fire used for calibration.

In [None]:
def cross_validate_parameters(params, fire_folders, simulation_steps=20):
    """Score one parameter set on several fire datasets and plot the results.

    Each dataset gets a freshly initialized model. Datasets whose model
    cannot be initialized are reported and skipped, so they do not affect
    the average.

    Args:
        params: Parameter dict (keys ``'p0'``, ``'c1'``, ``'c2'``) passed to
            ``evaluate_model_performance``.
        fire_folders: Sequence of fire dataset identifiers to evaluate.
        simulation_steps: Simulation steps per evaluation.

    Returns:
        tuple: ``(results, avg_sorensen)`` where ``results`` maps each
        evaluated dataset to its Sørensen index and ``avg_sorensen`` is their
        mean, or 0.0 when no dataset could be evaluated.
    """
    print(f"Cross-validating parameters on {len(fire_folders)} fire datasets...")

    results = {}
    for fire_folder in fire_folders:
        print(f"\nValidating on {fire_folder}...")

        # Initialize model
        model = init_model_from_fire_data(fire_folder)
        if model is None:
            # Initialization failed; there is nothing to simulate for this fire.
            print(f"Could not initialize model for {fire_folder}; skipping.")
            continue

        # Evaluate with given parameters
        sorensen = evaluate_model_performance(model, simulation_steps, params)
        print(f"Sørensen index: {sorensen:.4f}")

        results[fire_folder] = sorensen

    if not results:
        return results, 0.0

    # Calculate average performance
    avg_sorensen = sum(results.values()) / len(results)
    print(f"\nAverage Sørensen index across all datasets: {avg_sorensen:.4f}")

    # Plot results
    plt.figure(figsize=(10, 6))
    plt.bar(results.keys(), results.values())
    plt.axhline(y=avg_sorensen, color='r', linestyle='--', label=f"Average: {avg_sorensen:.4f}")
    plt.xlabel("Fire Dataset")
    plt.ylabel("Sørensen Index")
    plt.title("Parameter Performance Across Fire Datasets")
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()

    return results, avg_sorensen

# Evaluate the GA-optimized parameters on every dataset in `fires`.
# This runs unconditionally, so `best_params` must already be defined.
results, avg_performance = cross_validate_parameters(
        params=best_params,
        fire_folders=fires,
        simulation_steps=20
)

## Parameter Sensitivity Analysis

Let's analyze how sensitive the model is to different parameter values.

In [None]:
def parameter_sensitivity_analysis(fire_folder, base_params, parameter_name, value_range, steps=10, simulation_steps=20):
    """Sweep one parameter and record the Sørensen index at each value.

    All other parameters stay at their ``base_params`` values. A fresh model
    is initialized for every swept value, and the resulting curve is plotted
    with the base value marked.

    Args:
        fire_folder: Fire dataset identifier used for every evaluation.
        base_params: Parameter dict providing the fixed values; it must
            contain ``parameter_name``.
        parameter_name: Key of the parameter to vary.
        value_range: ``(low, high)`` pair giving the inclusive sweep range.
        steps: Number of evenly spaced values to evaluate.
        simulation_steps: Simulation steps per evaluation.

    Returns:
        tuple: ``(values, scores)`` where ``values`` is the array of swept
        values and ``scores`` is the list of matching Sørensen indices.

    Raises:
        KeyError: If ``parameter_name`` is not in ``base_params``.
        ValueError: If the model cannot be initialized for ``fire_folder``.
    """
    print(f"Running sensitivity analysis for {parameter_name} on {fire_folder}...")

    # Look the base value up first so a bad parameter name fails before any
    # simulation is run rather than at plot time.
    base_value = base_params[parameter_name]

    values = np.linspace(value_range[0], value_range[1], steps)
    scores = []

    for value in values:
        # Create new parameter set with the changed parameter
        params = base_params.copy()
        params[parameter_name] = value

        # Initialize model
        model = init_model_from_fire_data(fire_folder)
        if model is None:
            raise ValueError(f"Could not initialize model for {fire_folder}")

        # Evaluate with this parameter value
        sorensen = evaluate_model_performance(model, simulation_steps, params)
        scores.append(sorensen)
        print(f"{parameter_name} = {value:.3f} -> Sørensen = {sorensen:.4f}")

    # Plot sensitivity curve
    plt.figure(figsize=(10, 6))
    plt.plot(values, scores, 'o-')
    plt.axvline(x=base_value, color='r', linestyle='--',
                label=f"Optimized value: {base_value:.3f}")
    plt.xlabel(parameter_name)
    plt.ylabel("Sørensen Index")
    plt.title(f"Sensitivity Analysis for {parameter_name}")
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()

    return values, scores

# Run sensitivity analysis on the first dataset in `fires`.
# This runs unconditionally, so `best_params` must already be defined.
analysis_fire = fires[0]

# Analyze sensitivity to p0 (base ignition probability)
p0_values, p0_scores = parameter_sensitivity_analysis(
    fire_folder=analysis_fire,
    base_params=best_params,
    parameter_name='p0',
    value_range=PARAM_RANGES['p0'],
    steps=8,  # Fewer steps for demonstration
    simulation_steps=20
)

# Analyze sensitivity to c1 (wind effect parameter 1)
c1_values, c1_scores = parameter_sensitivity_analysis(
    fire_folder=analysis_fire,
    base_params=best_params,
    parameter_name='c1',
    value_range=PARAM_RANGES['c1'],
    steps=8,  # Fewer steps for demonstration
    simulation_steps=20
    )

## Save Optimized Parameters

Finally, let's save the optimized parameters to a file for later use.