## 1: Imports and Setup

In [1]:
# =============================================================================
# CELL 1: Imports and Setup
# =============================================================================

import sys
from pathlib import Path
import yaml
import xarray as xr
import numpy as np

# Set project root
# For script: use __file__
# For notebook: use Path.cwd()

# Running in notebook/REPL
project_root = Path.cwd().resolve()

print(f"Project root: {project_root}")

if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

# Import dscim-new components
from dscim_new.config import DSCIMConfig
from dscim_new.config.schemas import (
    PipelineConfig,
    DiscountingConfig,
    PathsConfig,
    ClimateDataConfig,
    EconDataConfig,
    SectorConfig,
    DamageFunctionConfig,
    SCCConfig,
)
from dscim_new.pipeline.steps import (
    ReduceDamagesStep,
    GenerateDamageFunctionStep,
    CalculateSCCStep,
)
from dscim_new.utils import ClimateDataGenerator, DamagesDataGenerator

print("Imports successful")


Project root: /Users/sebastiancadavidsanchez/Documents/Github/cil/dscim-new/examples/notebooks
Imports successful


In [2]:
# =============================================================================
# 2: Configuration Setup - Choose Data Source
# =============================================================================

print("=" * 80)
print("STEP 1: CONFIGURATION SETUP")
print("=" * 80)

# Choose whether to use synthetic data or existing data
USE_SYNTHETIC_DATA = True  # Set to False to use dscim-testing data

if USE_SYNTHETIC_DATA:
    output_dir = project_root / "workflow_output"
    print(f"\nGenerating synthetic data in: {output_dir}")
else:
    output_dir = project_root / "dscim-testing" / "dummy_data"
    print(f"\nUsing existing data in: {output_dir}")


STEP 1: CONFIGURATION SETUP

Generating synthetic data in: /Users/sebastiancadavidsanchez/Documents/Github/cil/dscim-new/examples/notebooks/workflow_output


In [3]:

# =============================================================================
# 3: Generate Synthetic Data (Skip if using existing data)
# =============================================================================

if USE_SYNTHETIC_DATA:
    print("\nGenerating synthetic data...")

    # Create directories
    climate_dir = output_dir / "climate_data"
    damages_dir = output_dir / "damages_data"
    climate_dir.mkdir(parents=True, exist_ok=True)
    damages_dir.mkdir(parents=True, exist_ok=True)

    # Generate climate data
    print("  - Climate data...")
    climate_gen = ClimateDataGenerator(seed=42, verbose=False)
    climate_paths = climate_gen.generate_all_climate_data(str(climate_dir))

    # Generate damages and economic data
    print("  - Damages and economic data...")
    damages_gen = DamagesDataGenerator(seed=42, verbose=False)
    damages_paths = damages_gen.generate_all_damages_data(str(damages_dir))

    # Combine paths
    data_paths = {**climate_paths, **damages_paths}

    print(f"  Generated {len(data_paths)} data files")

else:
    # Use existing data paths from dscim-testing
    config_path = project_root / "dscim-testing" / "configs" / "dummy_config.yaml"

    if not config_path.exists():
        raise FileNotFoundError(f"Config not found: {config_path}")

    # Load to get paths
    with open(config_path, 'r') as f:
        conf_dict = yaml.safe_load(f)

    base_dir = project_root / "dscim-testing" / "dummy_data"
    data_paths = {
        "climate": str(base_dir / "climate"),
        "economic": str(base_dir / "econ" / "integration-econ-bc39.zarr"),
        "sector": str(base_dir / "sectoral" / "not_coastl_damages.zarr"),
    }

    print("  Using existing data")



Generating synthetic data...
  - Climate data...




  - Damages and economic data...




  Generated 8 data files


In [4]:
!tree -L 3 "workflow_output"

[1;36mworkflow_output[0m
├── [1;36mclimate_data[0m
│   ├── GMTanom_all_temp.csv
│   ├── ar6_fair162_sim.nc
│   ├── [1;36mcoastal_gmsl.zarr[0m
│   │   ├── [1;36mgmsl[0m
│   │   ├── [1;36mslr[0m
│   │   ├── [1;36myear[0m
│   │   └── zarr.json
│   ├── conversion.nc4
│   └── scenario_gmsl.nc4
├── [1;36mdamage_functions[0m
│   └── [1;36mnot_coastal[0m
│       └── [1;36m2020[0m
├── [1;36mdamages_data[0m
│   ├── [1;36mecon[0m
│   │   └── [1;36mintegration-econ.zarr[0m
│   └── [1;36msectoral[0m
│       ├── [1;36mcoastal_damages.zarr[0m
│       └── [1;36mnoncoastal_damages.zarr[0m
├── [1;36mreduced_damages[0m
│   └── [1;36mnot_coastal[0m
│       ├── [1;36madding_up_cc_reduced_damages.zarr[0m
│       ├── [1;36madding_up_no_cc_reduced_damages.zarr[0m
│       ├── [1;36mequity_cc_eta2.0_reduced_damages.zarr[0m
│       ├── [1;36mequity_no_cc_eta2.0_reduced_damages.zarr[0m
│       ├── [1;36mrisk_aversion_cc_eta2.0_reduced_damages.zarr[0m
│       └── [1;36m

### 4: Create Configuration Object

In [5]:
print("\nCreating configuration object...")

# Previous approach: Dictionary-based config
# with open(config_path, 'r') as f:
#     conf = yaml.safe_load(f)
# No validation, access via conf['key']['subkey']

# New approach: Pydantic-validated configuration
# Note: DSCIMConfig requires paths, econdata, and sectors

if USE_SYNTHETIC_DATA:
    # Build configuration with synthetic data paths
    config = DSCIMConfig(
        paths=PathsConfig(
            reduced_damages_library=str(output_dir / "reduced_damages"),
            ssp_damage_function_library=str(output_dir / "damage_functions"),
            AR6_ssp_results=str(output_dir / "scc_results"),
        ),
        econdata=EconDataConfig(global_ssp=data_paths["economic"]),
        sectors={
            "not_coastal": SectorConfig(
                sector_path=data_paths["noncoastal_damages"],
                histclim="histclim_dummy",  # Variable name in dataset
                delta="delta_dummy",  # Variable name in dataset
                formula="damages ~ -1 + anomaly + np.power(anomaly, 2)",
            )
        },
        climate_data=ClimateDataConfig(
            gmst_path=data_paths["gmst"],
            gmsl_path=data_paths["gmsl"],
            fair_temperature_path=data_paths["fair_temperature"],
            fair_gmsl_path=data_paths["fair_gmsl"],
            pulse_conversion_path=data_paths["pulse_conversion"],
        ),
        damage_function=DamageFunctionConfig(
            formula="damages ~ -1 + anomaly + np.power(anomaly, 2)",
            fit_type="ols",
        ),
        scc=SCCConfig(pulse_years=[2020]),
    )
else:
    # Load from existing config
    config = DSCIMConfig.from_yaml(str(config_path))
    # Override output paths
    config.paths.reduced_damages_library = str(output_dir / "reduced_damages")
    config.paths.ssp_damage_function_library = str(output_dir / "damage_functions")
    config.paths.AR6_ssp_results = str(output_dir / "scc_results")

print("Configuration created successfully")
print(f"Sectors available: {list(config.sectors.keys())}")



Creating configuration object...
Configuration created successfully
Sectors available: ['not_coastal']


## 5: Configure Pipeline Parameters

In [6]:


print("\n" + "=" * 80)
print("STEP 2: PIPELINE PARAMETERS")
print("=" * 80)

# Previous approach: Hard-coded parameters scattered in script
# eta_rhos = [[2.0, 0.0001]]
# reductions = ['cc', 'no_cc']
# recipe_discs = product(['adding_up', 'risk_aversion', 'equity'], [...])

# New approach: Centralized configuration
if config.pipeline is None:
    config.pipeline = PipelineConfig()

# Set recipes to process
config.pipeline.recipes = ["adding_up", "risk_aversion", "equity"]
config.pipeline.reductions = ["cc", "no_cc"]
config.pipeline.eta_values = [2.0]

# Configure discount methods (5 methods matching original)
config.discounting = [
    DiscountingConfig(discount_type="constant", discount_rate=0.02),
    DiscountingConfig(
        discount_type="ramsey", eta=2.0, rho=0.0001, ramsey_method="naive_ramsey"
    ),
    DiscountingConfig(
        discount_type="ramsey", eta=2.0, rho=0.0001, ramsey_method="euler_ramsey"
    ),
    DiscountingConfig(
        discount_type="gwr", eta=2.0, rho=0.0001, gwr_method="naive_gwr"
    ),
    DiscountingConfig(
        discount_type="gwr", eta=2.0, rho=0.0001, gwr_method="euler_gwr"
    ),
]

# Calculate expected combinations
n_recipes = len(config.pipeline.recipes)
n_discount_methods = len(config.discounting)
n_combinations = n_recipes * n_discount_methods

print(f"\nPipeline configured:")
print(f"  Recipes: {config.pipeline.recipes}")
print(f"  Reductions: {config.pipeline.reductions}")
print(f"  Discount methods: {n_discount_methods}")
print(f"\nExpected: {n_recipes} recipes x {n_discount_methods} methods = {n_combinations} combinations")



STEP 2: PIPELINE PARAMETERS

Pipeline configured:
  Recipes: ['adding_up', 'risk_aversion', 'equity']
  Reductions: ['cc', 'no_cc']
  Discount methods: 5

Expected: 3 recipes x 5 methods = 15 combinations


In [7]:
# =============================================================================
# CELL 6: Select Sector to Process
# =============================================================================

sector_name = list(config.sectors.keys())[0]
sector_config = config.sectors[sector_name]

print(f"\nProcessing sector: {sector_name}")
print(f"  Data path: {sector_config.sector_path}")
print(f"  Formula: {sector_config.formula}")


Processing sector: not_coastal
  Data path: /Users/sebastiancadavidsanchez/Documents/Github/cil/dscim-new/examples/notebooks/workflow_output/damages_data/sectoral/noncoastal_damages.zarr
  Formula: damages ~ -1 + anomaly + np.power(anomaly, 2)


In [8]:

# =============================================================================
# CELL 7: STEP 1 - Reduce Damages
# =============================================================================

print("\n" + "=" * 80)
print("STEP 3: REDUCE DAMAGES")
print("=" * 80)

# Previous Approach:
#   Multiple calls to reduce_damages() in nested loops
#   for sector, reduction in product(sectors, reductions):
#       for recipe in recipes:
#           reduce_damages(sector, config, recipe, reduction, ...)

# New Approach:
#   Explicit step execution with optional I/O
#   step = ReduceDamagesStep(config, sector, recipe, reduction)
#   result = step.run(inputs={...}, save=False)  # In-memory
#   OR
#   result = step.run(inputs={...}, save=True)   # Save to disk

# Key Difference:
#   - Results accessible for inspection before next step
#   - I/O is optional (save parameter)
#   - Each step independently executable

print(f"\nProcessing sector: {sector_name}")
print(f"Recipes: {config.pipeline.recipes}")
print(f"Reductions: {config.pipeline.reductions}")

# Store results for next step
reduced_damages_results = {}

for recipe in config.pipeline.recipes:
    for reduction in config.pipeline.reductions:
        print(f"\n  Processing: {recipe} x {reduction}")

        # Determine eta parameter based on recipe
        eta = 2.0 if recipe in ["risk_aversion", "equity"] else None

        # Create reduction step
        step = ReduceDamagesStep(
            config=config,
            sector=sector_name,
            recipe=recipe,
            reduction=reduction,
            eta=eta,
            verbose=False,
        )

        # Execute step
        # Note: save=False keeps result in memory
        # Note: save=True writes to config.paths.reduced_damages_library
        output = step.run(
            inputs={
                "sector_damages_path": sector_config.sector_path,
                "socioec_path": config.econdata.global_ssp,
            },
            save=True,  # Change to False to keep only in memory
        )

        # Store result for next step
        key = (recipe, reduction)
        reduced_damages_results[key] = output["reduced_damages"]

        # Inspect result (now possible with explicit data flow)
        result = reduced_damages_results[key]
        if isinstance(result, xr.Dataset):
            print(f"    Type: Dataset with {len(result.data_vars)} variables")
            print(f"    Variables: {list(result.data_vars)}")
        else:
            print(f"    Shape: {result.shape}")
            print(f"    Type: {type(result).__name__}")

print(f"\nDamage reduction complete: {len(reduced_damages_results)} combinations")




STEP 3: REDUCE DAMAGES

Processing sector: not_coastal
Recipes: ['adding_up', 'risk_aversion', 'equity']
Reductions: ['cc', 'no_cc']

  Processing: adding_up x cc




    Type: Dataset with 1 variables
    Variables: ['cc']

  Processing: adding_up x no_cc




    Type: Dataset with 1 variables
    Variables: ['no_cc']

  Processing: risk_aversion x cc




    Type: Dataset with 1 variables
    Variables: ['cc']

  Processing: risk_aversion x no_cc




    Type: Dataset with 1 variables
    Variables: ['no_cc']

  Processing: equity x cc




    Type: Dataset with 1 variables
    Variables: ['cc']

  Processing: equity x no_cc




    Type: Dataset with 1 variables
    Variables: ['no_cc']

Damage reduction complete: 6 combinations


In [9]:
# =============================================================================
# CELL 9: STEP 2 - Generate Damage Functions
# =============================================================================

print("\n" + "=" * 80)
print("STEP 4: GENERATE DAMAGE FUNCTIONS")
print("=" * 80)

# Previous Approach:
#   Embedded in run_ssps(), not separately accessible
#   run_ssps(sectors, pulse_years, menu_discs, ...)
#   Damage function generation happens internally

# New Approach:
#   Explicit step with accessible inputs/outputs
#   step = GenerateDamageFunctionStep(config, sector, pulse_year)
#   result = step.run(inputs={'reduced_damages': data}, save=False)

# Key Difference:
#   - Damage function results accessible before SCC calculation
#   - Can inspect fit quality, coefficients, marginal damages
#   - Save or keep in memory independently

pulse_year = config.scc.pulse_years[0]
damage_function_results = {}

print(f"\nGenerating damage functions:")
print(f"  Sector: {sector_name}")
print(f"  Pulse year: {pulse_year}")

for recipe in config.pipeline.recipes:
    print(f"\n  Processing: {recipe}")

    # Use 'cc' reduction for damage function generation
    key = (recipe, "cc")
    if key not in reduced_damages_results:
        print(f"    Warning: No reduced damages found, skipping")
        continue

    reduced_damages = reduced_damages_results[key]

    # Create damage function step
    # Note: Step doesn't need recipe/eta - those are embedded in reduced_damages
    step = GenerateDamageFunctionStep(
        config=config,
        sector=sector_name,
        pulse_year=pulse_year,
        verbose=False,
    )

    # Execute step with explicit input
    output = step.run(
        inputs={"reduced_damages": reduced_damages},
        save=True,  # Change to False for in-memory only
    )

    # Store results
    damage_function_results[recipe] = output

    # Inspect outputs (now accessible)
    coefs = output["damage_function_coefficients"]
    marg_dmg = output["marginal_damages"]

    if isinstance(coefs, xr.Dataset):
        print(f"    Coefficients: Dataset with {len(coefs.data_vars)} variables")
    else:
        print(f"    Coefficients shape: {coefs.shape}")

    if isinstance(marg_dmg, xr.Dataset):
        print(f"    Marginal damages: Dataset with {len(marg_dmg.data_vars)} variables")
    else:
        print(f"    Marginal damages shape: {marg_dmg.shape}")

print(f"\nDamage functions generated: {len(damage_function_results)} recipes")



STEP 4: GENERATE DAMAGE FUNCTIONS

Generating damage functions:
  Sector: not_coastal
  Pulse year: 2020

  Processing: adding_up




    Coefficients shape: (2,)
    Marginal damages shape: (11, 3)

  Processing: risk_aversion




    Coefficients shape: (2,)
    Marginal damages shape: (11, 3)

  Processing: equity
    Coefficients shape: (2,)
    Marginal damages shape: (11, 3)

Damage functions generated: 3 recipes




In [10]:

# =============================================================================
# CELL 10: Inspect Damage Functions (Optional)
# =============================================================================

print("\nInspecting damage function results...")

# Example: Look at coefficients for one recipe
sample_recipe = list(damage_function_results.keys())[0]
sample_df = damage_function_results[sample_recipe]

print(f"\nSample recipe: {sample_recipe}")
print(f"  Coefficients shape: {sample_df['damage_function_coefficients'].shape}")
print(f"  Marginal damages shape: {sample_df['marginal_damages'].shape}")

# Access coefficient values
coefs = sample_df['damage_function_coefficients']
print(f"  Coefficients: {coefs.values}")


Inspecting damage function results...

Sample recipe: adding_up
  Coefficients shape: (2,)
  Marginal damages shape: (11, 3)
  Coefficients: [  823.83284856 -2951.74393764]


In [11]:

# =============================================================================
# CELL 11: Load Economic Data for SCC Calculation
# =============================================================================

print("\n" + "=" * 80)
print("STEP 5: CALCULATE SCC - Load Economic Data")
print("=" * 80)

# Load consumption data (required for SCC calculation)
print("\nLoading economic data...")
try:
    econ_data = xr.open_zarr(config.econdata.global_ssp, chunks=None)
    consumption = econ_data["gdppc"]
    print(f"  Consumption loaded: {consumption.shape}")
    print(f"  Dimensions: {list(consumption.dims)}")
except Exception as e:
    print(f"  Error: {e}")
    consumption = None


STEP 5: CALCULATE SCC - Load Economic Data

Loading economic data...
  Consumption loaded: (11, 5, 2, 2)
  Dimensions: ['year', 'ssp', 'region', 'model']


In [12]:

# =============================================================================
# CELL 12: Calculate SCC for All Combinations
# =============================================================================

print("\n" + "=" * 80)
print("STEP 5: CALCULATE SCC - All Recipe-Discount Combinations")
print("=" * 80)

# Previous Approach:
#   Embedded in run_ssps(), all processed internally
#   run_ssps(sectors, pulse_years, menu_discs, eta_rhos, ...)
#   No visibility into which combination is running

# New Approach:
#   Explicit iteration with progress tracking
#   for recipe in recipes:
#       for discount_config in discount_configs:
#           step = CalculateSCCStep(...)
#           result = step.run(...)

# Key Difference:
#   - See exactly which combination is processing
#   - Access all intermediate outputs (discount factors, consumption, etc.)
#   - Errors in one combination don't stop others
#   - Can process subsets

if consumption is None:
    print("Cannot proceed without consumption data")
else:
    scc_results = {}
    combination_count = 0

    print(f"\nCalculating SCC for {n_combinations} combinations:")
    print(f"  {n_recipes} recipes x {n_discount_methods} discount methods")

    for recipe in config.pipeline.recipes:
        if recipe not in damage_function_results:
            print(f"\n  Warning: No damage function for {recipe}, skipping")
            continue

        marginal_damages = damage_function_results[recipe]["marginal_damages"]

        for disc_idx, discount_config in enumerate(config.discounting):
            combination_count += 1

            # Create readable name
            discount_name = discount_config.discount_type
            if hasattr(discount_config, "ramsey_method") and discount_config.ramsey_method:
                discount_name = discount_config.ramsey_method
            elif hasattr(discount_config, "gwr_method") and discount_config.gwr_method:
                discount_name = discount_config.gwr_method

            print(f"\n  [{combination_count}/{n_combinations}] {recipe} x {discount_name}")

            # Create SCC calculation step
            step = CalculateSCCStep(
                config=config,
                sector=sector_name,
                pulse_year=pulse_year,
                recipe=recipe,
                discount_config_index=disc_idx,
                verbose=False,
            )

            # Execute step with explicit inputs
            try:
                output = step.run(
                    inputs={
                        "marginal_damages": marginal_damages,
                        "consumption": consumption,
                    },
                    save=True,  # Change to False for in-memory only
                )

                # Store result
                key = (recipe, discount_name)
                scc_results[key] = output

                # Inspect outputs (now accessible)
                scc = output["scc"]
                print(f"    SCC shape: {scc.shape}")
                print(f"    SCC mean: {float(scc.mean()):.2f}")

                # Additional outputs accessible
                if "discount_factors" in output:
                    print(f"    Discount factors: {output['discount_factors'].shape}")
                if "global_consumption" in output:
                    print(f"    Global consumption: {output['global_consumption'].shape}")

            except Exception as e:
                print(f"    Error: {str(e)[:100]}")

    print(f"\nSCC calculation complete: {len(scc_results)} combinations")



STEP 5: CALCULATE SCC - All Recipe-Discount Combinations

Calculating SCC for 15 combinations:
  3 recipes x 5 discount methods

  [1/15] adding_up x naive_gwr
    SCC shape: (3,)
    SCC mean: 647.37
    Discount factors: (11,)

  [2/15] adding_up x naive_gwr




    SCC shape: (3, 5, 2)
    SCC mean: 625.16
    Discount factors: (11, 5, 2, 2)

  [3/15] adding_up x naive_gwr
    SCC shape: (3, 5, 2)
    SCC mean: 625.16
    Discount factors: (11, 5, 2, 2)

  [4/15] adding_up x naive_gwr
    SCC shape: (3, 2)
    SCC mean: 621.79
    Discount factors: (11, 2)

  [5/15] adding_up x euler_gwr




    SCC shape: (3, 2)
    SCC mean: 621.79
    Discount factors: (11, 2)

  [6/15] risk_aversion x naive_gwr
    SCC shape: (3,)
    SCC mean: 642.94
    Discount factors: (11,)

  [7/15] risk_aversion x naive_gwr
    SCC shape: (3, 5, 2)
    SCC mean: 620.44
    Discount factors: (11, 5, 2, 2)

  [8/15] risk_aversion x naive_gwr




    SCC shape: (3, 5, 2)
    SCC mean: 620.44
    Discount factors: (11, 5, 2, 2)

  [9/15] risk_aversion x naive_gwr
    SCC shape: (3, 2)
    SCC mean: 617.08
    Discount factors: (11, 2)

  [10/15] risk_aversion x euler_gwr
    SCC shape: (3, 2)
    SCC mean: 617.08
    Discount factors: (11, 2)

  [11/15] equity x naive_gwr




    SCC shape: (3,)
    SCC mean: 642.94
    Discount factors: (11,)

  [12/15] equity x naive_gwr
    SCC shape: (3, 5, 2)
    SCC mean: 620.44
    Discount factors: (11, 5, 2, 2)

  [13/15] equity x naive_gwr




    SCC shape: (3, 5, 2)
    SCC mean: 620.44
    Discount factors: (11, 5, 2, 2)

  [14/15] equity x naive_gwr
    SCC shape: (3, 2)
    SCC mean: 617.08
    Discount factors: (11, 2)

  [15/15] equity x euler_gwr
    SCC shape: (3, 2)
    SCC mean: 617.08
    Discount factors: (11, 2)

SCC calculation complete: 6 combinations
