# edges - Biodiversity, GLAM

0. Load data
1. Spatial Overlay: Intersect country boundaries with WWF ecoregions
2. Weight Calculation: Compute the percentage of each country occupied by each ecoregion
3. CF Matching: Merge Scherer et al. CFs based on eco_id
4. Output Generation: Creates a structured dataset ready for edges integration

In [1]:
import json
from collections import defaultdict
from pathlib import Path

import country_converter as coco
import geopandas as gpd
import pandas as pd

# 0. Load data

## Biodiversity Characterisation Factors
https://zenodo.org/records/10114493

In [2]:
# Country-level characterisation factors; the CSV is decoded as Latin-1.
cfs_country = pd.read_csv(
    "data/CF_domain_country.csv",
    encoding="latin1",
)
cfs_country.columns

Index(['objectid', 'iso3cd', 'romnam', 'm49code', 'species_group', 'kingdom',
       'habitat_id', 'habitat', 'CF_occ_avg_reg', 'CF_occ_avg_reg_rsd',
       'CF_occ_avg_glo', 'CF_occ_avg_glo_rsd', 'CF_occ_mar_reg',
       'CF_occ_mar_reg_rsd', 'CF_occ_mar_glo', 'CF_occ_mar_glo_rsd',
       'CF_tra_avg_reg', 'CF_tra_avg_reg_rsd', 'CF_tra_avg_glo',
       'CF_tra_avg_glo_rsd', 'CF_tra_mar_reg', 'CF_tra_mar_reg_rsd',
       'CF_tra_mar_glo', 'CF_tra_mar_glo_rsd', 'quality_reg', 'quality_glo'],
      dtype='object')

In [3]:
# Ecoregion-level characterisation factors (keyed by eco_id); decoded as Latin-1.
cfs = pd.read_csv(
    "data/CF_domain.csv",
    encoding="latin1",
)
cfs.columns

Index(['realm', 'biome', 'eco_id', 'eco_name', 'species_group', 'kingdom',
       'habitat_id', 'habitat', 'CF_occ_avg_reg', 'CF_occ_avg_glo',
       'CF_occ_mar_reg', 'CF_occ_mar_glo', 'CF_tra_avg_reg', 'CF_tra_avg_glo',
       'CF_tra_mar_reg', 'CF_tra_mar_glo', 'quality_reg', 'quality_glo'],
      dtype='object')

## Ecoregions
https://www.worldwildlife.org/publications/terrestrial-ecoregions-of-the-world

In [4]:
# Read the WWF ecoregion polygons, bring them to WGS84 (same CRS as the other
# layers) and add an integer `eco_id` column (ECO_ID is stored as float) so it
# can be matched against the CF table.
ecoregions = (
    gpd.read_file("data/official/wwf_terr_ecos.shp")
    .to_crs("EPSG:4326")
    .assign(eco_id=lambda gdf: gdf['ECO_ID'].astype(int))
)
ecoregions.columns

Index(['OBJECTID', 'AREA', 'PERIMETER', 'ECO_NAME', 'REALM', 'BIOME',
       'ECO_NUM', 'ECO_ID', 'ECO_SYM', 'GBL_STAT', 'G200_REGIO', 'G200_NUM',
       'G200_BIOME', 'G200_STAT', 'Shape_Leng', 'Shape_Area', 'area_km2',
       'eco_code', 'PER_area', 'PER_area_1', 'PER_area_2', 'geometry',
       'eco_id'],
      dtype='object')

## Countries
https://www.naturalearthdata.com/downloads/10m-cultural-vectors/
We use map units, as we prefer, for example, to distinguish overseas regions of France.
Dataset: Admin 0 – Details --> Download map units (4.77 MB) version 5.1.1

In [5]:
# Natural Earth map units, reprojected to WGS84 so every layer shares one CRS.
countries = gpd.read_file(
    "data/ne_10m_admin_0_map_units/ne_10m_admin_0_map_units.shp"
).to_crs("EPSG:4326")
countries.columns

Index(['featurecla', 'scalerank', 'LABELRANK', 'SOVEREIGNT', 'SOV_A3',
       'ADM0_DIF', 'LEVEL', 'TYPE', 'TLC', 'ADMIN',
       ...
       'FCLASS_TR', 'FCLASS_ID', 'FCLASS_PL', 'FCLASS_GR', 'FCLASS_IT',
       'FCLASS_NL', 'FCLASS_SE', 'FCLASS_BD', 'FCLASS_UA', 'geometry'],
      dtype='object', length=169)

## Mining facilities and production data
https://zenodo.org/records/7369478

In [6]:
# Mining facility locations (GeoPackage) and the commodity production table (CSV).
facilities = gpd.read_file(
    "data/mining/jasansky/data/facilities.gpkg"
)
production = pd.read_csv(
    "data/mining/jasansky/data/commodities.csv"
)

In [7]:
# Quick sanity report on what was loaded: (count, label) pairs printed in order.
load_summary = [
    (len(ecoregions), "ecoregions"),
    (len(countries), "countries"),
    (len(cfs), "CF records"),
    (facilities['facility_id'].nunique(dropna=False), "facilities"),
    (production['facility_id'].nunique(dropna=False), "facilities with production records"),
]
for loaded_count, loaded_label in load_summary:
    print(f"Loaded {loaded_count} {loaded_label}")

Loaded 14458 ecoregions
Loaded 298 countries
Loaded 12375 CF records
Loaded 2413 facilities
Loaded 507 facilities with production records


# 1. Mining Data preparation

In [8]:
### Group production records by facility and year
### (the newest year per commodity is selected further below)

cols_to_keep = ["commodity", "value_tonnes"]


def _records_for_group(year_rows):
    """Turn one (facility, year) group into a list of {commodity, value_tonnes} dicts."""
    return year_rows[cols_to_keep].to_dict(orient="records")


production_by_facility_year = production.groupby(["facility_id", "year"])
df_grouped_production = production_by_facility_year.apply(
    _records_for_group, include_groups=False
).reset_index(name="prod_data")

def get_newest_data(group):
    """
    Collapse the yearly production records of one facility into a single list
    holding, for every commodity, the record from the most recent year in
    which that commodity appears.

    `group` needs a "year" column and a "prod_data" column whose cells are
    lists of dicts with a "commodity" key. Each returned record is a copy of
    the source dict with a "year" entry added.

    Returns a one-element Series with key 'prod_data'.
    """
    by_recency = group.sort_values("year", ascending=False).reset_index(drop=True)

    # The most recent year seeds the result.
    latest_year = by_recency.at[0, "year"]
    merged = {}
    for entry in by_recency.at[0, "prod_data"]:
        merged[entry["commodity"]] = {**entry, "year": latest_year}

    # Walk back in time; a commodity is only added if no newer record exists.
    older_years = by_recency["year"].iloc[1:]
    older_records = by_recency["prod_data"].iloc[1:]
    for record_year, records in zip(older_years, older_records):
        for entry in records:
            merged.setdefault(entry.get("commodity"), {**entry, "year": record_year})

    return pd.Series({"prod_data": list(merged.values())})

# One row per facility: the newest available record for each commodity
records_by_facility = df_grouped_production.groupby('facility_id')
productionPerFacility = records_by_facility.apply(
    get_newest_data, include_groups=False
).reset_index()

In [9]:
# Candidate facilities: ids ending in "00" (presumably the whole-facility
# record rather than a sub-site -- confirm against the dataset documentation)
# that carry a usable geometry.
# `is_empty` is False for missing (None) geometries, so `notna()` is required
# as well to really exclude facilities without coordinates.
has_geometry = facilities.geometry.notna() & ~facilities.geometry.is_empty
mainFacilities = facilities[facilities['facility_id'].str.endswith("00") & has_geometry].copy()

# Keep only those that have not stopped production (no recorded end date)
mainFacilities = mainFacilities[mainFacilities['production_end'].isnull()].copy()

# Drop columns that are not needed downstream
unused_facility_columns = [
    'facility_name', 'facility_other_names', 'sub_site_name', 'sub_site_other_names',
    'primary_commodity', 'commodities_products', 'production_start', 'production_end',
    'GID_0', 'GID_1', 'GID_2', 'GID_3', 'GID_4',
    'surface_area_sq_km', 'concession_area_sq_km',
]
mainFacilities = mainFacilities.drop(columns=unused_facility_columns)

# Inner merge: only facilities that also have production records survive
facilitiesProduction = mainFacilities.merge(productionPerFacility, on='facility_id', how='inner')

print(len(facilitiesProduction), "whole facilities with production data and geometry")

462 whole facilities with production data and geometry


In [10]:
from shapely.geometry import Point, MultiPoint, GeometryCollection

def first_point_from_geom(geom):
    """Reduce a multi-part geometry to a single representative part.

    - ``None`` is returned unchanged.
    - Empty geometries, and objects whose ``is_empty`` cannot be read, are
      returned unchanged.
    - ``MultiPoint``: the first non-empty member point (the geometry itself
      if every member is empty).
    - ``GeometryCollection``: the first non-empty ``Point`` member; if there
      is none, the first member of any type; if the collection has no
      members, the collection itself.
    - Anything else (e.g. a plain ``Point``) is returned as is.

    Members are always accessed through ``.geoms``: Shapely 2.0 removed
    direct iteration, ``len()`` and indexing on multi-part geometries.
    """
    if geom is None:
        return None
    try:
        if geom.is_empty:
            return geom
    except Exception:
        # Best effort: leave objects that do not behave like geometries untouched
        return geom

    gtype = geom.geom_type
    if gtype == "MultiPoint":
        for point in geom.geoms:
            if not point.is_empty:
                return point
        return geom  # fallback: no usable member

    if gtype == "GeometryCollection":
        parts = list(geom.geoms)
        for part in parts:
            if part.geom_type == "Point" and not part.is_empty:
                return part
        # No point found: fall back to the first member, if any
        return parts[0] if parts else geom

    # Already a Point (or another single geometry): keep as is
    return geom

# Geometry type census before the conversion
geom_types_before = facilitiesProduction.geometry.geom_type.value_counts()
print("Geometry types before:")
print(geom_types_before)

# Replace every multi-part geometry by its first point
single_point_geometries = facilitiesProduction.geometry.apply(first_point_from_geom)
facilitiesProduction['geometry'] = single_point_geometries

# Re-wrap so the replaced column is registered as the active geometry (CRS kept)
facilitiesProduction = gpd.GeoDataFrame(
    facilitiesProduction,
    geometry='geometry',
    crs=facilitiesProduction.crs,
)

# Geometry type census after the conversion
geom_types_after = facilitiesProduction.geometry.geom_type.value_counts()
print("\nGeometry types after:")
print(geom_types_after)

Geometry types before:
MultiPoint    462
Name: count, dtype: int64

Geometry types after:
Point    462
Name: count, dtype: int64


# 2. Weight Calculation

In [11]:
# Price table: material_id -> price per kg (taken from the 'price/kg' column)
materials = pd.read_excel("data/mining/materials_prices.xlsx")
price_by_material = materials.set_index('material_id')['price/kg']
prices_lookup = price_by_material.to_dict()

In [None]:
### Calculate the economic share of each mine in its country

# One row per (facility, commodity): explode the list of production dicts and
# spread each dict into its own columns
df_expanded = facilitiesProduction.explode('prod_data', ignore_index=True)
prod_cols = pd.json_normalize(df_expanded['prod_data'])
df_expanded = df_expanded.drop(columns=['prod_data']).join(prod_cols)

# Price per commodity; commodities missing from the price table get NaN
df_expanded['price_per_kg'] = df_expanded['commodity'].map(prices_lookup)

# Make missing prices visible: their revenue is NaN and is silently skipped by
# the sums below, which lowers the facility's share without any other trace.
unpriced_mask = df_expanded['price_per_kg'].isna() & df_expanded['commodity'].notna()
unpriced_commodities = sorted(df_expanded.loc[unpriced_mask, 'commodity'].astype(str).unique())
if unpriced_commodities:
    print(
        f"WARNING: no price found for {len(unpriced_commodities)} commodities "
        f"(ignored in the revenue shares): {unpriced_commodities}"
    )

# Revenue per commodity: tonnes -> kg, times price per kg
df_expanded['revenue'] = df_expanded['value_tonnes'] * 1000 * df_expanded['price_per_kg']

# Aggregate revenue per facility and country
facility_revenue = (
    df_expanded
    .groupby(['facility_id', 'country'], as_index=False)['revenue']
    .sum()
)

# Total revenue per country, broadcast back onto the facility rows
facility_revenue['country_total'] = (
    facility_revenue.groupby('country')['revenue'].transform('sum')
)

# Share of each facility in the revenue of its country
facility_revenue['share_in_country'] = (
    facility_revenue['revenue'] / facility_revenue['country_total']
)

# Check that the shares of every country sum to 1. Rounding to one decimal
# would accept anything between 0.95 and 1.05, so compare with a tolerance.
# Countries whose total revenue is 0 yield NaN shares and are reported too.
country_shares = facility_revenue.groupby('country')['share_in_country'].sum()
inconsistent_countries = country_shares[(country_shares - 1).abs() > 1e-6]
if inconsistent_countries.empty:
    print(f"Shares sum to 1 for all {len(country_shares)} countries")
else:
    print(f"Shares do not sum to 1 for {len(inconsistent_countries)} countries:")
    print(inconsistent_countries)

'All country shares sum to: [1.]'

In [None]:
# Inspect the per-facility revenue table (rendered with the rich DataFrame display)
display(facility_revenue)

# 3. CF matching

### Dataframe matching

In [None]:
# NOTE(review): no cell in this notebook defines `country_ecoregion`, nor the
# `country_areas` lookup read by the JSON generators, so a Restart & Run All
# stops here with a NameError. Both are rebuilt below from steps 1-2 of the
# workflow listed at the top of the notebook (overlay countries with
# ecoregions, then the share of each country covered by each ecoregion).
# Please confirm this matches the intended definition, in particular the
# choice of denominator and the km2 unit.

# Areas must be measured in an equal-area projection, not in degrees (WGS84)
EQUAL_AREA_CRS = "ESRI:54009"  # World Mollweide

# 1. Spatial overlay: one polygon per (country, ecoregion polygon) intersection
country_ecoregion_pieces = gpd.overlay(
    countries[['ISO_A3', 'NAME', 'geometry']],
    ecoregions[['eco_id', 'ECO_NAME', 'geometry']],
    how='intersection',
    keep_geom_type=True,
)
country_ecoregion_pieces['intersection_area_km2'] = (
    country_ecoregion_pieces.geometry.to_crs(EQUAL_AREA_CRS).area / 1e6
)

# An ecoregion is made of many polygons: sum them per country and eco_id.
# NOTE(review): grouping uses ISO_A3 because the generators key on it;
# Natural Earth is known to use a placeholder code for some map units --
# check whether ISO_A3_EH / ADM0_A3 would be a better key.
country_ecoregion = (
    pd.DataFrame(country_ecoregion_pieces.drop(columns='geometry'))
    .groupby(['ISO_A3', 'eco_id'], as_index=False)
    .agg(
        NAME=('NAME', 'first'),
        ECO_NAME=('ECO_NAME', 'first'),
        intersection_area_km2=('intersection_area_km2', 'sum'),
    )
)

# 2. Weights: share of the country's area occupied by each ecoregion
country_areas = (
    (countries.geometry.to_crs(EQUAL_AREA_CRS).area / 1e6)
    .groupby(countries['ISO_A3'])
    .sum()
    .to_dict()
)
country_ecoregion['weight_factor'] = (
    country_ecoregion['intersection_area_km2']
    / country_ecoregion['ISO_A3'].map(country_areas)
)

# 3. Attach the ecoregion-level CFs. The left join keeps country/ecoregion
# pairs without any CF; they carry NaN in the CF columns.
country_ecoregion_cfs = country_ecoregion.merge(
    cfs,
    on='eco_id',
    how='left'
)

In [None]:
# Source column -> output column. The insertion order of this mapping is also
# the column order of the exported table.
output_column_names = {
    'ISO_A3': 'country_iso',
    'NAME': 'country_name',
    'eco_id': 'ecoregion_id',
    'ECO_NAME': 'ecoregion_name',
    'intersection_area_km2': 'area_km2',
    'weight_factor': 'weight_factor',
    'species_group': 'species_group',
    'habitat_id': 'habitat_id',
    'habitat': 'habitat_type',
    'CF_occ_avg_reg': 'cf_occupation_avg_regional',
    'CF_occ_avg_glo': 'cf_occupation_avg_global',
    'CF_occ_mar_reg': 'cf_occupation_marginal_regional',
    'CF_occ_mar_glo': 'cf_occupation_marginal_global',
    'CF_tra_avg_reg': 'cf_transformation_avg_regional',
    'CF_tra_avg_glo': 'cf_transformation_avg_global',
    'CF_tra_mar_reg': 'cf_transformation_marginal_regional',
    'CF_tra_mar_glo': 'cf_transformation_marginal_global',
    'quality_reg': 'quality_regional',
    'quality_glo': 'quality_global',
}
output_cols = list(output_column_names)

# Select, rename for clarity, and drop rows without a species group
# (country/ecoregion pairs that found no CF in the left merge).
final_output = (
    country_ecoregion_cfs[output_cols]
    .rename(columns=output_column_names)
    .dropna(subset=['species_group'])
    .copy()
)

In [None]:
# Stable, readable row order for the export (ascending on every key)
sort_keys = ['country_name', 'ecoregion_id', 'species_group', 'habitat_id']
final_output = final_output.sort_values(by=sort_keys)

In [None]:
# Make sure the target folder exists so the export also works on a fresh checkout
Path("output").mkdir(parents=True, exist_ok=True)
final_output.to_csv("output/test_severin_biomes_country_ecoregion_cfs_scherer.csv", index=False)
final_output.head()

In [None]:
# Sanity check for one country: take one weight per ecoregion (the table repeats
# it for every species group / habitat row) and add the weights up.
is_spain = final_output['country_iso'] == 'ESP'
weight_per_ecoregion = final_output[is_spain].groupby('ecoregion_id')['weight_factor'].first()
spain_check = weight_per_ecoregion.sum()
spain_check

### Matching habitats to ecoinvent

Matching done according to: https://doi.org/10.1007/s11367-021-02003-y

In [None]:
import bw2data as bd
import bw2io as bi
# List the categories of every "Transformation, from ..." flow in the biosphere
# database of the selected brightway project.
bd.projects.set_current("bw25_ei311")
bf = bd.Database('biosphere')
transformation_from_categories = [
    flow["categories"] for flow in bf if "Transformation, from" in flow["name"]
]
sorted(transformation_from_categories)

In [None]:
# Biosphere flow name ("Occupation, ...") -> habitat label used to select the CF
# (compared against the `habitat` / `habitat_type` columns of the CF tables).
# Commented-out flows have no habitat assigned and therefore get no CF.
# The order of the entries is the order in which exchanges are written out.
occupation_flows = {
    'Occupation, annual crop': 'Cropland_Intense',
    'Occupation, annual crop, flooded crop': 'Cropland_Intense',
    'Occupation, annual crop, greenhouse': 'Urban_Light',
    'Occupation, annual crop, irrigated': 'Cropland_Intense',
    'Occupation, annual crop, irrigated, extensive': 'Cropland_Light',
    'Occupation, annual crop, irrigated, intensive': 'Cropland_Intense',
    'Occupation, annual crop, non-irrigated': 'Cropland_Intense',
    'Occupation, annual crop, non-irrigated, extensive': 'Cropland_Minimal',
    'Occupation, annual crop, non-irrigated, intensive': 'Cropland_Intense',
    'Occupation, arable land, unspecified use': 'Cropland_Intense',
    # 'Occupation, arable, conservation tillage (obsolete)': '',
    # 'Occupation, arable, conventional tillage (obsolete)': '',
    # 'Occupation, arable, reduced tillage (obsolete)': '',
    # 'Occupation, bare area (non-use)': '',
    'Occupation, construction site': 'Urban_Intense',
    'Occupation, cropland fallow (non-use)': 'Cropland_Minimal',
    'Occupation, dump site': 'Urban_Intense',
    'Occupation, field margin/hedgerow': 'Cropland_Minimal',
    'Occupation, forest, extensive': 'Pasture_Light',  # NOTE(review): the transformation mappings use 'Managed_forest_Light' for "forest, extensive" -- confirm which is intended
    'Occupation, forest, intensive': 'Managed_forest_Intense',
    'Occupation, forest, primary (non-use)': 'Managed_forest_Minimal',  # NOTE(review): the transformation mappings use 'Pasture_Minimal' for "forest, primary (non-use)" -- confirm
    'Occupation, forest, secondary (non-use)': 'Pasture_Light',  # NOTE(review): the transformation mappings use 'Managed_forest_Light' for "forest, secondary (non-use)" -- confirm
    'Occupation, forest, unspecified': 'Managed_forest_Intense',
    'Occupation, grassland, natural (non-use)': 'Pasture_Minimal',
    'Occupation, grassland, natural, for livestock grazing': 'Pasture_Minimal',
    'Occupation, heterogeneous, agricultural': 'Cropland_Minimal',
    'Occupation, industrial area': 'Urban_Intense',
    # 'Occupation, inland waterbody, unspecified': '',
    # 'Occupation, lake, artificial': '',
    # 'Occupation, lake, natural (non-use)': '',
    'Occupation, mineral extraction site': 'Urban_Intense',
    'Occupation, pasture, man made': 'Pasture_Intense',
    'Occupation, pasture, man made, extensive': 'Pasture_Light',
    'Occupation, pasture, man made, intensive': 'Pasture_Intense',
    'Occupation, permanent crop': 'Cropland_Intense',
    'Occupation, permanent crop, irrigated': 'Cropland_Intense',
    'Occupation, permanent crop, irrigated, extensive': 'Cropland_Light',
    'Occupation, permanent crop, irrigated, intensive': 'Cropland_Intense',
    'Occupation, permanent crop, non-irrigated': 'Cropland_Intense',
    'Occupation, permanent crop, non-irrigated, extensive': 'Cropland_Minimal',
    'Occupation, permanent crop, non-irrigated, intensive': 'Cropland_Intense',
    # 'Occupation, river, artificial': '',
    # 'Occupation, river, natural (non-use)': '',
    # 'Occupation, seabed, drilling and mining': '',
    # 'Occupation, seabed, infrastructure': '',
    # 'Occupation, seabed, natural (non-use)': '',
    # 'Occupation, seabed, unspecified': '',
    'Occupation, shrub land, sclerophyllous': 'Managed_forest_Minimal',  # NOTE(review): the transformation mappings use 'Pasture_Minimal' for "shrub land, sclerophyllous" -- confirm
    # 'Occupation, snow and ice (non-use)': '',
    'Occupation, traffic area, rail network': 'Urban_Intense',
    'Occupation, traffic area, rail/road embankment': 'Urban_Light',
    'Occupation, traffic area, road network': 'Urban_Intense',
    'Occupation, unspecified': 'Urban_Intense',
    'Occupation, unspecified, natural (non-use)': 'Pasture_Minimal',
    'Occupation, urban, continuously built': 'Urban_Intense',
    'Occupation, urban, discontinuously built': 'Urban_Light',
    'Occupation, urban, green area': 'Urban_Minimal',
    'Occupation, urban/industrial fallow (non-use)': 'Urban_Light',
    # 'Occupation, wetland, coastal (non-use)': '',
    # 'Occupation, wetland, inland (non-use)': '',
}

In [None]:
# Biosphere flow name ("Transformation, to ...") -> habitat label used to select
# the CF (compared against the `habitat` / `habitat_type` columns of the CF tables).
# Commented-out flows have no habitat assigned and therefore get no CF.
# NOTE(review): the forest (extensive / primary / secondary) and sclerophyllous
# shrub land entries are mapped to different habitats here than in
# `occupation_flows` -- confirm which mapping is intended.
transformation_to_flows = {
    'Transformation, to annual crop': 'Cropland_Intense',
    'Transformation, to annual crop, flooded crop': 'Cropland_Intense',
    'Transformation, to annual crop, greenhouse': 'Urban_Light',
    'Transformation, to annual crop, irrigated': 'Cropland_Intense',
    'Transformation, to annual crop, irrigated, extensive': 'Cropland_Light',
    'Transformation, to annual crop, irrigated, intensive': 'Cropland_Intense',
    'Transformation, to annual crop, non-irrigated': 'Cropland_Intense',
    'Transformation, to annual crop, non-irrigated, extensive': 'Cropland_Minimal',
    'Transformation, to annual crop, non-irrigated, intensive': 'Cropland_Intense',
    'Transformation, to arable land, unspecified use': 'Cropland_Intense',
    # 'Transformation, to bare area (non-use)': '',
    'Transformation, to cropland fallow (non-use)': 'Cropland_Minimal',
    'Transformation, to dump site': 'Urban_Intense',
    'Transformation, to dump site, inert material landfill': 'Urban_Intense',
    'Transformation, to dump site, residual material landfill': 'Urban_Intense',
    'Transformation, to dump site, sanitary landfill': 'Urban_Intense',
    'Transformation, to dump site, slag compartment': 'Urban_Intense',
    'Transformation, to field margin/hedgerow': 'Cropland_Minimal',
    'Transformation, to forest, extensive': 'Managed_forest_Light',
    'Transformation, to forest, intensive': 'Managed_forest_Intense',
    'Transformation, to forest, primary (non-use)': 'Pasture_Minimal',
    'Transformation, to forest, secondary (non-use)': 'Managed_forest_Light',
    'Transformation, to forest, unspecified': 'Managed_forest_Intense',
    'Transformation, to grassland, natural (non-use)': 'Pasture_Minimal',
    'Transformation, to grassland, natural, for livestock grazing': 'Pasture_Minimal',
    'Transformation, to heterogeneous, agricultural': 'Cropland_Minimal',
    'Transformation, to industrial area': 'Urban_Intense',
    # 'Transformation, to inland waterbody, unspecified': '',
    # 'Transformation, to lake, artificial': '',
    # 'Transformation, to lake, natural (non-use)': '',
    'Transformation, to mineral extraction site': 'Urban_Intense',
    'Transformation, to pasture, man made': 'Pasture_Intense',
    'Transformation, to pasture, man made, extensive': 'Pasture_Light',
    'Transformation, to pasture, man made, intensive': 'Pasture_Intense',
    'Transformation, to permanent crop': 'Cropland_Intense',
    'Transformation, to permanent crop, irrigated': 'Cropland_Intense',
    'Transformation, to permanent crop, irrigated, extensive': 'Cropland_Light',
    'Transformation, to permanent crop, irrigated, intensive': 'Cropland_Intense',
    'Transformation, to permanent crop, non-irrigated': 'Cropland_Intense',
    'Transformation, to permanent crop, non-irrigated, extensive': 'Cropland_Minimal',
    'Transformation, to permanent crop, non-irrigated, intensive': 'Cropland_Intense',
    # 'Transformation, to river, artificial': '',
    # 'Transformation, to river, natural (non-use)': '',
    # 'Transformation, to seabed, drilling and mining': '',
    # 'Transformation, to seabed, infrastructure': '',
    # 'Transformation, to seabed, natural (non-use)': '',
    # 'Transformation, to seabed, unspecified': '',
    'Transformation, to shrub land, sclerophyllous': 'Pasture_Minimal',
    # 'Transformation, to snow and ice (non-use)': '',
    'Transformation, to traffic area, rail network': 'Urban_Intense',
    'Transformation, to traffic area, rail/road embankment': 'Urban_Light',
    'Transformation, to traffic area, road network': 'Urban_Intense',
    'Transformation, to unknown': 'Urban_Intense', # Added - Matched to unspecified
    'Transformation, to unspecified': 'Urban_Intense',
    'Transformation, to unspecified, natural (non-use)': 'Pasture_Minimal',
    'Transformation, to urban, continuously built': 'Urban_Intense',
    'Transformation, to urban, discontinuously built': 'Urban_Light',
    'Transformation, to urban, green area': 'Urban_Minimal',
    'Transformation, to urban/industrial fallow (non-use)': 'Urban_Light',
    # 'Transformation, to wetland, coastal (non-use)': '',
    # 'Transformation, to wetland, inland (non-use)': '',
}

In [None]:
# Biosphere flow name ("Transformation, from ...") -> habitat label used to
# select the CF (compared against the `habitat` / `habitat_type` columns of the
# CF tables). Entry for entry this mirrors `transformation_to_flows`.
# Commented-out flows have no habitat assigned and therefore get no CF.
# NOTE(review): the forest (extensive / primary / secondary) and sclerophyllous
# shrub land entries are mapped to different habitats here than in
# `occupation_flows` -- confirm which mapping is intended.
transformation_from_flows = {
    'Transformation, from annual crop': 'Cropland_Intense',
    'Transformation, from annual crop, flooded crop': 'Cropland_Intense',
    'Transformation, from annual crop, greenhouse': 'Urban_Light',
    'Transformation, from annual crop, irrigated': 'Cropland_Intense',
    'Transformation, from annual crop, irrigated, extensive': 'Cropland_Light',
    'Transformation, from annual crop, irrigated, intensive': 'Cropland_Intense',
    'Transformation, from annual crop, non-irrigated': 'Cropland_Intense',
    'Transformation, from annual crop, non-irrigated, extensive': 'Cropland_Minimal',
    'Transformation, from annual crop, non-irrigated, intensive': 'Cropland_Intense',
    'Transformation, from arable land, unspecified use': 'Cropland_Intense',
    # 'Transformation, from bare area (non-use)': '',
    'Transformation, from cropland fallow (non-use)': 'Cropland_Minimal',
    'Transformation, from dump site': 'Urban_Intense',
    'Transformation, from dump site, inert material landfill': 'Urban_Intense',
    'Transformation, from dump site, residual material landfill': 'Urban_Intense',
    'Transformation, from dump site, sanitary landfill': 'Urban_Intense',
    'Transformation, from dump site, slag compartment': 'Urban_Intense',
    'Transformation, from field margin/hedgerow': 'Cropland_Minimal',
    'Transformation, from forest, extensive': 'Managed_forest_Light',
    'Transformation, from forest, intensive': 'Managed_forest_Intense',
    'Transformation, from forest, primary (non-use)': 'Pasture_Minimal',
    'Transformation, from forest, secondary (non-use)': 'Managed_forest_Light',
    'Transformation, from forest, unspecified': 'Managed_forest_Intense',
    'Transformation, from grassland, natural (non-use)': 'Pasture_Minimal',
    'Transformation, from grassland, natural, for livestock grazing': 'Pasture_Minimal',
    'Transformation, from heterogeneous, agricultural': 'Cropland_Minimal',
    'Transformation, from industrial area': 'Urban_Intense',
    # 'Transformation, from inland waterbody, unspecified': '',
    # 'Transformation, from lake, artificial': '',
    # 'Transformation, from lake, natural (non-use)': '',
    'Transformation, from mineral extraction site': 'Urban_Intense',
    'Transformation, from pasture, man made': 'Pasture_Intense',
    'Transformation, from pasture, man made, extensive': 'Pasture_Light',
    'Transformation, from pasture, man made, intensive': 'Pasture_Intense',
    'Transformation, from permanent crop': 'Cropland_Intense',
    'Transformation, from permanent crop, irrigated': 'Cropland_Intense',
    'Transformation, from permanent crop, irrigated, extensive': 'Cropland_Light',
    'Transformation, from permanent crop, irrigated, intensive': 'Cropland_Intense',
    'Transformation, from permanent crop, non-irrigated': 'Cropland_Intense',
    'Transformation, from permanent crop, non-irrigated, extensive': 'Cropland_Minimal',
    'Transformation, from permanent crop, non-irrigated, intensive': 'Cropland_Intense',
    # 'Transformation, from river, artificial': '',
    # 'Transformation, from river, natural (non-use)': '',
    # 'Transformation, from seabed, drilling and mining': '',
    # 'Transformation, from seabed, infrastructure': '',
    # 'Transformation, from seabed, natural (non-use)': '',
    # 'Transformation, from seabed, unspecified': '',
    'Transformation, from shrub land, sclerophyllous': 'Pasture_Minimal',
    # 'Transformation, from snow and ice (non-use)': '',
    'Transformation, from traffic area, rail network': 'Urban_Intense',
    'Transformation, from traffic area, rail/road embankment': 'Urban_Light',
    'Transformation, from traffic area, road network': 'Urban_Intense',
    'Transformation, from unknown': 'Urban_Intense', # Added - Matched to unspecified
    'Transformation, from unspecified': 'Urban_Intense',
    'Transformation, from unspecified, natural (non-use)': 'Pasture_Minimal',
    'Transformation, from urban, continuously built': 'Urban_Intense',
    'Transformation, from urban, discontinuously built': 'Urban_Light',
    'Transformation, from urban, green area': 'Urban_Minimal',
    'Transformation, from urban/industrial fallow (non-use)': 'Urban_Light',
    # 'Transformation, from wetland, coastal (non-use)': '',
    # 'Transformation, from wetland, inland (non-use)': '',
}

# 4. JSON generation

In [None]:
# Converter used by the generators to turn ISO3 codes into ISO2 locations
cc = coco.CountryConverter()

# Species groups present in the CF table: one pair of JSON files is written per group
species_groups = final_output['species_group'].unique()
n_species_groups = len(species_groups)
print(f"Found {n_species_groups} species groups: {species_groups}")

In [None]:
def generate_occupation_json(species_group):
    """Build the method dictionary for land occupation flows of one species group.

    For every flow in ``occupation_flows`` and every country in
    ``final_output`` one exchange is emitted, provided that

    * ``cfs_country`` holds a non-missing ``CF_occ_avg_glo`` for the country,
      the flow's habitat and ``species_group`` (the first matching row is used),
    * ``final_output`` holds at least one ecoregion row for the same
      combination with a non-missing CF and weight, and
    * the country code can be converted to ISO2.

    The exchange value is the country-level CF; the ecoregion-level CFs and
    their ``weight_factor`` feed the ``discrete_empirical`` uncertainty block.
    Missing values are skipped because they would be serialised as ``NaN``,
    which is not valid JSON.

    Parameters
    ----------
    species_group : str
        Value of the ``species_group`` column to generate the factors for.

    Returns
    -------
    dict
        Method metadata plus the list of exchanges, ready for ``json.dump``.

    Notes
    -----
    Reads the notebook-level objects ``occupation_flows``, ``cfs_country``,
    ``final_output``, ``country_areas`` and ``cc``.
    """
    # Index both tables once instead of re-filtering them for every
    # flow x country pair (many flows share the same habitat).
    # Country-level CF per (country, habitat); on duplicates the first row wins.
    country_cf_lookup = (
        cfs_country[cfs_country['species_group'] == species_group]
        .drop_duplicates(subset=['iso3cd', 'habitat'])
        .set_index(['iso3cd', 'habitat'])['CF_occ_avg_glo']
        .to_dict()
    )

    # Ecoregion-level CFs and weights per (country, habitat), one row per ecoregion
    ecoregion_rows = (
        final_output[final_output['species_group'] == species_group]
        .dropna(subset=['cf_occupation_avg_global', 'weight_factor'])
        .drop_duplicates(subset=['country_iso', 'habitat_type', 'ecoregion_id'])
    )
    ecoregion_lookup = {
        key: (rows['cf_occupation_avg_global'].tolist(), rows['weight_factor'].tolist())
        for key, rows in ecoregion_rows.groupby(['country_iso', 'habitat_type'], sort=False)
    }

    countries_list = final_output['country_iso'].unique()
    iso2_by_country = {}  # cache: the ISO3 -> ISO2 conversion is slow
    exchanges = []

    for flow_name, habitat_type in occupation_flows.items():
        for country in countries_list:
            key = (country, habitat_type)

            # No country-level CF or no ecoregion data: skip this combination
            if key not in country_cf_lookup or key not in ecoregion_lookup:
                continue
            country_cf_value = country_cf_lookup[key]
            if pd.isna(country_cf_value):
                continue

            if country not in iso2_by_country:
                iso2_by_country[country] = cc.convert(names=country, to='ISO2')
            location = iso2_by_country[country]
            if location == 'not found':
                # country_converter's placeholder for codes it cannot resolve
                continue

            values, weights = ecoregion_lookup[key]
            country_area = country_areas.get(country, 0)

            exchanges.append({
                "supplier": {
                    "name": flow_name,
                    "categories": ["natural resource", "land"],
                    "matrix": "biosphere"
                },
                "consumer": {
                    "location": location,
                    "matrix": "technosphere"
                },
                "value": float(country_cf_value),
                "weight": float(country_area),
                "uncertainty": {
                    "distribution": "discrete_empirical",
                    "parameters": {
                        "values": [float(v) for v in values],
                        "weights": [float(w) for w in weights]
                    }
                }
            })

    output = {
        "name": f"ecoinvent 3.10/3.11 - GLAM3 - Land occupation, biodiversity damage - {species_group}",
        "unit": "PDF.yr",
        "version": "0.1",
        "description": f"Characterization factors for land occupation impacts on biodiversity ({species_group}), considering global species loss. Based on Scherer et al. 2023: https://doi.org/10.5281/zenodo.10114492",
        "species_group": species_group,
        "strategies": [
            "map_exchanges",
            "map_aggregate_locations",
            "map_dynamic_locations",
            "map_contained_locations",
            "map_remaining_locations_to_global"
        ],
        "exchanges": exchanges
    }

    return output

In [None]:
def generate_transformation_json(species_group):
    """Build the method dictionary for land transformation flows of one species group.

    The "from" flows (``transformation_from_flows``) are processed first, then
    the "to" flows (``transformation_to_flows``). For every flow and every
    country in ``final_output`` one exchange is emitted, provided that

    * ``cfs_country`` holds a non-missing ``CF_tra_avg_glo`` for the country,
      the flow's habitat and ``species_group`` (the first matching row is used),
    * ``final_output`` holds at least one ecoregion row for the same
      combination with a non-missing CF and weight, and
    * the country code can be converted to ISO2.

    The exchange value is the country-level CF; the ecoregion-level CFs and
    their ``weight_factor`` feed the ``discrete_empirical`` uncertainty block.
    Missing values are skipped because they would be serialised as ``NaN``,
    which is not valid JSON.

    Parameters
    ----------
    species_group : str
        Value of the ``species_group`` column to generate the factors for.

    Returns
    -------
    dict
        Method metadata plus the list of exchanges, ready for ``json.dump``.

    Notes
    -----
    Reads the notebook-level objects ``transformation_from_flows``,
    ``transformation_to_flows``, ``cfs_country``, ``final_output``,
    ``country_areas`` and ``cc``.
    """
    # Index both tables once instead of re-filtering them for every
    # flow x country pair (many flows share the same habitat).
    # Country-level CF per (country, habitat); on duplicates the first row wins.
    country_cf_lookup = (
        cfs_country[cfs_country['species_group'] == species_group]
        .drop_duplicates(subset=['iso3cd', 'habitat'])
        .set_index(['iso3cd', 'habitat'])['CF_tra_avg_glo']
        .to_dict()
    )

    # Ecoregion-level CFs and weights per (country, habitat), one row per ecoregion
    ecoregion_rows = (
        final_output[final_output['species_group'] == species_group]
        .dropna(subset=['cf_transformation_avg_global', 'weight_factor'])
        .drop_duplicates(subset=['country_iso', 'habitat_type', 'ecoregion_id'])
    )
    ecoregion_lookup = {
        key: (rows['cf_transformation_avg_global'].tolist(), rows['weight_factor'].tolist())
        for key, rows in ecoregion_rows.groupby(['country_iso', 'habitat_type'], sort=False)
    }

    # NOTE(review): "from" and "to" flows receive the same (positive) CF here.
    # Land-use LCIA implementations often give "Transformation, from" flows the
    # opposite sign -- confirm which convention is expected downstream.
    transformation_flows = (
        list(transformation_from_flows.items()) + list(transformation_to_flows.items())
    )

    countries_list = final_output['country_iso'].unique()
    iso2_by_country = {}  # cache: the ISO3 -> ISO2 conversion is slow
    exchanges = []

    for flow_name, habitat_type in transformation_flows:
        for country in countries_list:
            key = (country, habitat_type)

            # No country-level CF or no ecoregion data: skip this combination
            if key not in country_cf_lookup or key not in ecoregion_lookup:
                continue
            country_cf_value = country_cf_lookup[key]
            if pd.isna(country_cf_value):
                continue

            if country not in iso2_by_country:
                iso2_by_country[country] = cc.convert(names=country, to='ISO2')
            location = iso2_by_country[country]
            if location == 'not found':
                # country_converter's placeholder for codes it cannot resolve
                continue

            values, weights = ecoregion_lookup[key]
            country_area = country_areas.get(country, 0)

            exchanges.append({
                "supplier": {
                    "name": flow_name,
                    "categories": ["natural resource", "land"],
                    "matrix": "biosphere"
                },
                "consumer": {
                    "location": location,
                    "matrix": "technosphere"
                },
                "value": float(country_cf_value),
                "weight": float(country_area),
                "uncertainty": {
                    "distribution": "discrete_empirical",
                    "parameters": {
                        "values": [float(v) for v in values],
                        "weights": [float(w) for w in weights]
                    }
                }
            })

    output = {
        "name": f"ecoinvent 3.10/3.11 - GLAM3 - Land transformation, biodiversity damage - {species_group}",
        "unit": "PDF.yr",
        "version": "0.1",
        "description": f"Characterization factors for land transformation impacts on biodiversity ({species_group}), considering global species loss. Based on Scherer et al. 2023: https://doi.org/10.5281/zenodo.10114492",
        "species_group": species_group,
        "strategies": [
            "map_exchanges",
            "map_aggregate_locations",
            "map_dynamic_locations",
            "map_contained_locations",
            "map_remaining_locations_to_global"
        ],
        "exchanges": exchanges
    }

    return output

In [None]:
# Make sure the target folder exists so the export also works on a fresh checkout
output_dir = Path("output")
output_dir.mkdir(parents=True, exist_ok=True)

for species_group in species_groups:
    print(f"\n--- Processing {species_group} ---")

    # Generate occupation JSON for this species group
    print(f"\n=== Generating Occupation JSON for {species_group} ===")
    occupation_json = generate_occupation_json(species_group)

    # Generate transformation JSON for this species group
    print(f"\n=== Generating Transformation JSON for {species_group} ===")
    transformation_json = generate_transformation_json(species_group)

    # File names carry the species group, lower-cased with spaces replaced by "_"
    species_group_clean = species_group.replace(' ', '_').lower()

    occupation_filename = output_dir / f"GLAM3_biodiversity_occupation_average_{species_group_clean}.json"
    transformation_filename = output_dir / f"GLAM3_biodiversity_transformation_average_{species_group_clean}.json"

    # Explicit encoding: do not depend on the platform default
    with open(occupation_filename, "w", encoding="utf-8") as f:
        json.dump(occupation_json, f, indent=2)

    with open(transformation_filename, "w", encoding="utf-8") as f:
        json.dump(transformation_json, f, indent=2)