In [22]:
import pandas as pd

In [23]:
# Load the characterization factors and keep only the rows that have a
# non-zero CF value and a country-level native geographical resolution.
cfs = pd.read_excel("impact_world_plus_2.1_dev.xlsx", sheet_name="Sheet1")
cfs = cfs.loc[cfs["CF value"] != 0]
cfs = cfs.loc[
    (cfs["Native geographical resolution scale"] == "Country")
    # | (cfs["Native geographical resolution scale"] == "Continent")
].copy()  # explicit copy: later cells add columns to this frame, which would
#           otherwise raise SettingWithCopyWarning on the filtered slice
cfs.head()

Unnamed: 0.1,Unnamed: 0,Impact category,CF unit,Compartment,Sub-compartment,Elem flow name,CAS number,CF value,Elem flow unit,MP or Damage,Native geographical resolution scale
3050,3050,Freshwater acidification,kg SO2 eq,Air,(unspecified),"Ammonia, AD",7664-41-7,0.180141,kg,Midpoint,Country
3051,3051,Freshwater acidification,kg SO2 eq,Air,high. pop.,"Ammonia, AD",7664-41-7,0.180141,kg,Midpoint,Country
3052,3052,Freshwater acidification,kg SO2 eq,Air,low. pop.,"Ammonia, AD",7664-41-7,0.180141,kg,Midpoint,Country
3053,3053,Freshwater acidification,kg SO2 eq,Air,stratosphere + troposphere,"Ammonia, AD",7664-41-7,0.180141,kg,Midpoint,Country
3054,3054,Freshwater acidification,kg SO2 eq,Air,indoor,"Ammonia, AD",7664-41-7,0.180141,kg,Midpoint,Country


In [24]:
# Add a `location` column holding the location code found at the end of
# `Elem flow name`: the run of capital letters/hyphens that follows the last
# comma (e.g. "Ammonia, AD" -> "AD"). Names that do not end this way get NaN.
cfs["location"] = cfs["Elem flow name"].str.extract(
    r',\s*([A-Z-]+)$',
    expand=False,  # single capture group -> return a Series, not a DataFrame
)
cfs["location"].unique()

array(['AD', 'AE', 'AF', 'AG', 'AI', 'AL', 'AM', 'AO', 'AQ', 'AR', 'AS',
       'AT', 'AU', 'AU-NSW', 'AU-QLD', 'AU-SA', 'AU-TAS', 'AU-VIC',
       'AU-WA', 'AW', 'AZ', 'BA', 'BB', 'BD', 'BE', 'BF', 'BG', 'BH',
       'BI', 'BJ', 'BN', 'BO', 'BR', 'BR-AC', 'BR-AL', 'BR-AM', 'BR-AP',
       'BR-BA', 'BR-CE', 'BR-DF', 'BR-ES', 'BR-GO', 'BR-MA', 'BR-MG',
       'BR-MS', 'BR-MT', nan, 'BR-PA', 'BR-PB', 'BR-PE', 'BR-PI', 'BR-PR',
       'BR-RJ', 'BR-RN', 'BR-RO', 'BR-RR', 'BR-RS', 'BR-SC', 'BR-SE',
       'BR-SP', 'BR-TO', 'BS', 'BT', 'BV', 'BW', 'BY', 'BZ', 'CA',
       'CA-AB', 'CA-BC', 'CA-MB', 'CA-NB', 'CA-NF', 'CA-NS', 'CA-NT',
       'CA-NU', 'CA-ON', 'CA-PE', 'CA-QC', 'CA-SK', 'CA-YK', 'CD', 'CF',
       'CG', 'CH', 'CI', 'CK', 'CL', 'CM', 'CN', 'CN-AH', 'CN-BJ',
       'CN-CCG', 'CN-CQ', 'CN-CSG', 'CN-ECGC', 'CN-FJ', 'CN-GD', 'CN-GS',
       'CN-GX', 'CN-GZ', 'CN-HA', 'CN-HB', 'CN-HE', 'CN-HL', 'CN-HN',
       'CN-HU', 'CN-JL', 'CN-JS', 'CN-JX', 'CN-LN', 'CN-NCGC', 'CN-NECG',
      

In [25]:
# Remove the trailing location code (and the comma preceding it) from the
# `Elem flow name` column.
# The suffix is matched with the same pattern used to extract `location`, so
# only a genuine location code is stripped: names without one are left intact
# and re-running this cell does not eat a legitimate part of the name
# (e.g. "Ammonia, as N" stays "Ammonia, as N").
cfs["Elem flow name"] = cfs["Elem flow name"].str.replace(
    r',\s*[A-Z-]+$', '', regex=True
)
cfs["Elem flow name"].unique()

array(['Ammonia', 'Ammonia, as N', 'Ammonium carbonate',
       'Ammonium nitrate', 'Ammonium, ion', 'Nitrate', 'Nitric oxide',
       'Nitrite', 'Nitrogen dioxide', 'Nitrogen oxides', 'Sulfate',
       'Sulfur dioxide', 'Sulfur trioxide', 'Sulfuric acid', 'BOD', 'COD',
       'Phosphate', 'Phosphoric acid', 'Phosphorus pentoxide',
       'Phosphorus', 'Occupation, agriculture, mosaic (agroforestry)',
       'Occupation, annual crops', 'Occupation, artificial areas',
       'Occupation, forest, used', 'Occupation, pasture/meadow',
       'Occupation, permanent crops', 'Occupation, secondary vegetation',
       'Occupation, unspecified',
       'Transformation, from agriculture, mosaic (agroforestry)',
       'Transformation, from annual crops',
       'Transformation, from artificial areas',
       'Transformation, from forest, used',
       'Transformation, from pasture/meadow',
       'Transformation, from permanent crops',
       'Transformation, from secondary vegetation',
       '

In [26]:
# Keep only the rows for which a location code was extracted.
# .copy() detaches the result from the unfiltered frame so that the column
# assignments made in later cells do not raise SettingWithCopyWarning.
cfs = cfs.loc[cfs["location"].notna()].copy()

In [27]:
# Read the spreadsheet that pairs IW+ flow names with ecoinvent flow names
# (first sheet of the workbook).
df_map = pd.read_excel("ei_iw_mapping.xlsx", sheet_name=0)

In [28]:
# Build a lookup from each 'iw name' to the list of 'ecoinvent name' values
# listed for it in the mapping table (one entry appended per table row).
from collections import defaultdict

iw_to_ei = defaultdict(list)

# Walk the two mapping columns in parallel, one pair per row
for iw_name, ei_name in zip(df_map["iw name"], df_map["ecoinvent name"]):
    iw_to_ei[iw_name].append(ei_name)

In [29]:
# Inspect one mapping entry. .get() is used instead of [...] because indexing
# a defaultdict inserts an empty list when the key is missing.
iw_to_ei.get("Ammonium, ion", [])

['Ammonium, ion', 'Ammonium']

In [30]:
# Probe a flow name that may be absent from the mapping. .get() returns the
# same empty list as indexing would, without adding the missing key to the
# defaultdict as a side effect.
iw_to_ei.get("Occupation, forest, intensive", [])

[]

In [31]:
# Translate the `Compartment` labels into ecoinvent main compartments via the
# main_comp dictionary, storing the result in a new
# `ecoinvent_main_compartment` column.

main_comp = {
    "Air": "air",
    "Raw": "natural resource",
    "Soil": "soil",
    "Water": "water",
}
cfs["ecoinvent_main_compartment"] = cfs["Compartment"].map(main_comp)

# Sanity check: print the `Compartment` labels that main_comp does not cover
# (an empty array means every row was mapped).
print(cfs["Compartment"][cfs["ecoinvent_main_compartment"].isna()].unique())

[]


In [32]:
# Translate the `Sub-compartment` labels into ecoinvent sub-compartments via
# the sub_comp dictionary, storing the result in a new
# `ecoinvent_sub_compartment` column.

sub_comp = {
    "(unspecified)": "unspecified",
    "agricultural": "agricultural",
    "biotic": "biotic",
    "groundwater": "ground-",
    "groundwater, long-term": "ground-, long-term",
    "high. pop.": "urban air close to ground",
    "in air": "in air",
    "in ground": "in ground",
    "in water": "in water",
    "indoor": "indoor",
    "industrial": "industrial",
    "lake": "surface water",
    "land": "land",
    "low. pop.": "non-urban air or from high stacks",
    "low. pop., long-term": "low population density, long-term",
    "ocean": "ocean",
    "river": "surface water",
    "stratosphere + troposphere": "lower stratosphere + upper troposphere",
}

cfs["ecoinvent_sub_compartment"] = cfs["Sub-compartment"].map(sub_comp)

# Sanity check: print the `Sub-compartment` labels that sub_comp does not
# cover (an empty array means every row was mapped).
print(cfs["Sub-compartment"][cfs["ecoinvent_sub_compartment"].isna()].unique())

[]


In [33]:
# Is this flow name present among the cleaned `Elem flow name` values?
"Occupation, artificial areas" in set(cfs["Elem flow name"])

True

In [34]:
import copy

# Build, for every (impact category, midpoint/damage) pair that has rows in
# `cfs`, the list of exchange records to export. Keys of `cfs_data` have the
# form "<impact category>_<midpoint|damage>".
cfs_data = {}
for category in cfs["Impact category"].unique():
    for impact_type in cfs["MP or Damage"].unique():
        subset = cfs.loc[
            (cfs["Impact category"] == category)
            & (cfs["MP or Damage"] == impact_type)
        ]

        if len(subset) == 0:
            continue

        exchanges = []
        # Signatures of the records already emitted: makes the duplicate check
        # a set lookup instead of a scan over the whole list of dicts.
        seen = set()

        for _, row in subset.iterrows():
            if row["CF value"] == 0:
                continue

            # Only the main compartment is kept when the sub-compartment is
            # "unspecified"; otherwise both levels are exported.
            if row["ecoinvent_sub_compartment"] != "unspecified":
                categories = (
                    row["ecoinvent_main_compartment"],
                    row["ecoinvent_sub_compartment"],
                )
            else:
                categories = (row["ecoinvent_main_compartment"],)

            # .get() rather than [...]: indexing the defaultdict would insert
            # an empty list for every flow name that has no mapping.
            for ecoinvent_name in iw_to_ei.get(row["Elem flow name"], []):
                signature = (
                    ecoinvent_name,
                    categories,
                    row["location"],
                    row["CF value"],
                )
                if signature in seen:
                    continue
                seen.add(signature)

                exchanges.append(
                    {
                        "supplier": {
                            "name": ecoinvent_name,
                            "categories": list(categories),
                            "matrix": "biosphere"
                        },
                        "consumer": {
                            "location": row["location"],
                            "matrix": "technosphere"
                        },
                        "value": row["CF value"]
                    }
                )

        cfs_data[f"{category}_{impact_type.lower()}"] = exchanges


In [35]:
# save each category as a separate json file
import json

# Unit written to each exported file, keyed by
# "<impact category>_<midpoint|damage>".
units = {
    # Midpoint entries: one unit per key
    "Climate change, long term_midpoint": "kg CO2 eq (long)",
    "Climate change, short term_midpoint": "kg CO2 eq (short)",
    "Fossil and nuclear energy use_midpoint": "MJ deprived",
    "Freshwater acidification_midpoint": "kg SO2 eq",
    "Freshwater ecotoxicity_midpoint": "CTUe",
    "Freshwater eutrophication_midpoint": "kg PO4 P-lim eq",
    "Human toxicity cancer_midpoint": "CTUh",
    "Human toxicity non-cancer_midpoint": "CTUh",
    "Ionizing radiations_midpoint": "Bq C-14 eq",
    "Land occupation, biodiversity_midpoint": "m2 arable land eq .yr",
    "Land transformation, biodiversity_midpoint": "m2 arable land eq",
    "Marine eutrophication_midpoint": "kg N N-lim eq",
    "Mineral resources use_midpoint": "kg deprived",
    "Ozone layer depletion_midpoint": "kg CFC-11 eq",
    "Particulate matter formation_midpoint": "kg PM2.5 eq",
    "Photochemical ozone formation_midpoint": "kgNOxeq",
    "Plastics physical effects on biota_midpoint": "CTUe",
    "Terrestrial acidification_midpoint": "kg SO2 eq",
    "Water scarcity_midpoint": "m3 world-eq",
    # Damage entries expressed in DALY
    **dict.fromkeys(
        [
            "Climate change, human health, long term_damage",
            "Climate change, human health, short term_damage",
            "Human toxicity cancer, long term_damage",
            "Human toxicity cancer, short term_damage",
            "Human toxicity non-cancer, long term_damage",
            "Human toxicity non-cancer, short term_damage",
            "Ionizing radiations, human health_damage",
            "Ozone layer depletion_damage",
            "Particulate matter formation_damage",
            "Photochemical ozone formation, human health_damage",
            "Water availability, human health_damage",
        ],
        "DALY",
    ),
    # Damage entries expressed in PDF.m2.yr
    **dict.fromkeys(
        [
            "Climate change, ecosystem quality, long term_damage",
            "Climate change, ecosystem quality, short term_damage",
            "Fisheries impact_damage",
            "Freshwater acidification_damage",
            "Freshwater ecotoxicity, long term_damage",
            "Freshwater ecotoxicity, short term_damage",
            "Freshwater eutrophication_damage",
            "Ionizing radiations, ecosystem quality_damage",
            "Land occupation, biodiversity_damage",
            "Land transformation, biodiversity_damage",
            "Marine acidification, long term_damage",
            "Marine acidification, short term_damage",
            "Marine ecotoxicity, long term_damage",
            "Marine ecotoxicity, short term_damage",
            "Marine eutrophication_damage",
            "Photochemical ozone formation, ecosystem quality_damage",
            "Plastics physical effects on biota_damage",
            "Terrestrial acidification_damage",
            "Terrestrial ecotoxicity, long term_damage",
            "Terrestrial ecotoxicity, short term_damage",
            "Thermally polluted water_damage",
            "Water availability, freshwater ecosystem_damage",
            "Water availability, terrestrial ecosystem_damage",
        ],
        "PDF.m2.yr",
    ),
}

# Write one JSON file per entry of `cfs_data`.
# NOTE(review): the output directory is an absolute, machine-specific path.
for category, exchanges in cfs_data.items():
    # Assemble the payload before opening the file: a missing entry in
    # `units` then raises KeyError without leaving an empty, truncated file.
    payload = {
        "name": f"ImpactWorld+ 2.1 - {category}",
        "unit": units[category],
        "version": "2.1",
        "exchanges": exchanges,
    }
    with open(
        f"/Users/romain/GitHub/edges/edges/data/ImpactWorld+ 2.1_{category}.json",
        "w",
        encoding="utf-8",
    ) as f:
        print(f"Saving {category}")
        json.dump(payload, f, indent=2)

Saving Freshwater acidification_midpoint
Saving Freshwater acidification_damage
Saving Freshwater ecotoxicity_midpoint
Saving Freshwater eutrophication_midpoint
Saving Freshwater eutrophication_damage
Saving Land occupation, biodiversity_midpoint
Saving Land occupation, biodiversity_damage
Saving Land transformation, biodiversity_midpoint
Saving Land transformation, biodiversity_damage
Saving Marine eutrophication_midpoint
Saving Marine eutrophication_damage
Saving Particulate matter formation_midpoint
Saving Particulate matter formation_damage
Saving Photochemical ozone formation_midpoint
Saving Terrestrial acidification_midpoint
Saving Terrestrial acidification_damage
Saving Water scarcity_midpoint
Saving Photochemical ozone formation, human health_damage
Saving Water availability, human health_damage
Saving Freshwater ecotoxicity, long term_damage
Saving Freshwater ecotoxicity, short term_damage
Saving Marine ecotoxicity, long term_damage
Saving Marine ecotoxicity, short term_damage

In [21]:
# Names of all category/impact-type entries that were built
list(cfs_data)

['Freshwater acidification_midpoint',
 'Freshwater acidification_damage',
 'Freshwater ecotoxicity_midpoint',
 'Freshwater ecotoxicity_damage',
 'Freshwater eutrophication_midpoint',
 'Freshwater eutrophication_damage',
 'Land occupation, biodiversity_midpoint',
 'Land occupation, biodiversity_damage',
 'Land transformation, biodiversity_midpoint',
 'Land transformation, biodiversity_damage',
 'Marine eutrophication_midpoint',
 'Marine eutrophication_damage',
 'Particulate matter formation_midpoint',
 'Particulate matter formation_damage',
 'Photochemical ozone formation_midpoint',
 'Photochemical ozone formation_damage',
 'Terrestrial acidification_midpoint',
 'Terrestrial acidification_damage',
 'Water scarcity_midpoint',
 'Water scarcity_damage',
 'Photochemical ozone formation, human health_midpoint',
 'Photochemical ozone formation, human health_damage',
 'Water availability, human health_midpoint',
 'Water availability, human health_damage',
 'Freshwater ecotoxicity, long term_mi

In [20]:
import json

# Load one of the exported files back for inspection.
# The directory is spelled exactly as in the cell that writes the files
# ("GitHub"), so the read also works on a case-sensitive filesystem.
with open(
    "/Users/romain/GitHub/edges/edges/data/ImpactWorld+ 2.1_Freshwater acidification_midpoint.json",
    encoding="utf-8",
) as f:
    data = json.load(f)

def get_key(record):
    """Return a hashable tuple identifying an exchange record.

    The tuple is (supplier name, sorted supplier categories, supplier matrix,
    consumer location, consumer matrix). The record's "value" is not part of
    it, so two records that differ only in value share the same key.
    """
    supplier, consumer = record["supplier"], record["consumer"]
    # Categories are sorted so that their order does not affect the key
    ordered_categories = tuple(sorted(supplier["categories"]))
    return (
        supplier["name"],
        ordered_categories,
        supplier["matrix"],
        consumer["location"],
        consumer["matrix"],
    )

# Scan the loaded records for entries that share the same identifying key.
# The exported files are JSON objects whose "exchanges" entry holds the list
# of records; iterating the object itself would only yield its top-level key
# names. A bare list of records is accepted as well.
records = data["exchanges"] if isinstance(data, dict) else data

seen_keys = set()
duplicates = []
for rec in records:
    key = get_key(rec)
    if key in seen_keys:
        # Same supplier/consumer identity as an earlier record
        duplicates.append(rec)
    else:
        seen_keys.add(key)

if duplicates:
    print("Duplicates found:")
    for dup in duplicates:
        print(dup)

Duplicates found:
