In [1]:
import csv
import json
import numpy as np
import pandas as pd
import country_converter as coco

In [2]:
# Relative path of the Excel workbook that holds the input data
fp = "results_nonbilateral.xlsx"

In [3]:
# Load the "Sheet1" worksheet of the workbook at `fp` into a DataFrame
df = pd.read_excel(fp, sheet_name="Sheet1")

In [4]:
# Keep only the rows whose "Year" column equals 2022
df = df.loc[df["Year"].eq(2022)]

In [5]:
# Flag every row in which at least one key column holds the literal "Unknown"
key_columns = ["Dataset name", "Dataset reference product", "Operator"]
unknown_mask = df[key_columns].eq("Unknown").any(axis=1)

# Report the distinct resources that the filter below removes
deleted_resources = df.loc[unknown_mask, "Resource Name"].unique()
print("Deleted resources due to 'Unknown' entries:")
for resource_name in deleted_resources:
    print(f"- {resource_name}")

# Drop the flagged rows; copy so later column assignments act on an independent frame
df = df[~unknown_mask].copy()

Deleted resources due to 'Unknown' entries:
- Diatomite


In [6]:
# Swap missing values for "" in each of the text columns listed below
string_columns = ["Dataset name", "Dataset reference product", "Operator", "Excludes"]
for column in string_columns:
    df[column] = df[column].fillna("")

In [7]:
def add_market(x):
    """Return ``x`` as a sorted, de-duplicated "; "-joined list that includes "market".

    Parameters
    ----------
    x : str, float or None
        A ";"-separated list of terms. ``None``, NaN and the empty string
        are all treated as "no terms".

    Returns
    -------
    str
        The distinct, whitespace-stripped terms of ``x`` plus "market",
        sorted and joined with "; ". The result is never empty because
        "market" is always present.
    """
    # None would otherwise be stringified into a bogus "None" term.
    raw = "" if x is None else str(x)

    # Drop blank fragments and stringified NaN values (str(float("nan")) == "nan").
    terms = {
        part.strip()
        for part in raw.split(";")
        if part.strip() and part.strip().lower() != "nan"
    }
    terms.add("market")
    return "; ".join(sorted(terms))

# def add_market(x):
#     if pd.isna(x) or str(x).lower() == "nan":
#         return "market"
#     items = [item.strip() for item in str(x).split(";") if item.strip() and item.strip().lower() != "nan"]
#     if "market" not in items:
#         items.append("market")
#     return "; ".join(sorted(set(items)))

In [8]:
# Gather each distinct country name exactly once across both columns (importing
# names first, then exporting names not yet seen). Converting the union avoids
# running the converter twice for names that occur on both sides of a trade
# pair, and dropna() keeps missing names away from the converter.
country_names = pd.unique(
    pd.concat([df["Importing Country"], df["Exporting Country"]]).dropna()
)

# Name -> ISO2 code. Unmatched names come back as the string "not found",
# which is the sentinel tested for below.
converted = {name: coco.convert(name, to="iso2") for name in country_names}

# List the names that could not be converted so they can be reviewed
for name, code in converted.items():
    if code == "not found":
        print(name)

# Keep only the successfully converted names
country_map = {name: code for name, code in converted.items() if code != "not found"}

Other Asia, nes not found in regex
Global not found in regex
Other Asia, nes not found in regex
Unknown not found in regex


Other Asia, nes
Global
Unknown


In [9]:
data = []

# Translate country names into location codes; any name missing from
# country_map falls back to "GLO".
df["Exporting Location"] = df["Exporting Country"].map(country_map).fillna("GLO")
df["Importing Location"] = df["Importing Country"].map(country_map).fillna("GLO")

CF_COLUMN = "GeoPolRisk Characterization Factor Normalized to copper [-]"

grouped = df.groupby(["Resource Name", "Exporting Country", "Importing Country"])

# One entry per (resource, exporter, importer) group: the mean factor is the
# value, the summed "Specific Imports" is the weight, and the descriptive
# fields are read from the group's first row.
for (resource, export_country, importing_country), group in grouped:
    cf_series = group[CF_COLUMN]
    cf_mean = cf_series.mean()

    # Skip groups without a usable factor. A group whose factors are all
    # missing has a NaN mean; NaN != 0 is True, so it has to be rejected
    # explicitly or it would end up in the output as a NaN value.
    if pd.isna(cf_mean) or cf_mean == 0:
        continue

    dataset_name = group["Dataset name"].iloc[0]
    ref_product = group["Dataset reference product"].iloc[0]
    operator = group["Operator"].iloc[0]
    excludes_val = group["Excludes"].iloc[0]

    supplier_info = {
        "name": "" if pd.isna(dataset_name) else dataset_name,
        "reference product": "" if pd.isna(ref_product) else ref_product,
        "location": group["Exporting Location"].iloc[0],
        "operator": "" if pd.isna(operator) else operator,
        "matrix": "technosphere",
    }

    # "Excludes" is a ";"-separated string; missing or blank means no exclusions.
    if pd.isna(excludes_val):
        supplier_info["excludes"] = []
    else:
        supplier_info["excludes"] = [
            term.strip() for term in excludes_val.split(";") if term.strip()
        ]

    data.append({
        "supplier": supplier_info,
        "consumer": {
            "location": group["Importing Location"].iloc[0],
            "matrix": "technosphere",
        },
        # Plain Python floats serialise to JSON whatever the column's NumPy dtype.
        "value": float(cf_mean),
        "weight": float(group["Specific Imports"].sum()),
    })


In [10]:
# data = []

# df["Exporting Location"] = df["Exporting Country"].map(country_map).fillna("GLO")
# df["Importing Location"] = df["Importing Country"].map(country_map).fillna("GLO")


# grouped = df.groupby(["Resource Name", "Exporting Country", "Importing Country"])

# for (resource, export_country, importing_country), group in grouped:
#     if not group["GeoPolRisk Characterization Factor Normalized to copper [-]"].empty:
#         if group["GeoPolRisk Characterization Factor Normalized to copper [-]"].mean() != 0:
#             data.append({
#                 "supplier": {
#                     "name": group["Dataset name"].iloc[0],
#                     "reference product": group["Dataset reference product"].iloc[0],
#                     "location": group["Exporting Location"].iloc[0],
#                     "operator": group["operator"].iloc[0],
#                     "matrix": "technosphere"
#                 },
#                 "consumer": {
#                     "location": group["Importing Location"].iloc[0],
#                     "matrix": "technosphere"
#                 },
#                 "value": group["GeoPolRisk Characterization Factor Normalized to copper [-]"].mean(),
#             })


In [11]:
# Number of entries collected in `data`
len(data)

41364

In [12]:
# Show the first entry to inspect its structure
data[0]

{'supplier': {'name': 'aluminium production, primary',
  'reference product': 'aluminium, primary',
  'location': 'AL',
  'operator': 'startswith',
  'matrix': 'technosphere',
  'excludes': ['liquid', 'alloy', 'market']},
 'consumer': {'location': 'BA', 'matrix': 'technosphere'},
 'value': 0.0001192271411315043,
 'weight': 11.81174393730462}

In [13]:
# Spot-check: collect the entries for "aluminium, primary" supplied from AU to CA
target_flow = []
for entry in data:
    if entry["supplier"]["reference product"] != "aluminium, primary":
        continue
    if entry["supplier"]["location"] != "AU":
        continue
    if entry["consumer"]["location"] != "CA":
        continue
    target_flow.append(entry)
target_flow

[{'supplier': {'name': 'aluminium production, primary',
   'reference product': 'aluminium, primary',
   'location': 'AU',
   'operator': 'startswith',
   'matrix': 'technosphere',
   'excludes': ['liquid', 'alloy', 'market']},
  'consumer': {'location': 'CA', 'matrix': 'technosphere'},
  'value': 0.0002417527334203126,
  'weight': 250.3944470393658}]

In [14]:
# data[0]

In [15]:
# fp_out = "GeoPolRisk_paired_2024.json"
# with open(fp_out, "w") as f:
#     json.dump(
#             {
#                 "name": f"GeoPolRisk - country pairs",
#                 "unit": "kg copper-eq.",
#                 "version": "1.0",
#                 "exchanges": data,
#             },
#             f,
#             indent=2
#         )

# Add additional values

In [16]:
# Hard-coded extra characterization factors, one row per resource.
# Row layout: (resource, value, name, reference_product, operator, excludes)
_extra_cf_rows = [
    ("Silicon", 11.85781223, "silicon production", "silicon, metallurgical",
     "startswith", ["silicone", "market"]),
    ("Potassium", 0.297397074, "", "potassium hydroxide",
     "contains", ["market"]),
    ("Strontium", 11.23780594, "strontium carbonate", "strontium carbonate",
     "startswith", ["market"]),
    ("Iridium", 16813.59791, "", "iridium",
     "startswith", ["market"]),
    ("Ferrosilicon", 1.648469856, "ferrosilicon", "ferrosilicon",
     "startswith", ["market"]),
    ("Ruthenium", 20312.17118, "", "ruthenium",
     "startswith", ["market"]),
]

# Expand each row into a record; every record carries a weight of 1.
extra_cf_data = [
    {
        "resource": resource,
        "value": value,
        "weight": 1,
        "name": name,
        "reference_product": reference_product,
        "operator": operator,
        "excludes": excludes,
    }
    for resource, value, name, reference_product, operator, excludes in _extra_cf_rows
]

In [17]:
# silicon_locations = ["APAC","AU","BR","BT","CA","CN","DE","ES","FR","IN","IS",
#                      "KZ","MY","NO","PL","RU","UA","US"]
# ruthenium_locations = ["CA","RU","ZA","ZW"]

# potassium_locations = ["RER"]
# ruthenium_locations = ["","","","",""]
# ruthenium_locations = ["","","","",""]
# ruthenium_locations = ["","","","",""]

In [18]:
# Add a single global entry for each extra CF.
# An entry is only appended when an identical one is not already in `data`,
# so running this cell more than once does not create duplicates.
for item in extra_cf_data:
    supplier = {
        "name": item["name"],
        "reference product": item["reference_product"],
        "location": "",  # empty string for global match
        "operator": item["operator"],
        "matrix": "technosphere",
        # Copy the list so the entry does not share state with extra_cf_data
        "excludes": list(item["excludes"]),
    }

    consumer = {
        "location": "",  # global match
        "matrix": "technosphere",
    }

    entry = {
        "supplier": supplier,
        "consumer": consumer,
        "value": item["value"],
        "weight": item["weight"],
    }

    if entry not in data:
        data.append(entry)

In [19]:
# Write the collected entries, together with the name, unit and version
# metadata, to a JSON file.
fp_out = "GeoPolRisk_paired_2024.json"
payload = {
    "name": "GeoPolRisk - country pairs",
    "unit": "kg copper-eq.",
    "version": "1.0",
    "exchanges": data,
}

# An explicit encoding keeps the written file independent of the platform default.
with open(fp_out, "w", encoding="utf-8") as f:
    json.dump(payload, f, indent=2)