# 1. General Problem Set-up

## Import packages

In [1]:
import pandas as pd
import numpy as np

## Import LCA matrices from OpenLCA

In [2]:
# Raw matrix exports; the files carry no header row.
A_df, B_df, C_df = (
    pd.read_csv(path, header=None)
    for path in ("data/A.csv", "data/B.csv", "data/C.csv")
)


def _as_numeric_array(frame):
    """Return ``frame`` as a NumPy array with every column parsed as numbers.

    Entries that cannot be parsed are replaced by NaN (``errors='coerce'``).
    """
    return frame.apply(pd.to_numeric, errors='coerce').to_numpy()


# Numeric versions of the three matrices used by the rest of the notebook
A = _as_numeric_array(A_df)
B = _as_numeric_array(B_df)
C = _as_numeric_array(C_df)

In [3]:
# Index tables accompanying the A, B and C matrices.
# The cells below rely on A_index_df having an 'index' column (used as the
# row/column position in A) and a 'provider name' column.
A_index_df = pd.read_csv("data/index_A.csv")
B_index_df = pd.read_csv("data/index_B.csv")
C_index_df = pd.read_csv("data/index_C.csv")

## Removing Transportation (deregionalization)

In [4]:
# Providers to be removed from the system; only the 'provider name' column is read below.
A_transport_df = pd.read_csv("data/Transportation_A.csv")

In [5]:
# Series keyed by provider name; each value is the list of 'index' entries
# that carry this name in A_index_df.
mapping = A_index_df.groupby('provider name')['index'].apply(list)

# Walk the transport providers in file order and collect the indices of every
# provider that is also present in A_index_df. Names without a match are
# skipped; duplicates are kept here.
matched_indices_transport = []
for name in A_transport_df['provider name']:
    if name in mapping:
        matched_indices_transport.extend(mapping[name])

In [6]:
import numpy as np

# Remove the transport providers from A, B and A_index_df.
#
# matched_indices_transport holds the A_index_df['index'] values to remove;
# they are used directly as row/column positions in A and column positions in B.
# The array is built with an explicit integer dtype: with no matches a bare
# np.array([]) would be float64, which np.delete cannot use as an index array.
#
# NOTE: this cell is not idempotent. Re-running it without re-running the
# loading cells deletes the same positions again from the already reduced A and B.
to_drop = np.array(sorted({int(i) for i in matched_indices_transport}), dtype=int)

# 1) Remove from A_index_df
mask_keep = ~A_index_df['index'].isin(to_drop)
A_index_df = A_index_df.loc[mask_keep].copy()

# 2) Remove corresponding rows and columns from A
A = np.delete(A, to_drop, axis=0)  # remove rows
A = np.delete(A, to_drop, axis=1)  # remove columns

# 3) Remove the same columns from B (keep rows)
B = np.delete(B, to_drop, axis=1)

# 4) Renumber the index column so it matches the new positions 0..n-1
A_index_df['index'] = np.arange(len(A_index_df), dtype=int)

## Aggregating electricity

In [7]:
# Electricity providers to be merged into the global mix; only 'provider name' is read below.
A_elec_df = pd.read_csv("data/Electricity_A.csv")

In [8]:
# Merge all listed electricity providers into the "Electricity Mix (Global)" row
# and remove the now redundant provider rows/columns.
#
# Inputs assumed:
# A : numeric numpy array (rows x cols)
# B : numeric numpy array whose columns follow the same ordering as A's columns
# A_index_df : DataFrame with columns ["index", "provider name", "flow name", ...]
# A_elec_df : DataFrame with column ["provider name"] listing all electricity providers
# The indices in A_index_df["index"] align with both row and column positions of A.
#
# NOTE: not idempotent; a re-run operates on the already reduced matrices.

# 0) Build the set of electricity provider names
elec_names = set(A_elec_df['provider name'].dropna().astype(str).unique())

# 1) Find their indices in A_index_df
elec_idx = A_index_df.loc[A_index_df['provider name'].astype(str).isin(elec_names), 'index'].astype(int).unique()

# 2) Locate the mix row index (must exist)
mix_name = "Electricity Mix (Global)"
mix_rows = A_index_df.loc[A_index_df['provider name'] == mix_name, 'index'].astype(int).unique()
if len(mix_rows) == 0:
    raise ValueError("Electricity Mix (Global) not found in A_index_df['provider name'].")
# Position of the mix BEFORE any deletion below; it is stale once rows are removed,
# so look it up again through A_index_df afterwards.
mix_idx = int(mix_rows[0])

# Ensure the mix row is not purged
elec_idx_set = set(map(int, elec_idx))
elec_idx_wo_mix = sorted(elec_idx_set - {mix_idx})

# 3) Aggregate: add all electricity rows (except the mix row) into the mix row, column-wise
if len(elec_idx_wo_mix) > 0:
    # NaNs are treated as 0 on both sides of the sum
    add_block = np.nansum(A[elec_idx_wo_mix, :], axis=0)
    A[mix_idx, :] = np.nan_to_num(A[mix_idx, :]) + np.nan_to_num(add_block)

# 4) Decide what to drop
rows_to_drop = np.array(elec_idx_wo_mix, dtype=int)            # drop electricity rows except the mix row
cols_to_drop = np.array(elec_idx_wo_mix, dtype=int)            # drop electricity columns except the mix column

# (Optionally also drop the mix COLUMN; keep it if you want to retain that process as a column)
# To ALSO drop the mix column, uncomment the next line:
# cols_to_drop = np.array(sorted(elec_idx_set), dtype=int)

# 5) Remove rows/columns from A, and the matching process columns from B
n_cols_before = A.shape[1]
if rows_to_drop.size > 0:
    A = np.delete(A, rows_to_drop, axis=0)
if cols_to_drop.size > 0:
    A = np.delete(A, cols_to_drop, axis=1)
    # B shares A's column ordering (the transport removal trims both); without this
    # step B would keep columns for processes that no longer exist in A.
    # The shape guard skips B if it is not aligned with the pre-deletion A.
    if B.shape[1] == n_cols_before:
        B = np.delete(B, cols_to_drop, axis=1)

# 6) Remove the same rows from A_index_df (only rows; columns in A_index_df are metadata)
if len(elec_idx_wo_mix) > 0:
    keep_mask = ~A_index_df['index'].astype(int).isin(elec_idx_wo_mix)
    A_index_df = A_index_df.loc[keep_mask].copy()

# 7) Reset the "index" column in A_index_df to reflect 0..n-1 after deletions
A_index_df['index'] = np.arange(len(A_index_df), dtype=int)

## Identifying background flows for cost calculation

In [9]:
# Foreground providers; only the 'provider name' column is read below.
A_foreground_df = pd.read_csv("data/Foreground_A.csv")

In [10]:
import numpy as np
import pandas as pd

# Find the rows of A that feed the foreground columns but are not foreground
# processes themselves.

# Helper for consistent comparison (whitespace- and case-insensitive)
norm = lambda s: str(s).strip().casefold()

# Normalize keys (on copies, so the frames loaded earlier are not modified in place)
A_index_df = A_index_df.copy()
A_foreground_df = A_foreground_df.copy()
A_index_df['provider_key'] = A_index_df['provider name'].map(norm)
A_foreground_df['provider_key'] = A_foreground_df['provider name'].map(norm)

# Build mapping from provider key -> list of indices in A
idx_map = (
    A_index_df
    .groupby('provider_key')['index']
    .apply(lambda s: list(map(int, s)))
    .to_dict()
)

# Foreground indices (to exclude later); foreground names without a match are ignored
matched_indices = sorted({
    idx
    for key in A_foreground_df['provider_key'].unique()
    for idx in idx_map.get(key, [])
})

# Row indices with a non-zero entry in any foreground column.
# NaN counts as "non-zero" for np.nonzero, so NaNs are mapped to 0 first;
# otherwise unparsable entries would be reported as background flows.
all_nonzero = set()
for col in matched_indices:
    all_nonzero.update(np.nonzero(np.nan_to_num(A[:, col]))[0])

# Remove overlap: keep only rows that are not foreground processes
foreground_set = set(matched_indices)
all_nonzero_set = {int(x) for x in all_nonzero}
filtered_nonzero_rows = sorted(all_nonzero_set - foreground_set)


In [11]:
# Metadata rows of A_index_df whose 'index' is listed in filtered_nonzero_rows.
result_df = A_index_df[A_index_df['index'].isin(filtered_nonzero_rows)].copy()

In [12]:
# Export the selection as CSV (path is relative to the working directory; the DataFrame index is not written).
result_df.to_csv('filtered_nonzero_rows_with_names.csv', index=False)

## Importing financial data for cost calculation

In [13]:
# Financial data, read without a header row (columns are therefore labelled 0..n-1).
financial_df = pd.read_csv("data/Financial.csv", header=None)

# 2. Static Optimization

## 2.1. Base Case (Linear Fossil Economy in 2025)

### Electricity Mix

In [14]:
# Global electricity mix for 2025.
# The next cell reads the 'provider name' and '2025 Energy Mix' columns of this table.
electricity_mix_df = pd.read_csv("data/electricity_mix.csv")

In [15]:
import numpy as np
import pandas as pd

# Write the 2025 electricity mix into the "Electricity Mix (Global)" column of A.

# Key function: trims whitespace and case-folds so that names match reliably
norm = lambda s: str(s).strip().casefold()

# Attach the normalized key to copies of both tables
A_index_df = A_index_df.copy()
electricity_mix_df = electricity_mix_df.copy()
A_index_df['provider_key'] = A_index_df['provider name'].map(norm)
electricity_mix_df['provider_key'] = electricity_mix_df['provider name'].map(norm)

# Column of A that represents the global mix; abort if it is missing
mix_key = norm("Electricity Mix (Global)")
mix_idx_arr = A_index_df.loc[A_index_df['provider_key'] == mix_key, 'index'].astype(int).values
if len(mix_idx_arr) == 0:
    raise ValueError("'Electricity Mix (Global)' not found in A_index_df['provider name']")
mix_col = int(mix_idx_arr[0])

# provider key -> row positions in A (in A_index_df order), built once up front
rows_by_key = (
    A_index_df.groupby('provider_key')['index']
    .apply(lambda s: [int(v) for v in s])
    .to_dict()
)

# Overwrite A[row, mix_col] with the mix value of each listed provider.
# Providers that do not occur in A_index_df are silently skipped.
for prov_key, energy_val in zip(electricity_mix_df['provider_key'],
                                electricity_mix_df['2025 Energy Mix']):
    for ridx in rows_by_key.get(prov_key, ()):
        A[ridx, mix_col] = energy_val

In [16]:
# Spot check: entry of A in the hydropower row and the global-mix column.
_check_names = A_index_df["provider name"]
_check_row = int(A_index_df.loc[_check_names == "Electricity, hydropower (life cycle)", "index"].iloc[0])
_check_col = int(A_index_df.loc[_check_names == "Electricity Mix (Global)", "index"].iloc[0])
A[_check_row, _check_col]

-0.1432

### Collection and Sorting

In [17]:
# Inputs for the collection-and-sorting step.
# The cells below read 'provider name' from all three tables and the
# 'Linear Economy' column from the two collection tables.
packaging_types_df = pd.read_csv("data/packaging_types.csv")
collection_recyclate_df = pd.read_csv("data/collection_Recyclate.csv")
collection_msw_df = pd.read_csv("data/collection_MSW.csv")

In [18]:
import numpy as np
import pandas as pd
import re

# MSW pass: for every packaging provider, take X = |A[provider, use_col]| and
# write X * Y into the "Use & Collection" column at each mapped destination row,
# where Y is the destination's "Linear Economy" value from collection_msw_df.

# ---- exact-key matcher (case/space insensitive, keeps stage prefixes) ----
ekey = lambda s: str(s).strip().casefold()

# ---- copies + normalized keys ----
A_index_df = A_index_df.copy()
packaging_types_df = packaging_types_df.copy()
collection_msw_df = collection_msw_df.copy()

for _frame in (A_index_df, packaging_types_df, collection_msw_df):
    _frame["prov_exact"] = _frame["provider name"].map(ekey)

# If you know the 3 mapping columns, set them here, e.g.:
# mapping_cols = ["dest_1", "dest_2", "dest_3"]
# Otherwise, auto-detect up to 3 string columns besides 'provider name'.
# Columns ending in "_exact" are helper columns created by this cell; they are
# excluded so that a re-run cannot pick them up as extra mapping columns
# (which would add every contribution twice).
# NOTE(review): on recent pandas versions is_string_dtype may reject object
# columns that contain missing values - confirm that all intended destination
# columns are detected, or set mapping_cols explicitly.
mapping_cols = [
    c for c in packaging_types_df.columns
    if c not in ("provider name", "prov_exact")
    and not str(c).endswith("_exact")
    and pd.api.types.is_string_dtype(packaging_types_df[c])
][:3]
if not mapping_cols:
    raise ValueError("Couldn't detect mapping columns; please set 'mapping_cols' explicitly.")

# Normalize destination columns into "<col>_exact"
exact_cols = [col + "_exact" for col in mapping_cols]
for col, exact_col in zip(mapping_cols, exact_cols):
    packaging_types_df[exact_col] = packaging_types_df[col].map(ekey)

# provider name -> list of integer indices in A (rows/cols)
idx_map_exact = (
    A_index_df.groupby("prov_exact")["index"]
              .apply(lambda s: list(map(int, s)))
              .to_dict()
)

# "Use & Collection" column index (raises IndexError if the provider is missing)
use_col = int(A_index_df.loc[A_index_df["provider name"]=="Use & Collection","index"].iloc[0])

# ---- X: abs(A[row, use_col]) per packaging provider (exact match) ----
# Must be read BEFORE the destination cells are zeroed below.
X_by_src = {}
for k in packaging_types_df["prov_exact"].dropna().unique():
    rows = idx_map_exact.get(k, [])
    if rows:
        # if multiple rows per provider, only the first is used; change to sum/mean if needed
        X_by_src[k] = abs(float(A[int(rows[0]), use_col]))

# ---- Y: MSW "Linear Economy" per provider (exact match, first occurrence wins) ----
Y_by_key = (collection_msw_df.dropna(subset=["prov_exact"])
            .groupby("prov_exact")["Linear Economy"]
            .first()
            .to_dict())

# ---- zero out destination cells we're going to rewrite (avoid accumulation on reruns) ----
dest_rows = set()
for dest_key in packaging_types_df[exact_cols].to_numpy().ravel():
    dest_rows.update(idx_map_exact.get(dest_key, ()))
if dest_rows:
    A[list(dest_rows), use_col] = 0.0

# ---- assign: A[row, use_col] += X * Y for each mapped destination (exact match) ----
for src_k, *dest_keys in packaging_types_df[["prov_exact"] + exact_cols].itertuples(index=False, name=None):
    X = X_by_src.get(src_k)
    if X is None:
        continue  # packaging provider not present in A
    for dk in dest_keys:
        if not dk:
            continue
        Y = Y_by_key.get(dk)
        if Y is None or pd.isna(Y):
            continue  # no MSW share known for this destination
        contrib = float(abs(X) * float(Y))
        for r in idx_map_exact.get(dk, []):
            A[int(r), use_col] += contrib

In [19]:
# Spot check: entry of A in the multi-material food-bottle disposal row
# and the "Use & Collection" column.
_check_names = A_index_df["provider name"]
_check_row = int(A_index_df.loc[_check_names == "Disposal, Multi-material Food Bottles", "index"].iloc[0])
_check_col = int(A_index_df.loc[_check_names == "Use & Collection", "index"].iloc[0])
A[_check_row, _check_col]

0.021840000000000002

In [20]:
# --- Recyclate pass: X * Y_recyclate added into A[:, use_col] ---
# Reuses ekey, mapping_cols, idx_map_exact, X_by_src and use_col from the MSW pass.
# NOTE: with reset_dest = False the contributions are added on top of the current
# values, so running this cell twice adds them twice.

collection_recyclate_df = collection_recyclate_df.copy()
collection_recyclate_df["prov_exact"] = collection_recyclate_df["provider name"].map(ekey)

# Y from recyclate: first "Linear Economy" value per provider key
Y_rec_by_key = (
    collection_recyclate_df.dropna(subset=["prov_exact"])
    .groupby("prov_exact")["Linear Economy"]
    .first()    # change to sum()/mean() if needed
    .to_dict()
)

# Normalized destination columns created by the MSW pass
_rec_exact_cols = [col + "_exact" for col in mapping_cols]

# Optionally clear destination cells before this pass (default = keep MSW values and add recyclate)
reset_dest = False
if reset_dest:
    rec_dest_rows = set()
    for _dest_key in packaging_types_df[_rec_exact_cols].to_numpy().ravel():
        rec_dest_rows.update(idx_map_exact.get(_dest_key, []))
    if rec_dest_rows:
        A[np.array(sorted(rec_dest_rows), dtype=int), use_col] = 0.0

# Write: A[row, use_col] += X * Y_recyclate
_rec_rows = packaging_types_df[["prov_exact"] + _rec_exact_cols].itertuples(index=False, name=None)
for src_k, *dest_keys in _rec_rows:
    X = X_by_src.get(src_k)
    if X is None:
        continue  # packaging provider has no X from the MSW pass
    for dk in dest_keys:
        if not dk:
            continue
        Y = Y_rec_by_key.get(dk)
        if Y is None or pd.isna(Y):
            continue  # no recyclate share known for this destination
        contrib = float(abs(X) * float(Y))
        for r in idx_map_exact.get(dk, []):
            A[int(r), use_col] += contrib

In [21]:
# Spot check: entry of A in the high-tech sorting (HDPE drinking bottles) row
# and the "Use & Collection" column.
_check_names = A_index_df["provider name"]
_check_row = int(A_index_df.loc[_check_names == "High-tech Sorting, Collected HDPE Drinking Bottles", "index"].iloc[0])
_check_col = int(A_index_df.loc[_check_names == "Use & Collection", "index"].iloc[0])
A[_check_row, _check_col]

0.0

### Monomers

In [22]:
# Decision table for monomers; the next cell reads its 'Provider name',
# 'Input parameters names' and 'Linear Economy' columns.
monomers_decisions_df = pd.read_csv("data/monomers_decisions.csv")

In [23]:
import numpy as np
import pandas as pd

# Scale entries of A by the "Linear Economy" value of each monomer decision:
# 'Input parameters names' selects the row(s), 'Provider name' the column(s).
# NOTE: the scaling is multiplicative and in place, so re-running this cell
# applies the factors a second time.

# ---- helper: normalize names for reliable matching ----
ekey = lambda s: str(s).strip().casefold()

# ---- defensive copies + normalized keys ----
A_index_df = A_index_df.copy()
monomers_decisions_df = monomers_decisions_df.copy()

A_index_df["prov_key"] = A_index_df["provider name"].map(ekey)
monomers_decisions_df["col_key"]  = monomers_decisions_df["Provider name"].map(ekey)
monomers_decisions_df["row_key"]  = monomers_decisions_df["Input parameters names"].map(ekey)

# provider key -> list of integer indices in A (rows/cols)
idx_map = (
    A_index_df.groupby("prov_key")["index"]
    .apply(lambda s: list(map(int, s)))
    .to_dict()
)

# ensure "Linear Economy" is numeric (unparsable values become NaN and are skipped)
monomers_decisions_df["Linear Economy"] = pd.to_numeric(
    monomers_decisions_df["Linear Economy"], errors="coerce"
)

# ---- apply updates: for each row in decisions, scale A[row_idx, col_idx] *= X ----
n_pairs = 0   # number of (row, col) cells that were scaled
skipped = 0   # decision rows without a value or without a match in A

for _, drow in monomers_decisions_df.iterrows():
    x = drow["Linear Economy"]
    if pd.isna(x):
        skipped += 1
        continue

    row_idxs = idx_map.get(drow["row_key"], [])
    col_idxs = idx_map.get(drow["col_key"], [])

    if not row_idxs or not col_idxs:
        skipped += 1
        continue

    # multiply all combinations (row, col) by X; a NaN cell is treated as 0
    for ri in row_idxs:
        for ci in col_idxs:
            A[int(ri), int(ci)] = np.nan_to_num(A[int(ri), int(ci)]) * float(x)
            n_pairs += 1

# Report the counters so unmatched decision rows do not go unnoticed
print(f"Updated {n_pairs} (row, col) cells in A; skipped {skipped} decision rows lacking matches or X.")

Updated 12 (row, col) cells in A; skipped 0 decision rows lacking matches or X.


In [25]:
# Spot check: entry of A in the fossil steam-cracker ethylene row and the "Ethylene" column.
_check_names = A_index_df["provider name"]
_check_row = int(A_index_df.loc[_check_names == "Ethylene production, fossil, steam cracker", "index"].iloc[0])
_check_col = int(A_index_df.loc[_check_names == "Ethylene", "index"].iloc[0])
A[_check_row, _check_col]

-1.0

### Carbon Capture

In [26]:
# Decision table for carbon capture; the next cell reads its 'Provider name',
# 'Input parameters names' and 'Linear Economy' columns.
carbon_capture_decisions_df = pd.read_csv("data/carbon_capture_decisions.csv")

In [27]:
import numpy as np
import pandas as pd

# Scale entries of A by the "Linear Economy" value of each carbon-capture decision:
# 'Input parameters names' selects the row(s), 'Provider name' the column(s).
# NOTE: the scaling is multiplicative and in place, so re-running this cell
# applies the factors a second time.

# normalize for reliable matching
ekey = lambda s: str(s).strip().casefold()

# copies + keys
A_index_df = A_index_df.copy()
carbon_capture_decisions_df = carbon_capture_decisions_df.copy()

A_index_df["prov_key"] = A_index_df["provider name"].map(ekey)
carbon_capture_decisions_df["col_key"] = carbon_capture_decisions_df["Provider name"].map(ekey)
carbon_capture_decisions_df["row_key"] = carbon_capture_decisions_df["Input parameters names"].map(ekey)

# provider key -> list of integer indices (row/col) in A
idx_map = (
    A_index_df.groupby("prov_key")["index"]
    .apply(lambda s: list(map(int, s)))
    .to_dict()
)

# ensure numeric X (unparsable values become NaN and are skipped)
carbon_capture_decisions_df["Linear Economy"] = pd.to_numeric(
    carbon_capture_decisions_df["Linear Economy"], errors="coerce"
)

# apply: for each decision row, A[row, col] *= X
n_pairs = 0   # number of (row, col) cells that were scaled
skipped = 0   # decision rows without a value or without a match in A

for _, d in carbon_capture_decisions_df.iterrows():
    X = d["Linear Economy"]
    if pd.isna(X):
        skipped += 1
        continue

    row_idxs = idx_map.get(d["row_key"], [])
    col_idxs = idx_map.get(d["col_key"], [])

    if not row_idxs or not col_idxs:
        skipped += 1
        continue

    # a NaN cell is treated as 0 before scaling
    for ri in row_idxs:
        for ci in col_idxs:
            A[int(ri), int(ci)] = np.nan_to_num(A[int(ri), int(ci)]) * float(X)
            n_pairs += 1

# Report the counters so unmatched decision rows do not go unnoticed
print(f"carbon_capture_decisions_df: updated {n_pairs} cells; skipped {skipped} rows.")

carbon_capture_decisions_df: updated 6 cells; skipped 0 rows.


In [31]:
# Spot check: entry of A in the "Plastic Incineration CO2 to the atmosphere" row
# and the "Plastic Incineration CO2" column.
_check_names = A_index_df["provider name"]
_check_row = int(A_index_df.loc[_check_names == "Plastic Incineration CO2 to the atmosphere", "index"].iloc[0])
_check_col = int(A_index_df.loc[_check_names == "Plastic Incineration CO2", "index"].iloc[0])
A[_check_row, _check_col]

1.0

### Microplastics Treatment

In [32]:
# Decision table for microplastics treatment; the next cell reads its 'Provider name',
# 'Input parameters names' and 'Linear Economy' columns.
microplastics_decisions_df = pd.read_csv("data/microplastics_decisions.csv")

In [33]:
import numpy as np
import pandas as pd

# Scale entries of A by the "Linear Economy" value of each microplastics decision:
# 'Input parameters names' selects the row(s), 'Provider name' the column(s).

# Name normalizer: trim whitespace and case-fold
ekey = lambda s: str(s).strip().casefold()

# Work on copies and attach the lookup keys
A_index_df = A_index_df.copy()
microplastics_decisions_df = microplastics_decisions_df.copy()

A_index_df["prov_key"] = A_index_df["provider name"].map(ekey)
microplastics_decisions_df["col_key"] = microplastics_decisions_df["Provider name"].map(ekey)
microplastics_decisions_df["row_key"] = microplastics_decisions_df["Input parameters names"].map(ekey)

# Lookup table: provider key -> positions in A (valid for rows and columns)
idx_map = (
    A_index_df.groupby("prov_key")["index"]
    .apply(lambda s: list(map(int, s)))
    .to_dict()
)

# Decision values must be numeric; anything unparsable turns into NaN
microplastics_decisions_df["Linear Economy"] = pd.to_numeric(
    microplastics_decisions_df["Linear Economy"], errors="coerce"
)

# Counters reported at the end of the cell
n_pairs = 0   # cells of A that were scaled
skipped = 0   # decision rows without a value or without a match in A

_decisions = zip(
    microplastics_decisions_df["row_key"],
    microplastics_decisions_df["col_key"],
    microplastics_decisions_df["Linear Economy"],
)
for _row_key, _col_key, x in _decisions:
    if pd.isna(x):
        skipped += 1
        continue

    row_idxs = idx_map.get(_row_key, [])
    col_idxs = idx_map.get(_col_key, [])
    if not (row_idxs and col_idxs):
        skipped += 1
        continue

    # A[row, col] *= value for every (row, col) combination; a NaN cell counts as 0
    factor = float(x)
    for ri in row_idxs:
        for ci in col_idxs:
            A[ri, ci] = np.nan_to_num(A[ri, ci]) * factor
            n_pairs += 1

print(f"microplastics_decisions_df: updated {n_pairs} cells; skipped {skipped} rows.")

microplastics_decisions_df: updated 17 cells; skipped 1 rows.
