# 1. General Problem Set-up

## Import packages

In [1]:
import pandas as pd
import numpy as np

## Import LCA matrices from OpenLCA

In [2]:
A_df = pd.read_csv("data/A.csv", header=None)
B_df = pd.read_csv("data/B.csv", header=None)
C_df = pd.read_csv("data/C.csv", header=None)

# Convert all string-looking numbers to floats
A = A_df.apply(pd.to_numeric, errors='coerce').values
B = B_df.apply(pd.to_numeric, errors='coerce').values
C = C_df.apply(pd.to_numeric, errors='coerce').values

In [3]:
A_index_df = pd.read_csv("data/index_A.csv")
B_index_df = pd.read_csv("data/index_B.csv")
C_index_df = pd.read_csv("data/index_C.csv")

## Removing Transportation (deregionalization)

In [4]:
A_transport_df = pd.read_csv("data/Transportation_A.csv")

In [5]:
# create a dict mapping each provider name to all its indices in A_index_df
mapping = A_index_df.groupby('provider name')['index'].apply(list)

# build a single flat list of all matching indices for the foreground processes
matched_indices_transport = [
    idx
    for name in A_transport_df['provider name']
    if name in mapping
    for idx in mapping[name]
]

In [6]:
import numpy as np

# matched_indices_transport is the list of indices to remove
to_drop = np.array(sorted(set(matched_indices_transport), key=int))

# 1) Remove from A_index_df
mask_keep = ~A_index_df['index'].isin(to_drop)
A_index_df = A_index_df.loc[mask_keep].copy()

# 2) Remove corresponding rows and columns from A
A = np.delete(A, to_drop, axis=0)  # remove rows
A = np.delete(A, to_drop, axis=1)  # remove columns

# 3) Remove the same columns from B (keep rows)
B = np.delete(B, to_drop, axis=1)

# 4) Reset the index column in A_index_df
A_index_df['index'] = np.arange(len(A_index_df), dtype=int)

## Aggregating electricity

In [7]:
A_elec_df = pd.read_csv("data/Electricity_A.csv")

In [8]:
# Inputs assumed:
# A : numeric numpy array (rows x cols)
# A_index_df : DataFrame with columns ["index", "provider name", "flow name", ...]
# A_elec_df : DataFrame with column ["provider name"] listing all electricity providers
# The indices in A_index_df["index"] align with both row and column positions of A.

# 0) Build the set of electricity provider names
elec_names = set(A_elec_df['provider name'].dropna().astype(str).unique())

# 1) Find their indices in A_index_df
elec_idx = A_index_df.loc[A_index_df['provider name'].astype(str).isin(elec_names), 'index'].astype(int).unique()

# 2) Locate the mix row index (must exist)
mix_name = "Electricity Mix (Global)"
mix_rows = A_index_df.loc[A_index_df['provider name'] == mix_name, 'index'].astype(int).unique()
if len(mix_rows) == 0:
    raise ValueError("Electricity Mix (Global) not found in A_index_df['provider name'].")
mix_idx = int(mix_rows[0])

# Ensure the mix row is not purged
elec_idx_set = set(map(int, elec_idx))
elec_idx_wo_mix = sorted(elec_idx_set - {mix_idx})

# 3) Aggregate: add all electricity rows (except the mix row) into the mix row, column-wise
if len(elec_idx_wo_mix) > 0:
    # in case of NaNs
    add_block = np.nansum(A[elec_idx_wo_mix, :], axis=0)
    A[mix_idx, :] = np.nan_to_num(A[mix_idx, :]) + np.nan_to_num(add_block)

# 4) Decide what to drop
rows_to_drop = np.array(elec_idx_wo_mix, dtype=int)            # drop electricity rows except the mix row
cols_to_drop = np.array(elec_idx_wo_mix, dtype=int)            # drop electricity columns except the mix column

# (Optionally also drop the mix COLUMN; keep it if you want to retain that process as a column)
# To ALSO drop the mix column, uncomment the next line:
# cols_to_drop = np.array(sorted(elec_idx_set), dtype=int)

# 5) Remove rows/columns from A
if rows_to_drop.size > 0:
    A = np.delete(A, rows_to_drop, axis=0)
if cols_to_drop.size > 0:
    A = np.delete(A, cols_to_drop, axis=1)

# 6) Remove the same rows from A_index_df (only rows; columns in A_index_df are metadata)
if len(elec_idx_wo_mix) > 0:
    keep_mask = ~A_index_df['index'].astype(int).isin(elec_idx_wo_mix)
    A_index_df = A_index_df.loc[keep_mask].copy()

# 7) Reset the "index" column in A_index_df to reflect 0..n-1 after deletions
A_index_df['index'] = np.arange(len(A_index_df), dtype=int)

## Identifying background flows for cost calculation

In [9]:
A_foreground_df = pd.read_csv("data/Foreground_A.csv")

In [10]:
import numpy as np
import pandas as pd

# Helper for consistent comparison
norm = lambda s: str(s).strip().casefold()

# Normalize keys
A_index_df = A_index_df.copy()
A_foreground_df = A_foreground_df.copy()
A_index_df['provider_key'] = A_index_df['provider name'].map(norm)
A_foreground_df['provider_key'] = A_foreground_df['provider name'].map(norm)

# Build mapping from provider -> indices
idx_map = (
    A_index_df
    .groupby('provider_key')['index']
    .apply(lambda s: list(map(int, s)))
    .to_dict()
)

# Foreground indices (to exclude later)
matched_indices = sorted({
    idx
    for key in A_foreground_df['provider_key'].unique()
    for idx in idx_map.get(key, [])
})

# Example: build all_nonzero from the numeric matrix A
# (take all row indices with nonzero entries across the columns in matched_indices)
all_nonzero = set()
for col in matched_indices:
    all_nonzero.update(np.nonzero(A[:, col])[0])

# Remove overlap
foreground_set = set(matched_indices)
all_nonzero_set = {int(x) for x in all_nonzero}
filtered_nonzero_rows = sorted(all_nonzero_set - foreground_set)


In [11]:
result_df = A_index_df[A_index_df['index'].isin(filtered_nonzero_rows)].copy()

In [12]:
result_df.to_csv('filtered_nonzero_rows_with_names.csv', index=False)

## Importing financial data for cost calculation

In [13]:
financial_df = pd.read_csv("data/Financial.csv", header=None)

# 2. Static Optimization

## 2.1. Base Case (Linear Fossil Economy in 2025)

### Electricity Mix

In [14]:
# Global Electricity Mix for 2025
electricity_mix_df = pd.read_csv("data/electricity_mix.csv")

In [15]:
import numpy as np
import pandas as pd

# Helper to normalize keys (to avoid case/space mismatches)
norm = lambda s: str(s).strip().casefold()

# Normalize provider names in both DataFrames
A_index_df = A_index_df.copy()
electricity_mix_df = electricity_mix_df.copy()
A_index_df['provider_key'] = A_index_df['provider name'].map(norm)
electricity_mix_df['provider_key'] = electricity_mix_df['provider name'].map(norm)

# Find the column index for "Electricity Mix (Global)"
mix_key = norm("Electricity Mix (Global)")
mix_idx_arr = A_index_df.loc[A_index_df['provider_key'] == mix_key, 'index'].astype(int).values
if len(mix_idx_arr) == 0:
    raise ValueError("'Electricity Mix (Global)' not found in A_index_df['provider name']")
mix_col = int(mix_idx_arr[0])

# Iterate over providers in electricity_mix_df and assign values into A
for _, row in electricity_mix_df.iterrows():
    prov_key = row['provider_key']
    energy_val = row['2025 Energy Mix']

    # find the row index (from A_index_df) for this provider
    idxs = A_index_df.loc[A_index_df['provider_key'] == prov_key, 'index'].astype(int).values
    for ridx in idxs:
        A[ridx, mix_col] = energy_val

In [16]:
A[int(A_index_df.loc[A_index_df["provider name"]=="Electricity, hydropower (life cycle)","index"].iloc[0]),
  int(A_index_df.loc[A_index_df["provider name"]=="Electricity Mix (Global)","index"].iloc[0])]

-0.1432

### Collection and Sorting

In [17]:
packaging_types_df = pd.read_csv("data/packaging_types.csv")
collection_recyclate_df = pd.read_csv("data/collection_Recyclate.csv")
collection_msw_df = pd.read_csv("data/collection_MSW.csv")

In [18]:
# ---- helpers ----
norm = lambda s: str(s).strip().casefold()

# defensive copies + normalized keys
A_index_df = A_index_df.copy()
packaging_types_df = packaging_types_df.copy()
collection_recyclate_df = collection_recyclate_df.copy()

A_index_df['provider_key'] = A_index_df['provider name'].map(norm)
packaging_types_df['provider_key'] = packaging_types_df['provider name'].map(norm)
collection_recyclate_df['provider_key'] = collection_recyclate_df['provider name'].map(norm)

# map provider -> list of integer indices in A_index_df["index"]
idx_map = (
    A_index_df.groupby('provider_key')['index']
    .apply(lambda s: list(map(int, s)))
    .to_dict()
)

# 1) PACKAGING: match providers and store their indices
packaging_keys = set(packaging_types_df['provider_key'].dropna().unique())
packaging_indices = sorted({idx for k in packaging_keys for idx in idx_map.get(k, [])})

# 2) find the column index for "Use & Collection"
use_col_key = norm("Use & Collection")
use_col_arr = A_index_df.loc[A_index_df['provider_key'] == use_col_key, 'index'].astype(int).values
if len(use_col_arr) == 0:
    raise ValueError('"Use & Collection" not found in A_index_df["provider name"].')
use_col = int(use_col_arr[0])

# 3) Build X (absolute values from A at packaging rows, Use & Collection column), keyed by provider
#    If a provider maps to multiple row indices, we take the value per row; but we store by provider_key for alignment.
X_by_key = {}
for k in packaging_keys:
    rows = idx_map.get(k, [])
    if not rows:
        continue
    # if multiple rows for same provider key, take the first (or sum; pick logic as needed)
    r0 = int(rows[0])
    X_by_key[k] = abs(float(A[r0, use_col]))

# 4) COLLECTION-RECYCLATE: match providers & store their indices and Y values
collection_keys = set(collection_recyclate_df['provider_key'].dropna().unique())
collection_indices = sorted({idx for k in collection_keys for idx in idx_map.get(k, [])})

# pull Y for each provider key (Linear Economy)
Y_by_key = (
    collection_recyclate_df
    .dropna(subset=['provider_key'])
    .groupby('provider_key')['Linear Economy']
    .first()  # if duplicates, take the first; adjust if you need sum/mean
    .to_dict()
)

# 5) Assign back to A: value = X * Y for each matching provider (aligned by provider_key)
for k in collection_keys:
    if k not in Y_by_key or k not in idx_map:
        continue
    # If this provider wasn't in packaging (no X), treat X as 0 (or skip)
    X_val = X_by_key.get(k, 0.0)
    Y_val = float(Y_by_key[k])

    for r in idx_map[k]:
        A[int(r), use_col] = X_val * Y_val

# --- stored items you asked for ---
# packaging_indices  -> indices for matched providers from packaging_types_df
# use_col            -> the "Use & Collection" column index in A
# collection_indices -> indices for matched providers from collection_recyclate_df

In [19]:
A[int(A_index_df.loc[A_index_df["provider name"]=="High-tech Sorting, Collected HDPE Drinking Bottles","index"].iloc[0]),
  int(A_index_df.loc[A_index_df["provider name"]=="Use & Collection","index"].iloc[0])]

0.0

In [27]:
import numpy as np
import pandas as pd

# ---- helpers ----
norm = lambda s: str(s).strip().casefold()

# defensive copies + normalized keys
A_index_df = A_index_df.copy()
packaging_types_df = packaging_types_df.copy()
collection_msw_df = collection_msw_df.copy()

A_index_df['provider_key'] = A_index_df['provider name'].map(norm)
packaging_types_df['provider_key'] = packaging_types_df['provider name'].map(norm)
collection_msw_df['provider_key'] = collection_msw_df['provider name'].map(norm)

# map provider -> list of integer indices in A_index_df["index"]
idx_map = (
    A_index_df.groupby('provider_key')['index']
    .apply(lambda s: list(map(int, s)))
    .to_dict()
)

# 1) PACKAGING: match providers and store their indices
packaging_keys = set(packaging_types_df['provider_key'].dropna().unique())
packaging_indices = sorted({idx for k in packaging_keys for idx in idx_map.get(k, [])})

# 2) find the column index for "Use & Collection"
use_col_key = norm("Use & Collection")
use_col_arr = A_index_df.loc[A_index_df['provider_key'] == use_col_key, 'index'].astype(int).values
if len(use_col_arr) == 0:
    raise ValueError('"Use & Collection" not found in A_index_df["provider name"].')
use_col = int(use_col_arr[0])

# 3) Build X (absolute values from A at packaging rows, Use & Collection column), keyed by provider
X_by_key = {}
for k in packaging_keys:
    rows = idx_map.get(k, [])
    if not rows:
        continue
    r0 = int(rows[0])  # if multiple rows per provider, adjust (sum/mean) as needed
    X_by_key[k] = abs(float(A[r0, use_col]))

# 4) COLLECTION-MSW: match providers & store their indices and Y values
collection_keys = set(collection_msw_df['provider_key'].dropna().unique())
collection_indices = sorted({idx for k in collection_keys for idx in idx_map.get(k, [])})

# pull Y for each provider key (Linear Economy)
Y_by_key = (
    collection_msw_df
    .dropna(subset=['provider_key'])
    .groupby('provider_key')['Linear Economy']
    .first()  # if duplicates, take the first; change to sum()/mean() if needed
    .to_dict()
)

# 5) Assign back to A: value = X * Y for each matching provider (aligned by provider_key)
for k in collection_keys:
    if k not in Y_by_key or k not in idx_map:
        continue
    X_val = X_by_key.get(k, 0.0)    # if provider not in packaging, X defaults to 0
    Y_val = float(Y_by_key[k])

    for r in idx_map[k]:
        A[int(r), use_col] = X_val * Y_val

# --- Stored results ---
# packaging_indices  -> indices for matched providers from packaging_types_df
# use_col            -> the "Use & Collection" column index in A
# collection_indices -> indices for matched providers from collection_msw_df

In [28]:
A[int(A_index_df.loc[A_index_df["provider name"]=="Disposal, LDPE Food Packaging Film (small format)","index"].iloc[0]),
  int(A_index_df.loc[A_index_df["provider name"]=="Use & Collection","index"].iloc[0])]

0.0

In [29]:
target = "Disposal, LDPE Food Packaging Film (small format)"
k = str(target).strip().casefold()

print("In packaging_types_df?", k in packaging_types_df['provider name'].str.strip().str.casefold().unique())
print("X_by_key has it?", k in X_by_key)
print("X value:", X_by_key.get(k, None))

# What’s currently in A at that row & Use & Collection col?
row_idx = int(A_index_df.loc[A_index_df["provider name"]==target,"index"].iloc[0])
use_col = int(A_index_df.loc[A_index_df["provider name"]=="Use & Collection","index"].iloc[0])
print("Existing A[row,use_col]:", A[row_idx, use_col])


In packaging_types_df? False
X_by_key has it? False
X value: None
Existing A[row,use_col]: 0.0


In [30]:
import numpy as np
import pandas as pd

norm = lambda s: str(s).strip().casefold()

# normalize
A_index_df = A_index_df.copy()
collection_msw_df = collection_msw_df.copy()
A_index_df['provider_key'] = A_index_df['provider name'].map(norm)
collection_msw_df['provider_key'] = collection_msw_df['provider name'].map(norm)

# map provider -> list of indices (rows) in A_index_df["index"]
idx_map = (
    A_index_df.groupby('provider_key')['index']
    .apply(lambda s: list(map(int, s)))
    .to_dict()
)

# find Use & Collection column index
use_col_key = norm("Use & Collection")
use_col_arr = A_index_df.loc[A_index_df['provider_key'] == use_col_key, 'index'].astype(int).values
if len(use_col_arr) == 0:
    raise ValueError('"Use & Collection" not found in A_index_df["provider name"].')
use_col = int(use_col_arr[0])

# Y from collection_msw_df ("Linear Economy")
Y_by_key = (
    collection_msw_df
    .dropna(subset=['provider_key'])
    .groupby('provider_key')['Linear Economy']
    .first()
    .to_dict()
)

# update A: for each matched provider row r, set A[r, use_col] = abs(A[r, use_col]) * Y
for k, Y_val in Y_by_key.items():
    for r in idx_map.get(k, []):
        r = int(r)
        X_val = abs(float(A[r, use_col]))   # <-- compute X from A at that row
        A[r, use_col] = X_val * float(Y_val)

In [31]:
A[int(A_index_df.loc[A_index_df["provider name"]=="Disposal, LDPE Food Packaging Film (small format)","index"].iloc[0]),
  int(A_index_df.loc[A_index_df["provider name"]=="Use & Collection","index"].iloc[0])]

0.0

In [32]:
p = "Disposal, LDPE Food Packaging Film (small format)"
r = int(A_index_df.loc[A_index_df["provider name"]==p, "index"].iloc[0])
c = int(A_index_df.loc[A_index_df["provider name"]=="Use & Collection", "index"].iloc[0])
print("After update:", A[r, c])


After update: 0.0


In [22]:
# after building X_by_key
print("Set X (absolute values from A at packaging rows, Use & Collection column):")
for k, v in X_by_key.items():
    prov_name = A_index_df.loc[A_index_df['provider_key'] == k, 'provider name'].iloc[0]
    print(f"{prov_name}: {v}")


Set X (absolute values from A at packaging rows, Use & Collection column):
PVC Other Non-food Rigid: 0.013
HDPE Food Bottles: 0.008
PP Food Bottles: 0.004
PS Other Food Rigid: 0.025
HDPE Drinking Bottles: 0.02
PET Other Non-food Rigid: 0.02
PP Food Packaging Film (med-large format): 0.026
PP Other Food Rigid: 0.138
Multi-material Food Bottles: 0.028
PET Other Food Rigid: 0.025
HDPE Other Food Rigid : 0.027
PET Non-food Bottles: 0.014
PET Food Bottles: 0.008
Multi-material Food Packaging Film (small format): 0.07
LDPE Food Packaging Film (med-large format): 0.124
PP Food Packaging Film (small format): 0.005
PP Non-food Bottles: 0.005
PVC Other Food Rigid: 0.011
HDPE Other Non-food Rigid: 0.01
HDPE Non-food Bottles: 0.056
Multi-material Food Packaging Film (med-large format): 0.007
LDPE Food Packaging Film (small format): 0.025
PET Drinking Bottles: 0.182
Multi-material Non-food Packaging Film: 0.025
LDPE Non-food Packaging Films : 0.082
PP Non-food Packaging Films : 0.018
PP Other Non-f

In [23]:
print("Set Y (Linear Economy values from collection_recyclate_df):")
for k, v in Y_by_key.items():
    prov_name = A_index_df.loc[A_index_df['provider_key'] == k, 'provider name'].iloc[0]
    print(f"{prov_name}: {v}")

Set Y (Linear Economy values from collection_recyclate_df):
Disposal, HDPE Drinking Bottles: 0.96
Disposal, HDPE Food Bottles: 0.96
Disposal, HDPE Non-food Bottles: 0.96
Disposal, HDPE Other Food Rigid : 0.96
Disposal, HDPE Other Non-food Rigid: 0.96
Disposal, LDPE Food Packaging Film (med-large format): 0.81
Disposal, LDPE Food Packaging Film (small format): 0.8
Disposal, LDPE Non-food Packaging Films : 0.81
Disposal, Multi-material Food Bottles: 0.78
Disposal, Multi-material Food Packaging Film (med-large format): 0.78
Disposal, Multi-material Food Packaging Film (small format): 0.78
Disposal, Multi-material Non-food Packaging Film: 0.78
Disposal, PET Drinking Bottles: 0.96
Disposal, PET Food Bottles: 0.96
Disposal, PET Non-food Bottles: 0.96
Disposal, PET Other Food Rigid: 0.96
Disposal, PET Other Non-food Rigid: 0.96
Disposal, PP Food Bottles: 0.96
Disposal, PP Food Packaging Film (med-large format): 0.81
Disposal, PP Food Packaging Film (small format): 0.8
Disposal, PP Non-food Bo

In [33]:
import numpy as np

target = "Disposal, LDPE Food Packaging Film (small format)"
use_col_name = "Use & Collection"

norm = lambda s: str(s).strip().casefold()

# indexes
r_matches = A_index_df.loc[A_index_df["provider name"] == target, "index"].astype(int).tolist()
c_matches = A_index_df.loc[A_index_df["provider name"] == use_col_name, "index"].astype(int).tolist()
assert r_matches and c_matches, "Row/column not found."

r = r_matches[0]
c = c_matches[0]

# keys
A_index_df["provider_key"] = A_index_df["provider name"].map(norm)
collection_msw_df["provider_key"] = collection_msw_df["provider name"].map(norm)
k = norm(target)

# Y from MSW
Y_series = collection_msw_df.loc[collection_msw_df["provider_key"] == k, "Linear Economy"]
Y_val = float(Y_series.iloc[0]) if not Y_series.empty else None

# X from A at (r, use_col)
X_val = float(A[r, c])

print("Row index (r):", r)
print("Use&Collection col (c):", c)
print("X before update (A[r,c]):", X_val)
print("Y (Linear Economy):", Y_val)

# what did our update write?
print("A[r,c] after update:", float(A[r, c]))


Row index (r): 102
Use&Collection col (c): 0
X before update (A[r,c]): 0.0
Y (Linear Economy): 0.8
A[r,c] after update: 0.0
