# 1. General Problem Set-up

## Import packages

In [1]:
import pandas as pd
import numpy as np

## Import LCA matrices from OpenLCA

In [2]:
# Raw LCA matrices exported from OpenLCA. header=None: the files carry no
# header line, so the first CSV row is read as data rather than column names.
A_df = pd.read_csv("data/A.csv", header=None)
B_df = pd.read_csv("data/B.csv", header=None)
C_df = pd.read_csv("data/C.csv", header=None)

# Numeric views of the three tables. Each column is passed through
# pd.to_numeric; errors="coerce" turns any cell that cannot be parsed as a
# number into NaN instead of raising.
A = A_df.apply(pd.to_numeric, errors="coerce").to_numpy()
B = B_df.apply(pd.to_numeric, errors="coerce").to_numpy()
C = C_df.apply(pd.to_numeric, errors="coerce").to_numpy()

In [3]:
# Metadata tables that accompany the A, B and C matrices. Unlike the matrices
# themselves, these are read with pandas' default header handling, so the first
# CSV line supplies the column names.
# A_index_df must provide at least the columns 'index' and 'provider name',
# which the cells below rely on.
A_index_df = pd.read_csv("data/index_A.csv")
B_index_df = pd.read_csv("data/index_B.csv")
C_index_df = pd.read_csv("data/index_C.csv")

## Removing Transportation (deregionalization)

In [4]:
# List of transportation providers. Its 'provider name' column is matched
# against A_index_df in the next cell to find the entries to remove.
A_transport_df = pd.read_csv("data/Transportation_A.csv")

In [5]:
# Series keyed by provider name; each value is the list of 'index' entries
# that A_index_df holds for that provider.
mapping = A_index_df.groupby('provider name')['index'].apply(list)

# Flat list of every index whose provider appears in the transportation list.
# Providers that do not occur in A_index_df are skipped; a provider listed
# more than once contributes its indices once per occurrence.
matched_indices_transport = []
for transport_name in A_transport_df['provider name']:
    if transport_name in mapping:  # membership test against the Series' index labels
        matched_indices_transport.extend(mapping[transport_name])

In [6]:
# Remove every transportation process from the system.
#
# `matched_indices_transport` holds values of A_index_df['index']. They are used
# below directly as row/column positions of A and column positions of B, so the
# 'index' column is assumed to equal the position in those matrices
# — TODO confirm against the OpenLCA export.

# Unique positions in ascending order. dtype=int is forced on purpose: with no
# matches, a bare np.array([]) would be float64, which np.delete rejects as an
# index array; an empty integer array makes the deletions below a no-op.
to_drop = np.array(sorted(set(map(int, matched_indices_transport))), dtype=int)

# 1) Remove from A_index_df
mask_keep = ~A_index_df['index'].isin(to_drop)
A_index_df = A_index_df.loc[mask_keep].copy()

# 2) Remove corresponding rows and columns from A
A = np.delete(A, to_drop, axis=0)  # remove rows
A = np.delete(A, to_drop, axis=1)  # remove columns

# 3) Remove the same columns from B (keep rows)
B = np.delete(B, to_drop, axis=1)

# 4) Renumber the 'index' column 0..n-1 so it matches the shrunken matrices again
A_index_df['index'] = np.arange(len(A_index_df), dtype=int)

## Aggregating electricity

In [7]:
# List of electricity providers. Its 'provider name' column selects the rows
# of A that are aggregated into the electricity mix in the next cell.
A_elec_df = pd.read_csv("data/Electricity_A.csv")

In [8]:
# Collapse every electricity provider into the single "Electricity Mix (Global)" row.
#
# Inputs (defined by earlier cells):
#   A          : numeric numpy array (rows x cols)
#   B          : numeric numpy array; the transport-removal cell deletes the same
#                column positions from A and B, so B's columns are taken to follow
#                A's column ordering
#   A_index_df : DataFrame with columns ["index", "provider name", ...]
#   A_elec_df  : DataFrame with column ["provider name"] listing all electricity providers
# The values in A_index_df["index"] are used as both row and column positions of A.

# 0) Build the set of electricity provider names (missing names are ignored)
elec_names = set(A_elec_df['provider name'].dropna().astype(str).unique())

# 1) Find their indices in A_index_df
is_electricity = A_index_df['provider name'].astype(str).isin(elec_names)
elec_idx = A_index_df.loc[is_electricity, 'index'].astype(int).unique()

# 2) Locate the mix row index (must exist)
mix_name = "Electricity Mix (Global)"
mix_rows = A_index_df.loc[A_index_df['provider name'] == mix_name, 'index'].astype(int).unique()
if len(mix_rows) == 0:
    raise ValueError("Electricity Mix (Global) not found in A_index_df['provider name'].")
# If several entries carry the mix name, the first one is the aggregation target.
# NOTE: mix_idx is the position BEFORE the deletions below; once lower-numbered
# electricity rows are removed, the mix row sits at a smaller position.
mix_idx = int(mix_rows[0])

# Ensure the mix row is not purged
elec_idx_set = set(map(int, elec_idx))
elec_idx_wo_mix = sorted(elec_idx_set - {mix_idx})

# 3) Aggregate: add all electricity rows (except the mix row) into the mix row, column-wise
if len(elec_idx_wo_mix) > 0:
    # NaNs are treated as 0 on both sides of the sum
    add_block = np.nansum(A[elec_idx_wo_mix, :], axis=0)
    A[mix_idx, :] = np.nan_to_num(A[mix_idx, :]) + np.nan_to_num(add_block)

# 4) Decide what to drop: every electricity row/column except the mix itself.
#    (To drop the mix COLUMN as well, build cols_to_drop from sorted(elec_idx_set).)
rows_to_drop = np.array(elec_idx_wo_mix, dtype=int)
cols_to_drop = np.array(elec_idx_wo_mix, dtype=int)

# 5) Remove rows/columns from A, and the same columns from B.
#    B must lose exactly the columns A loses, otherwise its columns no longer
#    line up with A's (the transport-removal cell keeps them in step the same way).
if rows_to_drop.size > 0:
    A = np.delete(A, rows_to_drop, axis=0)
if cols_to_drop.size > 0:
    A = np.delete(A, cols_to_drop, axis=1)
    B = np.delete(B, cols_to_drop, axis=1)

# 6) Remove the same rows from A_index_df (only rows; columns in A_index_df are metadata)
if len(elec_idx_wo_mix) > 0:
    keep_mask = ~A_index_df['index'].astype(int).isin(elec_idx_wo_mix)
    A_index_df = A_index_df.loc[keep_mask].copy()

# 7) Reset the "index" column in A_index_df to reflect 0..n-1 after deletions
A_index_df['index'] = np.arange(len(A_index_df), dtype=int)

## Identifying background flows for cost calculation

In [9]:
# List of foreground providers. Its 'provider name' column is matched against
# A_index_df in the next cell to tell foreground columns from background rows.
A_foreground_df = pd.read_csv("data/Foreground_A.csv")

In [11]:
# Identify the background rows of A that feed the foreground processes.
# Result: `filtered_nonzero_rows`, the sorted row positions that have a nonzero
# entry in at least one foreground column and are not themselves foreground.


def norm(s):
    """Return `s` as a whitespace-trimmed, case-folded string for name matching."""
    return str(s).strip().casefold()


# Normalize keys (work on copies so the helper column is added to fresh frames)
A_index_df = A_index_df.copy()
A_foreground_df = A_foreground_df.copy()
A_index_df['provider_key'] = A_index_df['provider name'].map(norm)
A_foreground_df['provider_key'] = A_foreground_df['provider name'].map(norm)

# Build mapping from normalized provider name -> list of integer indices
idx_map = (
    A_index_df
    .groupby('provider_key')['index']
    .apply(lambda s: list(map(int, s)))
    .to_dict()
)

# Foreground indices (to exclude later).
# Rows with a missing provider name are skipped: norm() would turn NaN into the
# string 'nan', which would then match every unnamed entry of A_index_df.
foreground_keys = A_foreground_df.loc[
    A_foreground_df['provider name'].notna(), 'provider_key'
].unique()
matched_indices = sorted({
    idx
    for key in foreground_keys
    for idx in idx_map.get(key, [])
})

# Rows with a nonzero entry in any foreground column.
# NaN counts as "no flow" here (np.nonzero alone would treat NaN as nonzero),
# consistent with the electricity aggregation, which also treats NaN as 0.
foreground_block = np.nan_to_num(A[:, matched_indices])
all_nonzero = set(np.nonzero(foreground_block.any(axis=1))[0])

# Remove overlap: a foreground process is not a background input
foreground_set = set(matched_indices)
all_nonzero_set = {int(x) for x in all_nonzero}
filtered_nonzero_rows = sorted(all_nonzero_set - foreground_set)


In [12]:
# Metadata rows of A_index_df whose 'index' is one of the background rows found
# above; .copy() detaches the result from A_index_df.
result_df = A_index_df[A_index_df['index'].isin(filtered_nonzero_rows)].copy()

In [13]:
# Write the selected rows to the working directory; index=False leaves the
# DataFrame's own row labels out of the file.
result_df.to_csv('filtered_nonzero_rows_with_names.csv', index=False)

## Importing financial data for cost calculation

In [15]:
# Financial input for the cost calculation. header=None: the first CSV line is
# read as data, not as column names.
financial_df = pd.read_csv("data/Financial.csv", header=None)

# 2. Static Analysis

## Scenario Generation

### Electricity Mix

In [None]:
# Electricity-mix input for scenario generation; read with pandas' default
# header handling, so the first CSV line supplies the column names.
electricity_mix_df = pd.read_csv("data/electricity_mix.csv")

### Collection and Sorting