In [None]:
# Environment setup (Python 3.11).
# Run the following commands in a terminal before starting the notebook:
# conda create -n bw25 -c cmutel brightway25
# conda activate bw25
# pip install ipykernel
# Then install the remaining notebook dependencies with the %pip magic:
%pip install numpy pandas plotly nbformat

In [None]:
# Pinned bw2io development release, needed for ecoinvent 3.8 ("ei3.8").
%pip install bw2io==0.9.dev11

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('ggplot')
#import database_explorer as dbex
import bw2analyzer as ba
import bw2data as bd
import bw2calc as bc
import bw2io as bi
import matrix_utils as mu
import bw_processing as bp

# Set to True only for the very first run: the project is then rebuilt from
# scratch (default data installed, ecoinvent datasets extracted and prepared).
first_setup = False
NAME = "ei38-cutoff-25"

# On first setup, remove any existing project with the same name first.
if first_setup and NAME in bd.projects:
    bd.projects.delete_project(NAME, True)

bd.projects.set_current(NAME)

if first_setup:
    # Default biosphere, LCIA methods and core data migrations.
    bi.bw2setup()

    # NOTE: despite its name, ``ei33`` is the ecoinvent 3.8 importer; the name
    # is kept because the cell that writes the database refers to it.
    ei33 = bi.SingleOutputEcospold2Importer(
        '/home/hurtiol/ALFA/datasets/',
        'ecoInvent 3.8',
    )
    ei33.apply_strategies()
    ei33.statistics()


Creating default biosphere

Applying strategy: normalize_units
Applying strategy: drop_unspecified_subcategories
Applying strategy: ensure_categories_are_tuples
Applied 3 strategies in 0.01 seconds
Title: Writing activities to SQLite3 database:
  Started: 02/10/2023 09:44:11
  Finished: 02/10/2023 09:44:12
  Total time elapsed: 00:00:01
  CPU %: 46.50
  Memory %: 0.01
Created database: biosphere3
Creating default LCIA methods

Applying strategy: normalize_units
Applying strategy: set_biosphere_type
Applying strategy: fix_ecoinvent_38_lcia_implementation
Applying strategy: drop_unspecified_subcategories
Applying strategy: link_iterable_by_fields
Applied 5 strategies in 4.70 seconds
Wrote 975 LCIA methods with 254388 characterization factors
Creating core data migrations

Extracting XML data from 19565 datasets
Extracted 19565 datasets in 240.52 seconds
Applying strategy: normalize_units
Applying strategy: update_ecoinvent_locations
Applying strategy: remove_zero_amount_coproducts
Applyi

In [2]:
# First setup only: write the datasets held by the importer created in the
# setup cell (``ei33``) to the database named 'ecoInvent 3.8'.
if first_setup:
    ei33.write_database(db_name='ecoInvent 3.8')

Not able to determine geocollections for all datasets. This database is not ready for regionalization.
Title: Writing activities to SQLite3 database:
  Started: 02/10/2023 11:15:06
  Finished: 02/10/2023 11:18:25
  Total time elapsed: 00:03:18
  CPU %: 31.50
  Memory %: 0.15
Created database: ecoInvent 3.8


In [4]:
# Take the first search hit for the biomethane production activity.
act = bd.Database("ecoInvent 3.8").search("biomethane production, high pressure from synthetic gas, wood, fluidised technology")[0]
#methods = [x for x in bd.methods if 'IPCC' in x[0]]
# LCIA method key used for the impact assessment.
method = ('IPCC 2013', 'climate change', 'GWP 100a')
# Demand: an amount of 1 of the selected activity.
lca = bc.LCA({act: 1}, method = method)
# Inventory first, then impact assessment.
lca.lci()
lca.lcia()
# Last expression of the cell: the total score is shown as the cell output.
lca.score

0.38520281033490816

In [5]:
# modified version of the AssumeDiagonalGraphTraversal.
# includes separate calculation of positive and negative impact
import warnings
from heapq import heappop, heappush
import itertools
from functools import lru_cache

import numpy as np
from scipy import sparse

from bw2calc import spsolve, LCA

class JRCAssumedDiagonalGraphTraversal:
    """
    Traverse a supply chain, following paths of greatest impact.

    Modified version of the assumed-diagonal graph traversal: in addition to the
    cumulative score, the negative and the positive part of each score are
    computed separately and reported on every node and edge.

    This implementation uses a queue of datasets to assess. As the supply chain is traversed, datasets inputs are added to a list sorted by LCA score. Each activity in the sorted list is assessed, and added to the supply chain graph, as long as its impact is above a certain threshold, and the maximum number of calculations has not been exceeded.
    Because the next dataset assessed is chosen by its impact, not its position in the graph, this is neither a breadth-first nor a depth-first search, but rather "importance-first".
    This class is written in a functional style - no variables are stored in *self*, only methods.
    Should be used by calling the ``calculate`` method.
    .. warning:: Graph traversal with multioutput processes only works when other inputs are substituted (see `Multioutput processes in LCA <http://chris.mutel.org/multioutput.html>`__ for a description of multioutput process math in LCA).
    """

    def calculate(self, lca, cutoff=0.005, max_calc=1e5, skip_coproducts=False):
        """
        Traverse the supply chain graph.
        Args:
            * *lca* (LCA): An LCA object (e.g. ``bw2calc.LCA``). ``lci()`` and ``lcia()`` are called here if ``supply_array`` / ``characterized_inventory`` are not present yet.
            * *cutoff* (float, default=0.005): Cutoff criteria to stop LCA calculations. Relative score of total, i.e. 0.005 will cutoff if a dataset has a score less than 0.5 percent of the total.
            * *max_calc* (int, default=1e5): Maximum number of LCA calculations to perform.
            * *skip_coproducts* (bool, default=False): If true, exchanges whose rescaled input amount is zero or negative are not followed.
        Returns:
            Dictionary with keys ``nodes``, ``edges`` and ``counter`` (number of LCA calculations).
        Raises:
            ValueError: If the total LCA score is zero, or if a traversed activity has a zero on the technosphere diagonal.
        """
        if not hasattr(lca, "supply_array"):
            lca.lci()
        if not hasattr(lca, "characterized_inventory"):
            lca.lcia()

        supply = lca.supply_array.copy()
        score = lca.score

        if score == 0:
            raise ValueError("Zero total LCA score makes traversal impossible")

        # Create matrix of LCIA CFs times biosphere flows, as these don't
        # change. This is also the unit score of each activity.
        characterized_biosphere = np.array(
            (lca.characterization_matrix * lca.biosphere_matrix).sum(axis=0)
        ).ravel()
        # Split the unit scores by sign so that burdens (positive) and
        # credits (negative) can be accumulated separately.
        characterized_biosphere_neg = characterized_biosphere.copy()
        characterized_biosphere_neg[characterized_biosphere_neg > 0] = 0
        characterized_biosphere_pos = characterized_biosphere.copy()
        characterized_biosphere_pos[characterized_biosphere_pos < 0] = 0

        heap, nodes, edges = self.initialize_heap(
            lca,
            supply,
            characterized_biosphere,
            characterized_biosphere_neg,
            characterized_biosphere_pos,
        )
        nodes, edges, counter = self.traverse(
            heap,
            nodes,
            edges,
            0,
            max_calc,
            cutoff,
            score,
            supply,
            characterized_biosphere,
            characterized_biosphere_neg,
            characterized_biosphere_pos,
            lca,
            skip_coproducts,
        )

        return {
            "nodes": nodes,
            "edges": edges,
            "counter": counter,
        }

    @staticmethod
    def _heap_priority(score):
        """
        Return the heap key for a cumulative ``score``.

        ``heapq`` pops the smallest key first, so the inverse of the absolute
        score is used: the larger the impact, the earlier the activity is
        assessed. A score of exactly zero maps to ``inf`` (assessed last)
        instead of raising ``ZeroDivisionError``.
        """
        if score == 0:
            return float("inf")
        return abs(1 / score)

    def initialize_heap(self, lca, supply, characterized_biosphere,
            characterized_biosphere_neg,
            characterized_biosphere_pos):
        """
        Create a `priority queue <http://docs.python.org/2/library/heapq.html>`_ or ``heap`` to store inventory datasets, sorted by LCA score.
        Populates the heap with each activity in ``demand``. Initial nodes are the *functional unit*, i.e. the complete demand, and each activity in the *functional unit*. Initial edges are inputs from each activity into the *functional unit*.
        The *functional unit* is an abstract dataset (as it doesn't exist in the matrix), and is assigned the index ``-1``.
        Returns:
            (heap, nodes, edges)
        """
        heap, edges = [], []
        nodes = {
            -1: {
                "amount": 1,
                "cum": lca.score,
                # Same keys as every other node: negative / positive part of
                # the total score, summed over the whole supply vector.
                "cum_neg": float((characterized_biosphere_neg * supply).sum()),
                "cum_pos": float((characterized_biosphere_pos * supply).sum()),
                "ind": 1e-6 * lca.score,
            }
        }
        for index, amount in enumerate(lca.demand_array):
            if amount == 0:
                continue
            cum_score, cum_score_neg, cum_score_pos = self.cumulative_score(
                index,
                supply,
                characterized_biosphere,
                characterized_biosphere_neg,
                characterized_biosphere_pos,
                lca,
            )
            heappush(heap, (self._heap_priority(cum_score), index))
            nodes[index] = {
                "amount": float(supply[index]),
                "cum": cum_score,
                "cum_neg": cum_score_neg,
                "cum_pos": cum_score_pos,
                "ind": self.unit_score(index, supply, characterized_biosphere),
            }
            edges.append(
                {
                    "to": -1,
                    "from": index,
                    "amount": amount,
                    "exc_amount": amount,
                    "impact": cum_score * amount / float(supply[index]),
                    "impact_neg": cum_score_neg * amount / float(supply[index]),
                    "impact_pos": cum_score_pos * amount / float(supply[index]),
                }
            )
        return heap, nodes, edges

    def cumulative_score(self, index, supply, characterized_biosphere,
            characterized_biosphere_neg,
            characterized_biosphere_pos, lca):
        """
        Compute cumulative LCA score for a given activity.
        Returns:
            Tuple ``(total, negative part, positive part)`` of floats, where the
            parts use only the negative / positive unit scores respectively.
        """
        demand = np.zeros((supply.shape[0],))
        demand[index] = (
            supply[index]
            *
            # Normalize by the production amount
            lca.technosphere_matrix[index, index]
        )
        # One linear solve gives the supply needed for this activity alone;
        # it is then weighted with the total and the sign-split unit scores.
        solved_tech = spsolve(lca.technosphere_matrix, demand)
        return (
            float((characterized_biosphere * solved_tech).sum()),
            float((characterized_biosphere_neg * solved_tech).sum()),
            float((characterized_biosphere_pos * solved_tech).sum()),
        )

    def unit_score(self, index, supply, characterized_biosphere):
        """Compute the LCA impact caused by the direct emissions and resource consumption of a given activity"""
        return float(characterized_biosphere[index] * supply[index])

    def traverse(
        self,
        heap,
        nodes,
        edges,
        counter,
        max_calc,
        cutoff,
        total_score,
        supply,
        characterized_biosphere,
        characterized_biosphere_neg,
        characterized_biosphere_pos,
        lca,
        skip_coproducts,
    ):
        """
        Build a directed graph by traversing the supply chain.
        Node ids are actually technosphere row/col indices, which makes lookup easier.
        Returns:
            (nodes, edges, number of calculations)
        Raises:
            ValueError: If a traversed activity has a zero on the technosphere diagonal.
        """
        while heap:
            if counter >= max_calc:
                warnings.warn("Stopping traversal due to calculation count.")
                break
            parent_index = heappop(heap)[1]

            # Assume that this activity produces its reference product
            scale_value = lca.technosphere_matrix[parent_index, parent_index]
            if scale_value == 0:
                raise ValueError(
                    "Can't rescale activities that produce zero reference product"
                )
            col = lca.technosphere_matrix[:, parent_index].tocoo()
            # Multiply by -1 because technosphere values are negative
            # (consumption of inputs) and rescale
            children = [
                (int(row), float(-1 * value / scale_value))
                for row, value in zip(col.row, col.data)
            ]
            for activity, amount in children:
                # Skip values on technosphere diagonal
                if activity == parent_index:
                    continue
                # Optionally skip exchanges that are not a net input
                # (rescaled amount zero or negative, e.g. coproducts)
                if skip_coproducts and amount <= 0:
                    continue
                counter += 1
                cumulative_score, cum_score_neg, cum_score_pos = self.cumulative_score(
                    activity,
                    supply,
                    characterized_biosphere,
                    characterized_biosphere_neg,
                    characterized_biosphere_pos,
                    lca,
                )
                if abs(cumulative_score) < abs(total_score * cutoff):
                    continue

                # flow between activity and parent (Multiply by -1 because technosphere values are negative)
                flow = (
                    -1.0
                    * lca.technosphere_matrix[activity, parent_index]
                    * supply[parent_index]
                )
                total_activity_output = (
                    lca.technosphere_matrix[activity, activity] * supply[activity]
                )

                # Each edge is a dict; the impacts are the share of the
                # activity's cumulative scores carried by this flow.
                edges.append(
                    {
                        "to": parent_index,
                        "from": activity,
                        # Amount of this link * amount of parent demanding link
                        "amount": flow,
                        # Raw exchange value
                        "exc_amount": amount,
                        # Impact related to this flow
                        "impact": flow / total_activity_output * cumulative_score,
                        "impact_neg": flow / total_activity_output * cum_score_neg,
                        "impact_pos": flow / total_activity_output * cum_score_pos,
                    }
                )
                # Want multiple incoming edges, but don't add existing node
                if activity in nodes:
                    continue
                nodes[activity] = {
                    # Total amount of this flow supplied
                    "amount": total_activity_output,
                    # Cumulative score from all flows of this activity
                    "cum": cumulative_score,
                    "cum_neg": cum_score_neg,
                    "cum_pos": cum_score_pos,
                    # Individual score attributable to environmental flows
                    # coming directly from or to this activity
                    "ind": self.unit_score(activity, supply, characterized_biosphere),
                }
                heappush(heap, (self._heap_priority(cumulative_score), activity))

        return nodes, edges, counter


In [14]:
# Traverse the supply chain of `lca`; cutoff=0.05 is relative to the total
# score, i.e. activities below 5 % of the total are not followed.
trav = JRCAssumedDiagonalGraphTraversal().calculate(lca, cutoff=0.05)

In [15]:
# Show the traversal edges ('to' / 'from' are technosphere matrix indices,
# -1 being the functional unit).
trav['edges']

[{'to': -1,
  'from': 8136,
  'amount': 1.0,
  'exc_amount': 1.0,
  'impact': 0.3852028103349081,
  'impact_neg': -0.0041985319257974475,
  'impact_pos': 0.3894013422607055},
 {'to': 8136,
  'from': 4040,
  'amount': 0.814000010490703,
  'exc_amount': 0.8140000104904175,
  'impact': 0.05148407252302469,
  'impact_neg': -0.003796729728404199,
  'impact_pos': 0.05528080225142889},
 {'to': 8136,
  'from': 9804,
  'amount': -0.01462200004607952,
  'exc_amount': -0.01462200004607439,
  'impact': 0.03637634803738445,
  'impact_neg': -4.585507161357016e-08,
  'impact_pos': 0.03637639389245605},
 {'to': 8136,
  'from': 9946,
  'amount': 0.008120000362399089,
  'exc_amount': 0.00812000036239624,
  'impact': 0.041284385384511194,
  'impact_neg': -6.6761062231768e-08,
  'impact_pos': 0.041284452145573435},
 {'to': 8136,
  'from': 13946,
  'amount': 0.01462200004607952,
  'exc_amount': 0.01462200004607439,
  'impact': 0.024502041276781197,
  'impact_neg': -0.00026193067357826056,
  'impact_pos': 0

In [None]:
# from Romain to name the activities
# Reverse the activity dictionary so a matrix index leads back to its key.
id_to_key = {v: k for k, v in lca.activity_dict.items()}
# Node -1 is the abstract functional unit and has no activity behind it, so it
# is filtered out explicitly instead of relying on its position in the dict.
activities = {
    index: bd.get_activity(id_to_key[index])
    for index in trav["nodes"]
    if index != -1
}
activities

In [18]:
import plotly.express as px
import math

# Sunburst input, one entry per traversal edge: the supplying activity is the
# label, the consuming activity the parent, and the positive impact of the
# edge (truncated to three decimals) the value. The first edge points to the
# functional unit and therefore becomes the root (empty parent).
# Options not enabled yet: activity names / locations as labels and hover
# data, a percentage colour scale ('algae'), branchvalues="total",
# valueformat='.0f'.
data = dict(
    label=[edge["from"] for edge in trav['edges']],
    parent=[""] + [edge["to"] for edge in trav['edges'][1:]],
    value=[
        math.floor(edge["impact_pos"] * 1000) / 1000
        for edge in trav['edges']
    ],
)
fig = px.sunburst(data, names='label', parents='parent', values='value')

# Show the underlying table first, then the interactive figure.
df = pd.DataFrame.from_dict(data)
display(df)
fig.show()

Unnamed: 0,label,parent,value
0,8136,,0.389
1,4040,8136.0,0.055
2,9804,8136.0,0.036
3,9946,8136.0,0.041
4,13946,8136.0,0.024
5,14409,8136.0,0.032
6,15337,8136.0,0.04
7,16404,8136.0,0.13
8,12886,16404.0,0.056
9,15009,16404.0,0.027


In [21]:
# List the installed packages and their versions.
%pip list

Package              Version
-------------------- ---------
appdirs              1.4.4
asteval              0.9.28
asttokens            2.2.1
astunparse           1.6.3
attrs                22.2.0
backcall             0.2.0
brightway25          1.0.6
brotlipy             0.7.0
bw-migrations        0.1
bw-processing        0.8.2
bw2analyzer          0.11.4
bw2calc              2.0.dev12
bw2data              4.0.dev18
bw2io                0.9.dev11
bw2parameters        0.7
certifi              2022.12.7
cffi                 1.15.1
charset-normalizer   2.1.1
colorama             0.4.6
comm                 0.1.2
contourpy            1.0.7
cryptography         39.0.1
cycler               0.11.0
debugpy              1.6.6
decorator            5.1.1
docopt               0.6.2
et-xmlfile           1.1.0
executing            1.2.0
fasteners            0.17.3
fastjsonschema       2.16.2
fonttools            4.38.0
fs                   2.4.15
idna                 3.4
ipykernel            6.21.1
i