In [6]:
import csv

from enbios2.const import BASE_DATA_PATH

# Directory holding all input and output files of this notebook.
base_path = BASE_DATA_PATH / "temp/miquel_upscaling"

# CSV export of the sheet "dendrogram-technology dict".
base_denodo_gram_file = base_path / "dendrogram_generation_base.csv"
# CSV export of the PNIEC dendrogram sheet.
dendogram_file = base_path / "dendrogram_generation_dendo.csv"

# Display, in this order, whether each of the two input files exists.
tuple(csv_path.exists() for csv_path in (base_denodo_gram_file, dendogram_file))

(True, True)

In [7]:
# Read both CSV sheets completely into memory as lists of dict rows.
# The files are opened in `with` blocks so the handles are closed deterministically,
# and with newline="" as the csv module documentation recommends.
# csv.DictReader reads lazily, so the rows must be materialised with list()
# while the file is still open.
with base_denodo_gram_file.open(encoding="utf-8", newline="") as base_fh:
    dendo_base_reader = csv.DictReader(base_fh)
    print(dendo_base_reader.fieldnames)
    all_base_rows = list(dendo_base_reader)

with dendogram_file.open(encoding="utf-8", newline="") as dendo_fh:
    dendo_reader = csv.DictReader(dendo_fh)
    print(dendo_reader.fieldnames)
    dendo_rows = list(dendo_reader)

['Dendrogram name', 'tech name']
['Child', 'Parent', 'Dendrogram level']


In [8]:
# Match "Dendrogram name" (from the tech-name sheet) against "Child" (in dendo_rows).
base_dendo_names = {base_row['Dendrogram name'] for base_row in all_base_rows}
# The trailing condition drops rows whose "Child" cell is empty.
base_dendo_names_in_dendo = {
    dendo_row['Child'] for dendo_row in dendo_rows if dendo_row["Child"]
}

In [9]:
# Names listed as "Dendrogram name" that never appear as a "Child" (of something).
# An empty set means every "Dendrogram name" is present in the dendrogram sheet.
base_dendo_names - base_dendo_names_in_dendo

set()

In [10]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [133]:
from enbios2.experiment.sum_hierarchy import HierarchyNode

# Build the hierarchy (tree). "Dendrogram" is really the name of the plot, not of the
# data structure, so "tree" is used from here on.
# The "Dendrogram level" column is ignored; only the Child - Parent relationship matters.
#
# Rows are processed in file order:
#   * rows with an empty "Child" are skipped,
#   * a row with an empty "Parent" defines the root (a later such row replaces it),
#   * any other row is attached below the first node created with the parent's name;
#     if no such node exists yet, the row is reported and skipped.
# Names are stripped consistently for root, child and parent so that stray whitespace
# in the sheet cannot prevent a child from connecting to its parent.

base_tree: "HierarchyNode | None" = None
all_nodes: list[HierarchyNode] = []
# name -> first node created with that name (replaces a linear scan of all_nodes per row)
nodes_by_name: dict[str, HierarchyNode] = {}

for row in dendo_rows:
    # `or ""` guards against cells that csv.DictReader filled with None (short rows)
    child_name = (row["Child"] or "").strip()
    parent_name = (row["Parent"] or "").strip()
    if not child_name:
        continue
    if not parent_name:
        node = HierarchyNode(child_name)
        base_tree = node
    else:
        parent = nodes_by_name.get(parent_name)
        if parent is None:
            print(row, "does not connect")
            print(all_nodes)
            continue
        node = HierarchyNode(child_name)
        parent.add_child(node)
    all_nodes.append(node)
    # setdefault keeps the first node for a name, matching a first-match lookup
    nodes_by_name.setdefault(child_name, node)

In [134]:
# So far so good: check the base tree and export it.
base_tree.assert_all_names_unique()
# Not the last expression of the cell, so the returned dict is not displayed.
base_tree.as_dict()
# Write the base tree to "base_tree.csv" inside base_path.
base_tree.to_csv(base_path / "base_tree.csv", include_attrs=[], merge_first_sub_row=True)

OK, this was just a test of whether the basic tree can be built. Now let's build a mega tree.
The root is called "impacts" and has several children (ONE PER SCENARIO); each scenario has 18 children, one for each indicator, and each indicator has 2 children: Onsite and Offsite.
Each of these 2 subtrees contains a subtree of the kind we built before.

In [135]:
# The generation table refers to technologies by their technology names, which are not
# in the tree; the mapping between both is given by "dendrogram_generation_base.csv".
# So find every "Dendrogram name" node and rename it to the value of its "tech name" column.
# NOTE(review): nodes are looked up by their old name, so re-running this cell after a
# successful run presumably trips the assertion for every renamed node - confirm.
for row in all_base_rows:
    dendrogram_name = row['Dendrogram name']
    node = base_tree.find_child_by_name(dendrogram_name)
    # Name the missing entry so that a failed lookup can be diagnosed from the traceback.
    assert node, f"no node named {dendrogram_name!r} found in the base tree"
    node.name = row["tech name"]

In [136]:
# Load the generation table and map its "Technologies" values to tree technology names.
generation_file = base_path / "PNIEC_generation.csv"
assert generation_file.exists(), f"{generation_file} is missing"
# Close the file deterministically; newline="" as recommended by the csv module docs.
with generation_file.open(encoding="utf-8", newline="") as generation_fh:
    generation_rows = list(csv.DictReader(generation_fh))

# First check whether the "Technologies" match the "tech name" column.
base_tech_names = {r["tech name"] for r in all_base_rows}
technologies = {r["Technologies"] for r in generation_rows}

# Tech names derived from "Technologies" (whether or not they match a "tech name").
real_tech_names = set()

# Map from Technologies -> tech name; needed later to locate nodes in the tree.
technology2tree_tech_map: dict[str, str] = {}
# The "_"-separated token "el" is used as the split point in the Technologies column.
# Iterate in sorted order so that any diagnostic output is reproducible across runs.
for technology in sorted(technologies):
    parts = technology.split("_")
    # Test for "el" as a whole token: a plain substring test would also accept names
    # such as "diesel_x" and then fail in parts.index("el") with an unexplained ValueError.
    assert "el" in parts, f"no 'el' token in technology {technology!r}"
    el_index = parts.index("el")
    # The tokens before "el" form the real tech name.
    real_tech_name = "_".join(parts[:el_index])
    real_tech_names.add(real_tech_name)
    if real_tech_name in base_tech_names:
        technology2tree_tech_map[technology] = real_tech_name
    else:  # with the current data this branch is never reached
        print(real_tech_name, "not in base tech names")


In [137]:
# Build the top of the tree in a single pass over the generation rows:
# root "impacts" -> one node per scenario -> one node per indicator.
root = HierarchyNode("impacts")

for row in generation_rows:
    scenario = row["Scenario"]
    if scenario in root:
        # Re-fetch the existing node. Otherwise a row whose scenario was already seen,
        # but which does not directly follow that scenario's other rows, would attach
        # its indicator to whichever scenario node was created last.
        scenario_node = root[scenario]
    else:
        scenario_node = HierarchyNode(scenario)
        root.add_child(scenario_node)

    indicator = row["Indicators"]
    if indicator not in scenario_node:
        scenario_node.add_child(HierarchyNode(indicator))

root.as_dict()

production_2015
terrestrial acidification potential (TAP)
production_2015
global warming potential (GWP1000)
production_2015
freshwater ecotoxicity potential (FETP)
production_2015
marine ecotoxicity potential (METP)
production_2015
terrestrial ecotoxicity potential (TETP)
production_2015
fossil fuel potential (FFP)
production_2015
freshwater eutrophication potential (FEP)
production_2015
marine eutrophication potential (MEP)
production_2015
human toxicity potential (HTPc)
production_2015
human toxicity potential (HTPnc)
production_2015
ionising radiation potential (IRP)
production_2015
agricultural land occupation (LOP)
production_2015
surplus ore potential (SOP)
production_2015
ozone depletion potential (ODPinfinite)
production_2015
particulate matter formation potential (PMFP)
production_2015
photochemical oxidant formation potential: humans (HOFP)
production_2015
photochemical oxidant formation potential: ecosystems (EOFP)
production_2015
water consumption potential (WCP)
productio

{'impacts': {'name': 'impacts',
  'children': {'production_2015': {'name': 'production_2015',
    'children': {'terrestrial acidification potential (TAP)': {'name': 'terrestrial acidification potential (TAP)',
      'children': {},
      'value': None},
     'global warming potential (GWP1000)': {'name': 'global warming potential (GWP1000)',
      'children': {},
      'value': None},
     'freshwater ecotoxicity potential (FETP)': {'name': 'freshwater ecotoxicity potential (FETP)',
      'children': {},
      'value': None},
     'marine ecotoxicity potential (METP)': {'name': 'marine ecotoxicity potential (METP)',
      'children': {},
      'value': None},
     'terrestrial ecotoxicity potential (TETP)': {'name': 'terrestrial ecotoxicity potential (TETP)',
      'children': {},
      'value': None},
     'fossil fuel potential (FFP)': {'name': 'fossil fuel potential (FFP)',
      'children': {},
      'value': None},
     'freshwater eutrophication potential (FEP)': {'name': 'freshw

In [138]:
# Here we add the Onsite/Offsite layer and for each of them a copy of the base tree
from copy import deepcopy

# Every current leaf gets two children, "Onsite" and "Offsite", and each of those
# receives its own deep copy of the base tree.
for leaf in root.get_leaves():
    site_nodes = [HierarchyNode(site_name) for site_name in ("Onsite", "Offsite")]
    # Attach both site nodes first, then join the base-tree copies, in the same order.
    for site_node in site_nodes:
        leaf.add_child(site_node)
    for site_node in site_nodes:
        site_node.join_tree(deepcopy(base_tree))

Let's do some introspection...

In [139]:
# Show the root node together with the names of its children.
root, root.get_child_names() # the root has 7 children: all the scenarios

([impacts - 7 children],
 ['production_2015',
  'tendential_2020',
  'tendential_2025',
  'tendential_2030',
  'target_2020',
  'target_2025',
  'target_2030'])

In [140]:
# Take the first scenario and list its children: one node per indicator (18 in total).
# Note: __getitem__ was added to BaseNode, so the [] operator can be used to get children.
some_scenario = root[0]
some_scenario.get_child_names(), some_scenario.get_num_children()

(['terrestrial acidification potential (TAP)',
  'global warming potential (GWP1000)',
  'freshwater ecotoxicity potential (FETP)',
  'marine ecotoxicity potential (METP)',
  'terrestrial ecotoxicity potential (TETP)',
  'fossil fuel potential (FFP)',
  'freshwater eutrophication potential (FEP)',
  'marine eutrophication potential (MEP)',
  'human toxicity potential (HTPc)',
  'human toxicity potential (HTPnc)',
  'ionising radiation potential (IRP)',
  'agricultural land occupation (LOP)',
  'surplus ore potential (SOP)',
  'ozone depletion potential (ODPinfinite)',
  'particulate matter formation potential (PMFP)',
  'photochemical oxidant formation potential: humans (HOFP)',
  'photochemical oxidant formation potential: ecosystems (EOFP)',
  'water consumption potential (WCP)'],
 18)

In [141]:
# Going one step down: the first indicator has the children Onsite and Offsite;
# the second expression lists the children of that indicator's first child.
some_scenario[0].get_child_names(), some_scenario[0][0].get_child_names()

(['Onsite', 'Offsite'], ['renewables', 'non-renewables'])

In [142]:
# Fill all indicator subtrees row by row: for each generation row, find the matching
# technology node below both Onsite and Offsite and store the row's value there.
# technology2tree_tech_map translates the "Technologies" value into the node name.
for index, row in enumerate(generation_rows):
    technology = row["Technologies"]
    # Use .get() so that an unmapped technology does not raise a KeyError.
    tree_tech_name = technology2tree_tech_map.get(technology)
    if not tree_tech_name:
        print(technology, "will be ignored")
        # Really skip the row; without this the lookup below would run with a
        # missing name and report a second, misleading "error" line.
        continue

    scenario_subtree = root[row["Scenario"]]
    indicator_subtree = scenario_subtree[row["Indicators"]]

    # The children are called Onsite and Offsite, exactly like the 2 value columns.
    for child in indicator_subtree.children:
        tech_node = child.find_child_by_name(tree_tech_name)
        if not tech_node:
            print("error", child.name, index, technology, tree_tech_name)
            break
        tech_node.value = float(row[child.name])  # child.name is either Onsite or Offsite

In [143]:
# Call calc() on every node at level 2 below the root.
indicator_subtree: HierarchyNode
for indicator_subtree in root.collect_all_nodes_at_level(2):
    indicator_subtree.calc()

In [144]:
import json
# Write the complete tree as JSON. The `with` block guarantees that the file is
# flushed and closed as soon as the dump has finished.
with (base_path / "complete.json").open("w", encoding="utf-8") as complete_json_fh:
    json.dump(root.as_dict(), complete_json_fh, indent=2)

In [145]:
# Export the complete tree to "complete.csv" inside base_path.
root.to_csv(base_path / "complete.csv")

In [132]:
from enbios2.generic.util import safe_name

# Write one sanky CSV file per indicator.
# ! These hierarchies do not have unique names (the same subtree sits below both
# Onsite and Offsite), so the names are made unique before the files are written.
# NOTE(review): make_names_unique() is called on `root` itself, so it presumably renames
# the nodes in place; cells that look nodes up by name may then behave differently - confirm.
sanky_dir = base_path / "sanky"
sanky_dir.mkdir(exist_ok=True)

root.make_names_unique()

for indicator_subtree in root.collect_all_nodes_at_level(2):
    sanky_file = sanky_dir / f"{safe_name(indicator_subtree.name)}_sanky.csv"
    indicator_subtree.to_sanky_tree(sanky_file)

hydro is in name-map: renewables_hydro
wind_onshore_less1MW is in name-map: onshore_wind_onshore_less1MW
wind_onshore_1_3MW is in name-map: onshore_wind_onshore_1_3MW
wind_onshore_more3MW is in name-map: onshore_wind_onshore_more3MW
onshore is in name-map: wind_onshore
wind_offshore is in name-map: wind_wind_offshore
wind is in name-map: renewables_wind
PV_roof_mono is in name-map: rooftop_PV_roof_mono
PV_roof_poly is in name-map: rooftop_PV_roof_poly
rooftop is in name-map: photovoltaics_rooftop
PV_openground is in name-map: photovoltaics_PV_openground
photovoltaics is in name-map: renewables_photovoltaics
biogas is in name-map: other renewables_biogas
biomass is in name-map: other renewables_biomass
solar_thermoelectric is in name-map: other renewables_solar_thermoelectric
geothermal is in name-map: other renewables_geothermal
other renewables is in name-map: renewables_other renewables
renewables is in name-map: Offsite_renewables
coal_less300MW is in name-map: coal_coal_less300MW
c