# Partial import of Agribalyse 3.1.1

This starts out similarly to other Brightway importers.

In [None]:
import bw2io as bi
import bw2data as bd
from pathlib import Path
import randonneur as rn
import randonneur_data as rd

In [None]:
# Select the Brightway project that all following cells operate on.
bd.projects.set_current("agrobalyse-3.1.1 randonneur")

In [None]:
# Import the ecoinvent 3.8 cutoff release only when that database is not
# already present in the current project; `lcia=False` is passed for this release.
if "ecoinvent-3.8-cutoff" not in bd.databases:
    bi.ecoinvent.import_ecoinvent_release("3.8", "cutoff", lcia=False)

In [None]:
# Import the ecoinvent 3.10 cutoff release only when that database is not
# already present in the current project (no `lcia` override here).
if "ecoinvent-3.10-cutoff" not in bd.databases:
    bi.ecoinvent.import_ecoinvent_release("3.10", "cutoff")

In [None]:
# Machine-specific absolute path of the CSV file handed to the importer below.
fp = Path("/home/jupyter-cmutel-brightcon/data/AGB 3.1.1.csv")

We use a new importer which is built on `bw_simapro_csv`. The `.randonneur()` method used below, however, is not specific to this importer: it is now available on all LCI importers.

In [None]:
# Build the importer from the CSV path.
# NOTE(review): the second argument is presumably the database name — confirm.
sp = bi.SimaProBlockCSVImporter(fp, "agrobalyse-3.1.1")

We choose here to delete by the `code` value (which comes from the SimaPro file) because other attributes are not unique. This is a big database with a complicated structure.

This is one of the few cases where it makes sense to do transformations on the nodes and not the edges.

In [None]:
# Apply the `agribalyse-3.1.1-delete-aggregated-ecoinvent` transformation,
# restricted to the "delete" verb and matching on the `code` field only.
sp.randonneur(
    "agribalyse-3.1.1-delete-aggregated-ecoinvent",
    verbs=["delete"],
    fields=["code"],
    # presumably renames the datapackage's `identifier` field to `code` — confirm
    mapping={"source": {"identifier": "code"}},
    # operate on nodes (datasets) rather than on edges, per the notebook text
    migrate_nodes=True
)

In [None]:
# Run the strategies currently configured on the importer.
sp.apply_strategies()

In [None]:
# Report the importer statistics at this stage of the workflow.
sp.statistics()

Do internal linking. Because of the SimaPro format, location fields are not always present, and there is no clear distinction between products and processes, so matching on reference product won't work consistently.

In [None]:
# Internal linking: no database name is passed, and matching is limited to
# the `name` and `unit` fields.
sp.match_database(fields=["name", "unit"])

In [None]:
# Apply a sequence of named randonneur transformations, in this order, then
# normalize labels to the Brightway standard.
# NOTE(review): the first label is the datapackage created further down in
# this notebook, so it must already be present in the registry when this runs.
sp.randonneur("agribalyse-3.1.1-restore-simapro-ecoinvent-names")
sp.randonneur("simapro-ecoinvent-3.8-cutoff", fields=["name"])
sp.randonneur("SimaPro-9-ecoinvent-3.9-biosphere", fields=["name", "context", "unit"])
sp.randonneur("simapro-9-ecoinvent-3-context")
sp.randonneur("SimaPro-9-ecoinvent-3.9-biosphere-manual-matches")
sp.normalize_labels_to_brightway_standard()

In [None]:
# Apply the single bw2io strategy `change_electricity_unit_mj_to_kwh` to the importer data.
from bw2io.strategies import change_electricity_unit_mj_to_kwh
sp.apply_strategy(change_electricity_unit_mj_to_kwh)

In [None]:
# Chain the ecoinvent version transformations towards 3.10, matching
# technosphere entries on name/location/reference product/unit.
# NOTE(review): the first label ends at 3.9 while the second starts at 3.9.1 —
# presumably compatible for these fields; confirm.
sp.randonneur("ecoinvent-3.8-cutoff-ecoinvent-3.9-cutoff", fields=["name", "location", "reference product", "unit"])
sp.randonneur("ecoinvent-3.9.1-cutoff-ecoinvent-3.10-cutoff", fields=["name", "location", "reference product", "unit"])
# Biosphere transformation matches on `code`; the mapping presumably renames the
# datapackage's `uuid` field to `code` on both sides — confirm.
sp.randonneur("ecoinvent-3.9.1-biosphere-ecoinvent-3.10-biosphere", fields=["code"], mapping={"source": {"uuid": "code"}, "target": {"uuid": "code"}})

In [None]:
# Apply the `generic-brightway-units-normalization` transformation, then the
# bw2io `drop_unspecified_subcategories` strategy.
sp.randonneur("generic-brightway-units-normalization")
sp.apply_strategy(bi.strategies.drop_unspecified_subcategories)

In [None]:
# Link against the ecoinvent 3.10 databases: first the technosphere database,
# then the biosphere database by `code`, then by name/unit/categories.
sp.match_database("ecoinvent-3.10-cutoff")
sp.match_database("ecoinvent-3.10-biosphere", kind="biosphere", fields=["code"])
sp.match_database("ecoinvent-3.10-biosphere", kind="biosphere", fields=["name", "unit", "categories"])
# Two further matching passes against the biosphere database, based on context.
sp.match_database_against_top_level_context("ecoinvent-3.10-biosphere")
sp.match_database_against_only_available_in_given_context_tree("ecoinvent-3.10-biosphere")

In [None]:
# Report the importer statistics again, after the matching calls.
sp.statistics()

In [None]:
# Export the importer data to Excel and move the resulting file to the desktop.
# The result is bound to its own name so that `fp` (the input CSV path used to
# build the importer) is not overwritten in the notebook namespace.
# NOTE(review): `write_excel` presumably returns a `pathlib.Path`, and the
# positional `True` presumably restricts the export to unlinked exchanges — confirm.
excel_fp = sp.write_excel(True)
excel_fp.replace(Path("/Users/cmutel/Desktop") / excel_fp.name)

In [None]:
# Debug helper: print every exchange with this exact name and remember the
# dataset in which it is flagged as `functional`.
# `ds` and `exc` remain bound in the notebook namespace after the loop.
for ds in sp.data:
    for exc in ds.get('exchanges', []):
        if exc['name'] == 'Barley grain, feed {CA-QC}| production | Cut-off, S - Copied from Ecoinvent U':
            print(exc)
            if exc.get('functional'):
                # last matching dataset wins; `producer` is not assigned if none match
                producer = ds

In [None]:
# Count the datasets whose string representation contains this code.
sum(1 for dataset in sp.data if 'AGRIBALU000000003104694' in str(dataset))

In [None]:
# Print a small sample of unlinked technosphere exchanges.
c = 0

for exc in sp.unlinked:
    if exc['type'] == 'technosphere':
        print(exc)
        c += 1
        # the check runs after the increment, so up to six exchanges are printed
        if c > 5:
            break
# `exc` stays bound to the last exchange visited and is inspected in a later cell.

In [None]:
# Display whatever exchange `exc` is currently bound to in the notebook namespace.
exc

In [None]:
# Print the first dataset in ecoinvent 3.10 cutoff with this exact name, then stop.
# Note that this rebinds the notebook-level `ds`.
for ds in bd.Database("ecoinvent-3.10-cutoff"):
    if ds['name'] == 'market for sodium chloride, powder':
        print(ds)
        break

In [None]:
# Locate the dataset with this code; on success "Found" is printed and `ds`
# stays bound to it for the cells that follow.
# If nothing matches, nothing is printed and `ds` ends up as the last dataset.
for ds in sp.data:
    if ds['code'] == 'AGRIBALU000000003112131':
        print("Found")
        break

# ds

In [None]:
# Locate the dataset with this exact name; on success "Found" is printed and
# `ds` stays bound to it for the cells that follow.
# If nothing matches, nothing is printed and `ds` ends up as the last dataset.
for ds in sp.data:
    if ds['name'] == 'heat production, propane, at industrial furnace >100kW RoW':
        print("Found")
        break

# ds

In [None]:
# Exchanges of the current `ds` whose type is 'production'.
list(filter(lambda edge: edge['type'] == 'production', ds['exchanges']))

In [None]:
# Exchanges of the current `ds` carrying a truthy `functional` value.
list(filter(lambda edge: edge.get("functional"), ds['exchanges']))

In [None]:
# Everything stored on the current `ds` except its `exchanges` entry.
dict(item for item in ds.items() if item[0] != 'exchanges')

In [None]:
# Flows in the ecoinvent 3.8 biosphere database whose name mentions
# "hfc-143" (case-insensitive).
list(
    filter(
        lambda flow: 'hfc-143' in flow['name'].lower(),
        bd.Database("ecoinvent-3.8-biosphere"),
    )
)

In [None]:
# Print a small sample of unlinked biosphere exchanges.
count = 0

for exc in sp.unlinked:
    if exc['type'] == 'biosphere':
        print(exc)
        count += 1
        # the check runs after the increment, so up to six exchanges are printed
        if count > 5:
            break
# `exc` stays bound to the last exchange visited and is inspected in a later cell.

In [None]:
# Locate the dataset with this code; on success "Found" is printed and `ds`
# stays bound to it. If nothing matches, `ds` ends up as the last dataset.
for ds in sp.data:
    if ds['code'] == 'AGRIBALU000000003113535':
        print("Found")
        break

In [None]:
# Display whatever dataset `ds` is currently bound to in the notebook namespace.
ds

In [None]:
# Display whatever exchange `exc` is currently bound to in the notebook namespace.
exc

In [None]:
# Display whatever dataset `ds` is currently bound to in the notebook namespace.
ds

In [None]:
# Look for a *different* dataset that mentions this SimaPro process name.
# The substring test is applied to the candidate (`other`), not to `ds`:
# testing `ds` made the first half of the condition loop-invariant, so the
# loop stopped at the first dataset that merely differed from `ds`.
for other in sp.data:
    if other != ds and 'Scrap lead acid battery {RER}| treatment of, remelting | Alloc Def, U Edited U' in str(other):
        break
else:
    # No candidate matched: do not leave the last dataset bound to `other`,
    # which would look like a hit when displayed in the next cell.
    other = None

In [None]:
# Display whatever dataset `other` is currently bound to in the notebook namespace.
other

In [None]:
# Collect the distinct exchange names that end with the
# ", S - Copied from Ecoinvent U" suffix.
# `ds.get('exchanges', [])` keeps a dataset without an `exchanges` key from
# raising `TypeError` (iterating `None`), matching the lookup used in the
# debugging loops elsewhere in this notebook.
names = {
    exc['name']
    for ds in sp.data
    for exc in ds.get('exchanges', [])
    if exc['name'].endswith(", S - Copied from Ecoinvent U")
}

In [None]:
# Spot check: is this exchange name among the collected `names`?
'Municipal solid waste {CH}| treatment of, incineration | Cut-off, S - Copied from Ecoinvent U' in names

In [None]:
# Spot check: is this exchange name among the collected `names`?
'Barley grain, feed {CA-QC}| production | Cut-off, S - Copied from Ecoinvent U' in names

In [None]:
# One source/target pair per collected name: the target name has the
# ", S - Copied from Ecoinvent U" text replaced by ", U".
data = [
    dict(
        source=dict(name=original),
        target=dict(name=original.replace(", S - Copied from Ecoinvent U", ", U")),
    )
    for original in names
]

In [None]:
# Package the name-restoring replacements as a randonneur datapackage and
# register the serialized JSON file.
dp = rn.Datapackage(
    name="agribalyse-3.1.1-restore-simapro-ecoinvent-names",
    description="""Restore names of linked ecoinvent processes to original SimaPro form.
Changes `Foo, S - Copied from Ecoinvent U` to `Foo, U`, i.e. the standard way that ecoinvent processes are given in SimaPro.
Done in preparation for harmonization and linking via standard Randonneur transformations like `simapro-ecoinvent-3.8-cutoff`.""",
    contributors=[
        {"title": "Chris Mutel", "path": "https://chris.mutel.org/", "role": "author"},
    ],
    mapping_target=rn.MappingConstants.SIMAPRO_CSV,
    mapping_source=rn.MappingConstants.SIMAPRO_CSV,
    version="1.0.0",
    # source and target ids are identical: only names inside the same dataset change
    source_id="agribalyse-3.1.1",
    target_id="agribalyse-3.1.1",
    graph_context=["edges"],
    homepage="https://www.data.gouv.fr/fr/datasets/agribalyse-3-1-1-synthese/",
)
dp.add_data("replace", data)

# `registry` was never defined anywhere in this notebook, so this cell raised
# `NameError`; create it from `randonneur_data` (imported as `rd`).
registry = rd.Registry()
# `replace=True` is passed so that re-running this cell with the same label is accepted.
registry.add_file(dp.to_json(Path("agribalyse-3.1.1-restore-simapro-ecoinvent-names.json")), replace=True)

In [None]:
# Sanity check: load the "replace" entries stored under this label and print
# those whose source name is the barley process.
# NOTE(review): `registry` is not defined in the visible notebook cells —
# presumably a `randonneur_data.Registry` instance; confirm it exists before running.
r_data = registry.get_file('agribalyse-3.1.1-restore-simapro-ecoinvent-names')['replace']

print([
    o 
    for o in r_data
    if o['source']['name'] == 'Barley grain, feed {CA-QC}| production | Cut-off, S - Copied from Ecoinvent U'
])

# Drop the notebook-level reference to the loaded list.
r_data = None

In [None]:
# Apply the name-restoring transformation, then the SimaPro -> ecoinvent 3.8
# cutoff transformation restricted to the `name` field.
sp.randonneur("agribalyse-3.1.1-restore-simapro-ecoinvent-names")
sp.randonneur("simapro-ecoinvent-3.8-cutoff", fields=["name"])

In [None]:
# Run the strategies currently configured on the importer.
sp.apply_strategies()

In [None]:
# Link against the ecoinvent 3.8 cutoff database (no explicit `fields` given).
sp.match_database("ecoinvent-3.8-cutoff")

In [None]:
# Report the importer statistics at this stage of the workflow.
sp.statistics()

In [None]:
# Stop at the first unlinked exchange of type 'production'; `exc` stays bound
# to it for the following cells. If none exists, `exc` ends up as the last
# unlinked exchange visited.
for exc in sp.unlinked:
    if exc['type'] == 'production':
        break

In [None]:
# Display whatever exchange `exc` is currently bound to in the notebook namespace.
exc

In [None]:
# Find the first dataset whose exchange list contains the current `exc`
# (by equality); `ds` stays bound to it. If none contains it, `ds` ends up
# as the last dataset.
for ds in sp.data:
    if exc in ds['exchanges']:
        break

In [None]:
# Display whatever dataset `ds` is currently bound to in the notebook namespace.
ds

In [None]:
# Find the first dataset that has exactly one functional exchange whose name
# differs from the dataset's own name; `ds` and `func` stay bound afterwards.
for ds in sp.data:
    func = [exc for exc in ds['exchanges'] if exc.get('functional')]
    if len(func) == 1 and func[0]['name'] != ds['name']:
        break

In [None]:
# Display whatever dataset `ds` is currently bound to in the notebook namespace.
ds

In [None]:
# Print the unit of every dataset with this exact name (no early exit, so
# duplicates are all shown).
for ds in sp.data:
    if ds['name'] == 'Propane, burned in industrial furnace, at plant {FR} U':
        print(ds['unit'])

In [None]:
# Load the datapackage stored under the `simapro-ecoinvent-3.8-cutoff` label.
# NOTE(review): `registry` is not defined in the visible notebook cells —
# presumably a `randonneur_data.Registry` instance; confirm it exists before running.
md = registry.get_file("simapro-ecoinvent-3.8-cutoff")

In [None]:
# Print every "replace" entry whose source name starts with
# "Sodium chloride, powder".
for dct in md['replace']:
    if dct['source']['name'].startswith("Sodium chloride, powder"):
        print(dct)

In [None]:
# Emits an empty line only.
print()

In [None]:
# Print the registry object.
# NOTE(review): `registry` is not defined in the visible notebook cells —
# presumably a `randonneur_data.Registry` instance; confirm it exists before running.
print(registry)