# Adding data to sample model

In [1]:
import sentier_data_tools as sdt
import pandas as pd
from loguru import logger

In [2]:
# Reset the local sentier_data_tools database before anything is written.
# NOTE(review): presumably this discards previously stored records so that
# re-running the notebook does not accumulate duplicates — confirm against
# the sentier_data_tools documentation.
sdt.reset_local_database()

In [3]:
# Read the electrolyzer spreadsheet from the example-data directory exposed
# by sentier_data_tools.
electrolyzers_path = sdt.example_data_dir / "electrolyzers.xlsx"
df = pd.read_excel(electrolyzers_path)

In [4]:
# Each entry pairs a column's term IRI with the IRI of its unit (or XSD
# datatype for text columns). Keeping the pair on one line means a column
# can never end up with its neighbour's unit after an insert or reorder.
# The order matters: further down, `columns` is assigned positionally as the
# labels of the spreadsheet columns that remain once "Electrolysis type" has
# been dropped.
# NOTE(review): the last three electrolyzer-specific terms are spelled
# ".../electrolyser/..." while the others use ".../electrolyzer/..." —
# confirm both spellings exist in the vocabulary.
column_units = [
    ("https://vocab.sentier.dev/model-terms/generic/company", "https://www.w3.org/2001/XMLSchema#string"),
    ("https://vocab.sentier.dev/model-terms/generic/product", "https://www.w3.org/2001/XMLSchema#string"),
    ("https://vocab.sentier.dev/model-terms/energy/min_power_cons", "https://vocab.sentier.dev/units/unit/KiloW"),
    ("https://vocab.sentier.dev/model-terms/energy/nom_power_cons", "https://vocab.sentier.dev/units/unit/KiloW"),
    ("https://vocab.sentier.dev/model-terms/energy/max_power_cons", "https://vocab.sentier.dev/units/unit/KiloW"),
    ("https://vocab.sentier.dev/model-terms/energy/input_voltage", "https://vocab.sentier.dev/units/unit/V"),
    ("https://vocab.sentier.dev/model-terms/generic/footprint_area", "https://vocab.sentier.dev/units/unit/M2"),
    ("https://vocab.sentier.dev/model-terms/generic/availability", "https://vocab.sentier.dev/units/unit/FRACTION"),
    ("https://vocab.sentier.dev/model-terms/electrolyzer/min_amb_temp", "https://vocab.sentier.dev/units/unit/DEG_C"),
    ("https://vocab.sentier.dev/model-terms/electrolyzer/max_amb_temp", "https://vocab.sentier.dev/units/unit/DEG_C"),
    ("https://vocab.sentier.dev/model-terms/energy/elec_energy_serv_dem", "https://vocab.sentier.dev/units/unit/KiloW-HR-PER-KiloGM"),
    ("https://vocab.sentier.dev/model-terms/energy/therm_energy_serv_dem", "https://vocab.sentier.dev/units/unit/MegaJ-PER-KiloGM"),
    ("https://vocab.sentier.dev/model-terms/energy/therm_energy_conv_eff", "https://vocab.sentier.dev/units/unit/FRACTION"),
    ("https://vocab.sentier.dev/model-terms/electrolyzer/temp_useful_heat", "https://vocab.sentier.dev/units/unit/DEG_C"),
    ("https://vocab.sentier.dev/model-terms/energy/energy_conv_eff_lhv", "https://vocab.sentier.dev/units/unit/PERCENT"),
    ("http://openenergy-platform.org/ontology/oeo/OEO_00140049", "https://vocab.sentier.dev/units/unit/PERCENT"),
    ("https://vocab.sentier.dev/model-terms/electrolyzer/min_stack_temp", "https://vocab.sentier.dev/units/unit/DEG_C"),
    ("https://vocab.sentier.dev/model-terms/electrolyzer/max_stack_temp", "https://vocab.sentier.dev/units/unit/DEG_C"),
    ("https://vocab.sentier.dev/model-terms/electrolyzer/max_water_conduc", "https://vocab.sentier.dev/units/unit/MicroS-PER-CentiM"),
    ("https://vocab.sentier.dev/model-terms/electrolyser/max_stack_lifetime", "https://vocab.sentier.dev/units/unit/HR"),
    ("https://vocab.sentier.dev/model-terms/electrolyser/h2_quality", "https://vocab.sentier.dev/units/unit/NUM"),
    ("https://vocab.sentier.dev/model-terms/electrolyser/h2_pressure", "https://vocab.sentier.dev/units/unit/PA"),
    ("https://vocab.sentier.dev/model-terms/generic/mass_prod_rate", "https://vocab.sentier.dev/units/unit/KiloGM-PER-HR"),
]

# The rest of the notebook consumes two parallel lists, so derive them here.
columns = [term for term, unit in column_units]
units = [unit for term, unit in column_units]

# Every column IRI needs a matching unit IRI.
assert len(columns) == len(units), (
    f"{len(columns)} column IRIs but {len(units)} unit IRIs"
)
# The spreadsheet carries one column more than `columns`: "Electrolysis type"
# is dropped before the IRIs are applied as column labels, hence the `- 1`.
assert len(columns) == len(df.columns) - 1, (
    f"{len(columns)} column IRIs but {len(df.columns) - 1} spreadsheet columns "
    "to label (excluding 'Electrolysis type')"
)

In [5]:
# People credited on the datapackage, with a link and a role for each.
contributors = [
    {"title": "Karin Treyer", "path": "https://www.psi.ch/en/ta/people/karin-treyer", "role": "author"},
    {"title": "Chris Mutel", "path": "https://chris.mutel.org/", "role": "wrangler"},
]

# Describe where the data came from, then ask the writer for the resulting
# metadata object.
writer = sdt.DatapackageWriter(
    name="electrolyzer_data_from_premise_04_2024",
    description="Electrolyzer operational in inventory data taken from https://github.com/polca/premise/tree/master/premise/data/additional_inventories in April 2024.",
    contributors=contributors,
    homepage="https://github.com/polca/premise/tree/master/premise/data/additional_inventories",
)
metadata = writer.metadata()

# Remove the "version" entry from the generated metadata; assigning the
# popped value to `_` keeps it from being echoed as the cell's output.
# NOTE(review): presumably removed because the version is passed separately
# when the Datapackage is created — confirm.
_ = metadata.pop("version")

In [6]:
# Create the datapackage that the records written further down are attached
# to. The instance is bound to `datapackage` first and saved in a separate
# step, so the variable never depends on what `.save()` happens to return.
# NOTE(review): if `Datapackage` is a peewee model, `save()` returns the
# number of rows written rather than the instance — confirm against
# sentier_data_tools.
datapackage = sdt.Datapackage(
    name="electrolyzer_data",
    metadata=metadata,
    version=1,
)
# Assign to `_` so the return value is not echoed as the cell's output.
_ = datapackage.save()

In [7]:
# Value found in the spreadsheet's "Electrolysis type" column, paired with
# the product IRI under which the matching rows are stored.
# NOTE(review): the PEM IRI contains an "en/page/" path segment that the
# other two lack — confirm it is the intended identifier.
TYPES = [
    ("PEM", "https://vocab.sentier.dev/products/en/page/pem-electrolyzer"),
    ("AEC", "https://vocab.sentier.dev/products/aec-electrolyzer"),
    ("SOEC", "https://vocab.sentier.dev/products/soel-electrolyzer"),
]

for kind, iri in TYPES:
    is_kind = df['Electrolysis type'] == kind
    logger.info("Adding {} records for type {}", int(is_kind.sum()), kind)

    # Build a fresh frame without the type column (`df` itself is left
    # untouched), then relabel the remaining columns positionally with their
    # term IRIs.
    filtered = df.loc[is_kind].drop(columns=['Electrolysis type'])
    filtered.columns = columns

    record = sdt.Record(
        data=filtered,
        product=iri,
        columns=columns,
        units=units,
        datapackage=datapackage,
    ).save()

[32m2024-10-08 07:59:35.685[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m9[0m - [1mAdding 54 records for type PEM[0m
[32m2024-10-08 07:59:35.705[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m9[0m - [1mAdding 36 records for type AEC[0m
[32m2024-10-08 07:59:35.709[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m9[0m - [1mAdding 3 records for type SOEC[0m


In [9]:
# One value per product IRI, stored below with the unit YR.
# NOTE(review): the PEM key contains an "en/page/" path segment that the
# other two keys lack — confirm it is the intended identifier.
LIFETIMES = {
    "https://vocab.sentier.dev/products/en/page/pem-electrolyzer": 20,
    "https://vocab.sentier.dev/products/aec-electrolyzer": 27.5,
    "https://vocab.sentier.dev/products/soel-electrolyzer": 20,
}

for key, value in LIFETIMES.items():
    # A one-row, one-column frame holding just this product's value.
    # NOTE(review): the column is labelled with the product IRI itself rather
    # than a dedicated lifetime term — confirm this is intended.
    single_value = pd.DataFrame({key: [value]})

    record = sdt.Record(
        data=single_value,
        product=key,
        columns=[key],
        units=["https://vocab.sentier.dev/units/unit/YR"],
        datapackage=datapackage,
    ).save()