In [1]:
import pandas as pd
import sentier_data_tools as sdt
from datetime import datetime

In [2]:
# Reset the local sentier_data_tools database before the datasets below are saved.
# NOTE(review): presumably this discards anything stored previously — confirm
# before running against a database you want to keep.
sdt.reset_local_database()

DF1: electricity generation mix

In [3]:
# Datapackage metadata attached to the electricity dataset saved further down.
# The project repository doubles as the contributor path and the homepage.
PEAKACHU_REPO = "https://github.com/TimoDiepers/sentier_peakachu/"
peakachu_author = {"title": "Peakachu", "path": PEAKACHU_REPO, "role": "author"}

electricity_package = sdt.Datapackage(
    name="electricity_markets",
    description="Electricity markets blabla",
    contributors=[peakachu_author],
    homepage=PEAKACHU_REPO,
)
metadata = electricity_package.metadata()

In [4]:
from sentier_peakachu.entsoe import get_generation_data

# Request generation data for country code "PL" over the window
# 2024-10-08 .. 2024-10-09, both bounds expressed in Europe/Brussels time.
country_code = "PL"
query_tz = "Europe/Brussels"
query_start = pd.Timestamp("20241008", tz=query_tz)
query_end = pd.Timestamp("20241009", tz=query_tz)

df = get_generation_data(country_code=country_code, start=query_start, end=query_end)

In [5]:
# Keep three generation columns, turn the index into a "timestamp" column,
# and relabel all four resulting columns positionally with their IRIs.
generation_columns = ["Fossil Brown coal/Lignite", "Fossil Gas", "Solar"]
df = (
    df.rename_axis("timestamp")[generation_columns]
    .reset_index()
    .set_axis(
        ["timestamp", "https://example.com/coal", "https://example.com/gas", "https://example.com/pv"],
        axis="columns",
    )
)

In [6]:
# One unit IRI per column of `df`, in column order: the timestamp column
# first, followed by the three generation columns, which all share the MW unit.
MW_UNIT = "https://example.com/units/MW"
UNITS = ["https://example.com/units/datetime", MW_UNIT, MW_UNIT, MW_UNIT]

In [7]:
# Store the generation frame as a dataset and keep the value returned by
# `.save()` so the dataset can be looked up again afterwards.
dataset_id = sdt.Dataset(
    name="electricity mixes",  # plain literal: there is nothing to interpolate
    data=df,
    kind=sdt.DatasetKind.BOM,
    product="http://openenergy-platform.org/ontology/oeo/OEO_00000139",
    # Pair each column label with its unit by position. `zip` stops at the
    # shorter input, so `df.columns` and `UNITS` must have the same length
    # (both have four entries here).
    columns=[{"iri": x, "unit": y} for x, y in zip(df.columns, UNITS)],
    metadata=metadata,
    location=f"https://example.com/locations/{country_code}",
    version=1,
    valid_from=datetime(2018, 1, 1),
    valid_to=datetime(2028, 1, 1),
).save()

In [8]:
# Fetch the stored dataset again through the identifier returned by `.save()`.
dataset = sdt.Dataset.get_by_id(dataset_id)

In [9]:
# List every stored dataset whose `product` field equals this IRI.
product_iri = "http://openenergy-platform.org/ontology/oeo/OEO_00000139"
matching_datasets = sdt.Dataset.select().where(sdt.Dataset.product == product_iri)
list(matching_datasets)

[<Dataset: 1>]

DF2: power plant emission data


In [10]:
# Datapackage metadata for the power plant emission dataset.
# Each contributor is listed as (title, path, role) and expanded into the
# dict layout passed to `sdt.Datapackage`.
contributor_rows = [
    ("Karin Treyer", "https://www.psi.ch/en/ta/people/karin-treyer", "author"),
    ("Chris Mutel", "https://chris.mutel.org/", "wrangler"),
]

metadata = sdt.Datapackage(
    name="emission data power plants",
    description="Climate trace emission data for power plants",
    contributors=[
        {"title": title, "path": path, "role": role}
        for title, path, role in contributor_rows
    ],
    homepage="https://example.com/additional_inventories",
).metadata()

In [16]:
# Toy power plant table: three coal plants, each observed at two timestamps
# (2020-01-01 and 2021-01-01).
columns = ["timestamp", "powerplant_name", "https://example.com/Energy", "https://example.com/GWP100"]

timestamp = [datetime(2020, 1, 1)] * 3 + [datetime(2021, 1, 1)] * 3
powerplant_name = [f"coal plant {number}" for number in (1, 2, 3)] * 2
production_vol = [100, 200, 300, 80, 210, 270]
GWP = [50, 100, 150, 40, 110, 140]

# Assemble the frame column by column; the dict keeps the order of `columns`.
df = pd.DataFrame(dict(zip(columns, (timestamp, powerplant_name, production_vol, GWP))))
df

Unnamed: 0,timestamp,powerplant_name,https://example.com/Energy,https://example.com/GWP100
0,2020-01-01,coal plant 1,100,50
1,2020-01-01,coal plant 2,200,100
2,2020-01-01,coal plant 3,300,150
3,2021-01-01,coal plant 1,80,40
4,2021-01-01,coal plant 2,210,110
5,2021-01-01,coal plant 3,270,140


In [17]:
# Sum the energy of all plants sharing a timestamp and broadcast that total
# back onto every row, then divide each plant's GWP100 value by the total.
energy_by_timestamp = df.groupby("timestamp")["https://example.com/Energy"]
df["total_production_at_timestamp"] = energy_by_timestamp.transform("sum")
df["EF"] = df["https://example.com/GWP100"].div(df["total_production_at_timestamp"])


In [18]:
# Display the frame, now including the `total_production_at_timestamp` and `EF` columns.
df

Unnamed: 0,timestamp,powerplant_name,https://example.com/Energy,https://example.com/GWP100,total_production_at_timestamp,EF
0,2020-01-01,coal plant 1,100,50,600,0.083333
1,2020-01-01,coal plant 2,200,100,600,0.166667
2,2020-01-01,coal plant 3,300,150,600,0.25
3,2021-01-01,coal plant 1,80,40,560,0.071429
4,2021-01-01,coal plant 2,210,110,560,0.196429
5,2021-01-01,coal plant 3,270,140,560,0.25


In [19]:
# One unit IRI per column of `df`, in column order. `df` has six columns at
# this point (the four original ones plus the two derived ones), and the save
# cell pairs columns with units positionally via `zip`, so the list must have
# six entries: a missing entry silently shifts and drops units.
UNITS = [
    "https://example.com/units/datetime",     # timestamp
    "https://example.com/units/plant_name",   # powerplant_name
    "https://example.com/units/MWh",          # https://example.com/Energy
    "https://example.com/units/MtCO2eq",      # https://example.com/GWP100
    "https://example.com/units/MWh",          # total_production_at_timestamp (sum of Energy)
    "https://example.com/units/MtCO2eq/MWh",  # EF
]

In [20]:
# Store the power plant frame as a second dataset; `.save()` returns the value
# kept in `dataset_id`.
dataset_id = sdt.Dataset(
    name="power plant data",  # plain literal: there is nothing to interpolate
    data=df,
    kind=sdt.DatasetKind.BOM,
    product="http://openenergy-platform.org/ontology/oeo/OEO_00000139",
    # Pair each column label with its unit by position. `zip` stops at the
    # shorter input, so `UNITS` needs exactly one entry per column of `df`;
    # otherwise trailing columns are dropped and units end up on the wrong column.
    columns=[{"iri": x, "unit": y} for x, y in zip(df.columns, UNITS)],
    metadata=metadata,
    # `country_code` is the value assigned in the generation-data cell ("PL").
    location=f"https://example.com/locations/{country_code}",
    version=1,
    valid_from=datetime(2018, 1, 1),
    valid_to=datetime(2028, 1, 1),
).save()