In [2]:
import pandas as pd

In [34]:
def read_consumption(
    file_netconnect: str = "data/AggregatedConsumptionData NetConnect.csv",
    file_gaspool: str = "data/Aggregated Consumption Date Market Area GASPOOL.csv",
    file_the: str = "data/AggregatedConsumptionData Trading Hub.csv",
) -> tuple[pd.Series, pd.Series, pd.Series]:
    """Read historic natural gas consumption from three semicolon-separated CSV files.

    For every file the date column becomes a ``DatetimeIndex`` and the value
    columns of each row are summed into a single daily total.

    Args:
        file_netconnect: Path to the NetConnect Germany file. Dates are read
            from the ``DayOfUse`` column in ``DD.MM.YYYY`` format.
        file_gaspool: Path to the GASPOOL file. Dates are read from the
            ``Datum`` column in ``DD.MM.YYYY`` format.
        file_the: Path to the Trading Hub Europe file. Dates are read from the
            ``Gasday`` column in ``DD/MM/YYYY`` format; numbers use ``,`` as
            thousands separator.

    Returns:
        A tuple ``(netconnect, gaspool, the)`` of daily totals, each sorted by
        date. The NetConnect and Trading Hub values are divided by 1000
        (kWh -> MWh); the GASPOOL values are returned as stored in the file.

    Raises:
        FileNotFoundError: If one of the files does not exist.
        ValueError: If a date does not match the expected format.
    """
    # Forward slashes are used in the default paths so they resolve on every
    # platform (a backslash is only a path separator on Windows and "\A" is an
    # invalid string escape).

    # Read NetConnect Germany CSV file
    ncg_consumption = pd.read_csv(file_netconnect, sep=";", index_col="DayOfUse")
    ncg_consumption.index = pd.to_datetime(ncg_consumption.index, format="%d.%m.%Y")

    # Convert kWh to MWh and aggregate different measurement types;
    # non-numeric columns are dropped before summing.
    ncg_consumption = ncg_consumption.select_dtypes("number") / 1000
    ncg_consumption_aggregated = ncg_consumption.sum(axis="columns")

    # Read GASPOOL CSV file
    gaspool_consumption = pd.read_csv(file_gaspool, sep=";", index_col="Datum")
    gaspool_consumption.index = pd.to_datetime(
        gaspool_consumption.index, format="%d.%m.%Y"
    )
    # NOTE(review): unlike the other two sources, no kWh -> MWh scaling and no
    # numeric-column filter is applied here -- presumably this file is already
    # in MWh and purely numeric; confirm against the data.
    gaspool_consumption_aggregated = gaspool_consumption.sum(axis="columns")

    # Read Trading Hub Europe CSV file
    the_consumption = pd.read_csv(file_the, sep=";", thousands=",", index_col="Gasday")
    the_consumption.index = pd.to_datetime(the_consumption.index, format="%d/%m/%Y")

    # Convert kWh to MWh and aggregate different measurement types
    the_consumption = the_consumption.select_dtypes("number") / 1000
    the_consumption_aggregated = the_consumption.sum(axis="columns")

    return (
        ncg_consumption_aggregated.sort_index(),
        gaspool_consumption_aggregated.sort_index(),
        the_consumption_aggregated.sort_index(),
    )

In [58]:
def read_temperatures(file: str = "data/open-meteo-52.55N13.41E38m.csv") -> pd.DataFrame:
    """Read a daily weather CSV (ERA5 data, per the original docstring) into a DataFrame.

    Despite the function name, the result is the whole table, not a single
    temperature series: every column of the file is kept. Select one column
    from the result to obtain a ``pd.Series``.

    Args:
        file: Path to a comma-separated file whose column header row follows
            two leading lines that are skipped. It must contain a ``time``
            column and a ``sunshine_duration (s)`` column.

    Returns:
        A DataFrame indexed by ``Date`` (the parsed ``time`` column). The
        sunshine duration is converted from seconds to hours and renamed to
        ``sunshine_duration (h)``; all other columns are returned unchanged.

    Raises:
        FileNotFoundError: If ``file`` does not exist.
        KeyError: If the ``time`` or ``sunshine_duration (s)`` column is missing.
    """
    # The first two lines are skipped before the header row is read
    # (presumably location metadata written by the export -- confirm).
    data = pd.read_csv(file, sep=",", skiprows=2, header=0)

    # Convert sunshine duration from seconds to hours. The column is updated
    # in place before renaming so that the column order stays as in the file.
    data["sunshine_duration (s)"] = data["sunshine_duration (s)"] / 3600
    data = data.rename(
        columns={
            "sunshine_duration (s)": "sunshine_duration (h)",
            "time": "Date",
        }
    )

    # Parse the dates and use them as the index
    data["Date"] = pd.to_datetime(data["Date"])
    return data.set_index("Date")

In [60]:
# Load the weather table and list the columns it provides.
temperature = read_temperatures()
print(temperature.columns)

# Pick out the daily maximum 2 m temperature column and show that a single
# column selection yields a Series.
temp_2m_max = temperature.loc[:, "temperature_2m_max (°C)"]
print(type(temp_2m_max))

Index(['temperature_2m_max (°C)', 'temperature_2m_min (°C)',
       'temperature_2m_mean (°C)', 'sunshine_duration (h)',
       'wind_speed_10m_max (km/h)'],
      dtype='object')
<class 'pandas.core.series.Series'>


In [35]:
# Load the three market-area series.
ncg_consumption, gaspool_consumption, the_consumption = read_consumption()

# Add NetConnect and GASPOOL day by day; a day present in only one of the two
# series uses 0 for the other. The Trading Hub series is then appended.
ncg_plus_gaspool = ncg_consumption.add(gaspool_consumption, fill_value=0)
consumption_data = pd.concat([ncg_plus_gaspool, the_consumption])
print(consumption_data)


2011-04-01    1233962.960
2011-04-02    1006977.403
2011-04-03    1025128.699
2011-04-04    1334227.003
2011-04-05    1362010.627
                 ...     
2024-10-27    1914459.771
2024-10-28    2186914.778
2024-10-29    2330071.796
2024-10-30    2312481.125
2024-10-31    2256990.688
Length: 4963, dtype: float64
