In [1]:
from scheduler_vm_task.master_machine.weather_data_utils import (
    get_points_over_country,
    get_multiple_historic_data,
    get_emission_data,
    average_emission_data,
    get_historic_data,
    parameters,
)

In [2]:
# Lookup table from two-letter country codes (e.g. "DE") to three-letter
# country codes (e.g. "DEU"). The dataset-building loop uses it to translate the
# country prefix of an Electricity Maps zone id into the code passed to
# `get_points_over_country`.
# NOTE(review): the entries look like ISO 3166-1 alpha-2 -> alpha-3, plus a few
# non-standard/withdrawn codes ('XK', 'AN') — confirm against the helper's
# expectations before adding new zones.
# The formatter is switched off so the table stays on two long lines.
# fmt: off
countryISOMapping = { 'AF': 'AFG', 'AX': 'ALA', 'AL': 'ALB', 'DZ': 'DZA', 'AS': 'ASM', 'AD': 'AND', 'AO': 'AGO', 'AI': 'AIA', 'AQ': 'ATA', 'AG': 'ATG', 'AR': 'ARG', 'AM': 'ARM', 'AW': 'ABW', 'AU': 'AUS', 'AT': 'AUT', 'AZ': 'AZE', 'BS': 'BHS', 'BH': 'BHR', 'BD': 'BGD', 'BB': 'BRB', 'BY': 'BLR', 'BE': 'BEL', 'BZ': 'BLZ', 'BJ': 'BEN', 'BM': 'BMU', 'BT': 'BTN', 'BO': 'BOL', 'BA': 'BIH', 'BW': 'BWA', 'BV': 'BVT', 'BR': 'BRA', 'VG': 'VGB', 'IO': 'IOT', 'BN': 'BRN', 'BG': 'BGR', 'BF': 'BFA', 'BI': 'BDI', 'KH': 'KHM', 'CM': 'CMR', 'CA': 'CAN', 'CV': 'CPV', 'KY': 'CYM', 'CF': 'CAF', 'TD': 'TCD', 'CL': 'CHL', 'CN': 'CHN', 'HK': 'HKG', 'MO': 'MAC', 'CX': 'CXR', 'CC': 'CCK', 'CO': 'COL', 'KM': 'COM', 'CG': 'COG', 'CD': 'COD', 'CK': 'COK', 'CR': 'CRI', 'CI': 'CIV', 'HR': 'HRV', 'CU': 'CUB', 'CY': 'CYP', 'CZ': 'CZE', 'DK': 'DNK', 'DJ': 'DJI', 'DM': 'DMA', 'DO': 'DOM', 'EC': 'ECU', 'EG': 'EGY', 'SV': 'SLV', 'GQ': 'GNQ', 'ER': 'ERI', 'EE': 'EST', 'ET': 'ETH', 'FK': 'FLK', 'FO': 'FRO', 'FJ': 'FJI', 'FI': 'FIN', 'FR': 'FRA', 'GF': 'GUF', 'PF': 'PYF', 'TF': 'ATF', 'GA': 'GAB', 'GM': 'GMB', 'GE': 'GEO', 'DE': 'DEU', 'GH': 'GHA', 'GI': 'GIB', 'GR': 'GRC', 'GL': 'GRL', 'GD': 'GRD', 'GP': 'GLP', 'GU': 'GUM', 'GT': 'GTM', 'GG': 'GGY', 'GN': 'GIN', 'GW': 'GNB', 'GY': 'GUY', 'HT': 'HTI', 'HM': 'HMD', 'VA': 'VAT', 'HN': 'HND', 'HU': 'HUN', 'IS': 'ISL', 'IN': 'IND', 'ID': 'IDN', 'IR': 'IRN', 'IQ': 'IRQ', 'IE': 'IRL', 'IM': 'IMN', 'IL': 'ISR', 'IT': 'ITA', 'JM': 'JAM', 'JP': 'JPN', 'JE': 'JEY', 'JO': 'JOR', 'KZ': 'KAZ', 'KE': 'KEN', 'KI': 'KIR', 'KP': 'PRK', 'KR': 'KOR', 'KW': 'KWT', 'KG': 'KGZ', 'LA': 'LAO', 'LV': 'LVA', 'LB': 'LBN', 'LS': 'LSO', 'LR': 'LBR', 'LY': 'LBY', 'LI': 'LIE', 'LT': 'LTU', 'LU': 'LUX', 'MK': 'MKD', 'MG': 'MDG', 'MW': 'MWI', 'MY': 'MYS', 'MV': 'MDV', 'ML': 'MLI', 'MT': 'MLT', 'MH': 'MHL', 'MQ': 'MTQ', 'MR': 'MRT', 'MU': 'MUS', 'YT': 'MYT', 'MX': 'MEX', 'FM': 'FSM', 'MD': 'MDA', 'MC': 'MCO', 'MN': 'MNG', 'ME': 'MNE', 'MS': 'MSR', 'MA': 'MAR', 'MZ': 'MOZ', 'MM': 'MMR', 
'NA': 'NAM', 'NR': 'NRU', 'NP': 'NPL', 'NL': 'NLD', 'AN': 'ANT', 'NC': 'NCL', 'NZ': 'NZL', 'NI': 'NIC', 'NE': 'NER', 'NG': 'NGA', 'NU': 'NIU', 'NF': 'NFK', 'MP': 'MNP', 'NO': 'NOR', 'OM': 'OMN', 'PK': 'PAK', 'PW': 'PLW', 'PS': 'PSE', 'PA': 'PAN', 'PG': 'PNG', 'PY': 'PRY', 'PE': 'PER', 'PH': 'PHL', 'PN': 'PCN', 'PL': 'POL', 'PT': 'PRT', 'PR': 'PRI', 'QA': 'QAT', 'RE': 'REU', 'RO': 'ROU', 'RU': 'RUS', 'RW': 'RWA', 'BL': 'BLM', 'SH': 'SHN', 'KN': 'KNA', 'LC': 'LCA', 'MF': 'MAF', 'PM': 'SPM', 'VC': 'VCT', 'WS': 'WSM', 'SM': 'SMR', 'ST': 'STP', 'SA': 'SAU', 'SN': 'SEN', 'RS': 'SRB', 'SC': 'SYC', 'SL': 'SLE', 'SG': 'SGP', 'SK': 'SVK', 'SI': 'SVN', 'SB': 'SLB', 'SO': 'SOM', 'ZA': 'ZAF', 'GS': 'SGS', 'SS': 'SSD', 'ES': 'ESP', 'LK': 'LKA', 'SD': 'SDN', 'SR': 'SUR', 'SJ': 'SJM', 'SZ': 'SWZ', 'SE': 'SWE', 'CH': 'CHE', 'SY': 'SYR', 'TW': 'TWN', 'TJ': 'TJK', 'TZ': 'TZA', 'TH': 'THA', 'TL': 'TLS', 'TG': 'TGO', 'TK': 'TKL', 'TO': 'TON', 'TT': 'TTO', 'TN': 'TUN', 'TR': 'TUR', 'TM': 'TKM', 'TC': 'TCA', 'TV': 'TUV', 'UG': 'UGA', 'UA': 'UKR', 'AE': 'ARE', 'GB': 'GBR', 'US': 'USA', 'UM': 'UMI', 'UY': 'URY', 'UZ': 'UZB', 'VU': 'VUT', 'VE': 'VEN', 'VN': 'VNM', 'VI': 'VIR', 'WF': 'WLF', 'EH': 'ESH', 'YE': 'YEM', 'ZM': 'ZMB', 'ZW': 'ZWE', 'XK': 'XKX'}
# fmt: on


In [4]:
# Zones to build datasets for, as (zone_code, points_step) pairs.
#   zone_code   - prefix of the CSV files in `electricitymaps_datasets/`
#                 (`{zone_code}_2021_hourly.csv`, `{zone_code}_2022_hourly.csv`).
#   points_step - forwarded to `get_points_over_country(points_step=...)`.
#                 NOTE(review): presumably the spacing of the sampling grid over
#                 the country; confirm units in weather_data_utils.
#
# The zone list was originally discovered from the directory listing with the
# snippet below (kept for reference only). The placeholder steps in that snippet
# do not match the final values in `codes`; the literal list is authoritative.
# import os
# f = os.listdir('electricitymaps_datasets')
# codes = list(set( [s.split('_')[0] for s in f]))
# points_steps = [5.6, 9.75, 1, 1.38, 1.25, 1, 1, 1, 1, 1, 1, 1, 1]
# codes = list(zip(codes, points_steps))
# codes

codes = [
    ("BR-CS", 5.6),
    ("CA-ON", 9.75),
    ("CH", 1),
    ("DE", 1.38),
    ("PL", 1.25),
    ("BE", 1),
    ("IT-NO", 1.2),
    ("CA-QC", 9.75),
    ("ES", 1.5),
    ("GB", 1.25),
    ("FI", 1.5),
    ("FR", 1.6),
    ("NL", 1),
]

In [8]:
import pandas as pd
import numpy as np


# Build one `{code}_np_dataset.npy` file per zone.
#
# For every zone the two yearly emission CSVs are stacked, weather history is
# fetched for each sampled point over the zone's country, and the per-point
# tables are stacked into one array with axes (rows, columns, points).

# Query window for the weather history. It is loop-invariant, and it has to span
# the same period as the 2021 + 2022 emission CSVs because the emission values
# are attached to the weather rows positionally.
year_start = "2021"
month_start = "01"
day_start = "01"

year_end = "2022"
month_end = "12"
day_end = "31"

emission_column = "Carbon Intensity gCO₂eq/kWh (LCA)"

for code, points_step in codes:
    emission_df = pd.concat(
        (
            pd.read_csv(f"electricitymaps_datasets/{code}_2021_hourly.csv"),
            pd.read_csv(f"electricitymaps_datasets/{code}_2022_hourly.csv"),
        )
    ).reset_index(drop=True)

    emission_df["Datetime (UTC)"] = pd.to_datetime(emission_df["Datetime (UTC)"])
    # `.copy()` makes the column subset an independent frame, so adding
    # `country_code` below cannot trigger a SettingWithCopyWarning.
    emission_df = emission_df[
        ["Datetime (UTC)", "Zone Name", "Zone Id", emission_column]
    ].copy()

    # Zone ids may carry a sub-region suffix ("BR-CS"); the part before the
    # dash is the key used for `countryISOMapping`.
    country_code = emission_df["Zone Id"].iloc[0].split("-")[0]
    emission_df["country_code"] = country_code
    # Kept as a notebook-level name; it always equals `country_code`.
    weather_country_code = country_code

    points = get_points_over_country(
        country_code=countryISOMapping[country_code], points_step=points_step
    )

    print(code, len(points))

    # Plain arrays: both columns are attached to the weather rows by position,
    # independent of whatever index `get_historic_data` returns.
    emission_values = emission_df[emission_column].to_numpy()
    hour_of_day = emission_df["Datetime (UTC)"].dt.hour.to_numpy()

    weather_matrices = []
    for longitude, latitude in points:
        weather_df = get_historic_data(
            latitude=latitude,
            longitude=longitude,
            year_start=year_start,
            month_start=month_start,
            day_start=day_start,
            year_end=year_end,
            month_end=month_end,
            day_end=day_end,
            parameters=parameters,
        )
        if "weathercode" in weather_df.columns:
            weather_df = weather_df.drop(columns=["weathercode"])

        # Fail loudly with context instead of a bare pandas length error.
        if len(weather_df) != len(emission_values):
            raise ValueError(
                f"{code}: weather data at ({latitude}, {longitude}) has "
                f"{len(weather_df)} rows but the emission data has "
                f"{len(emission_values)} rows"
            )

        weather_df["emission"] = emission_values
        # Replaces (or creates) the `time` column with the hour taken from the
        # emission timestamps.
        weather_df["time"] = hour_of_day
        weather_df["longitude"] = longitude
        weather_df["latitude"] = latitude

        weather_matrices.append(weather_df.to_numpy())

    # Stack the per-point (rows, columns) tables along a new last axis; this is
    # the same layout as np.array(...).transpose((1, 2, 0)).
    weather_matrices = np.stack(weather_matrices, axis=-1)
    np.save("electricitymaps_datasets/" + f"{code}_np_dataset.npy", weather_matrices)

BR-CS 30
CA-ON 30
CH 7
DE 30
PL 31
BE 6
IT-NO 34
CA-QC 30
ES 33
GB 33
FI 33
FR 38
NL 9


In [9]:
# Shape of the array left over from the final iteration of the dataset-building
# loop (the last zone in `codes`). After the transpose in that loop the axes are
# (rows, columns, points).
weather_matrices.shape


(17520, 24, 9)

In [17]:
import numpy as np
import pandas as pd

# Zone whose saved dataset and emission series are loaded in this cell.
code = "BE"

# Array written by the dataset-building loop for this zone.
features = np.load(f"electricitymaps_datasets/{code}_np_dataset.npy", allow_pickle=True)
# Optional experiments, currently disabled: drop the first / last column.
# features = features[:, 1:, :]
# features = features[:, :-1, :]

# Emission series for both years, re-indexed 0..n-1, used as the target.
hourly_2021 = pd.read_csv(f"electricitymaps_datasets/{code}_2021_hourly.csv")
hourly_2022 = pd.read_csv(f"electricitymaps_datasets/{code}_2022_hourly.csv")
emission_df = pd.concat([hourly_2021, hourly_2022], ignore_index=True)
target = emission_df["Carbon Intensity gCO₂eq/kWh (LCA)"].to_numpy()

In [14]:
from weather_co2_dataset import WeatherCO2DataModule

# Window sizes handed to the data module.
# NOTE(review): presumably counted in rows of the hourly dataset (96 rows of
# history, 24 rows of forecast) — confirm in weather_co2_dataset.
lookback_steps = 96
predict_steps = 24

# The module takes lists of feature/target arrays; only one zone is passed here.
dm = WeatherCO2DataModule(
    [features],
    [target],
    lookback_window=lookback_steps,
    predict_window=predict_steps,
    num_workers=10,
    batch_size=4,
)

2023-07-05 13:36:26.154871: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-07-05 13:36:26.215345: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [16]:
# Run the data module's setup for the "fit" stage, then display the length of
# the resulting training dataset.
# NOTE(review): how the training split is derived is defined in
# weather_co2_dataset and is not visible in this notebook — confirm there.
dm.setup('fit')
len(dm.train_dataset)

12145

In [19]:
# Sanity check: print the shape of the saved array for every zone.
#
# Dedicated names are used on purpose. Rebinding `codes` to plain strings would
# break a re-run of the dataset-building loop, which unpacks
# `(code, points_step)` pairs from `codes` (a two-letter string such as "CH"
# would even unpack silently into 'C', 'H'). Reusing `code` / `features` would
# replace the zone selection that feeds the data module.
zone_codes = ["BR-CS", "CA-ON", "CH", "DE", "PL", "BE", "IT-NO", "CA-QC", "ES", "GB", "FI", "FR", "NL"]
for zone_code in zone_codes:
    zone_features = np.load(f"electricitymaps_datasets/{zone_code}_np_dataset.npy", allow_pickle=True)
    print(zone_features.shape)

(17520, 23, 30)
(17520, 23, 30)
(17520, 23, 7)
(17520, 23, 30)
(17520, 23, 31)
(17520, 23, 6)
(17520, 23, 34)
(17520, 23, 30)
(17520, 23, 33)
(17520, 23, 33)
(17520, 23, 33)
(17520, 23, 38)
(17520, 23, 9)
