In [None]:
#default_exp data.datasets.ecl

# Electricity Consuming Load (ECL) dataset

> Download the ECL dataset: https://archive.ics.uci.edu/ml/datasets/ElectricityLoadDiagrams20112014.

In [None]:
#hide
from nbdev import *
%load_ext autoreload
%autoreload 2

In [None]:
#export
import os
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple, Union

import gdown
import numpy as np
import pandas as pd

from nixtlats.data.datasets.utils import Info, time_features_from_frequency_str

## ECL meta information

In [None]:
#export
@dataclass
class ECL:
    """Meta information for the single `ECL` dataset group."""

    # NOTE(review): a second `class ECL` defined further down in this file
    # rebinds the name `ECL`; objects that captured this class before that
    # point keep referring to this definition.

    # Frequency string of the series (presumably hourly -- confirm).
    freq: str = 'H'
    # Group name.
    name: str = 'ECL'
    # Number of time series in the group.
    n_ts: int = 321

In [None]:
#export
# Registry pairing the group name 'ECL' with the `ECL` class bound at this point.
ECLInfo = Info(
    groups=('ECL',),
    class_groups=(ECL,),
)

## Download data class

In [None]:
#export
@dataclass
class ECL:
    """Downloader and loader for the Electricity Consuming Load (ECL) dataset.

    `download` fetches the raw CSV from `source_url`; `load` turns it into
    long-format target and exogenous frames, optionally cached as a pickle.
    """

    # Google Drive location of the raw `ECL.csv` file fetched by `download`.
    source_url: str = 'https://drive.google.com/uc?id=1rUPdR7R2iWFW-LMoDdHoO2g4KgnkpFzP'

    @staticmethod
    def load(directory: str,
             cache: bool = True) -> Tuple[pd.DataFrame,
                                          Optional[pd.DataFrame],
                                          Optional[pd.DataFrame]]:
        """Downloads and loads ECL data.

        Parameters
        ----------
        directory: str
            Directory where data will be downloaded.
        cache: bool
            If `True`, loads the processed frames from the pickle cache
            (`{directory}/ecl/datasets/ECL.p`) when it exists, and saves
            them there after processing otherwise.

        Returns
        -------
        y_df: pd.DataFrame
            Target values with columns `unique_id`, `ds`, `y`, sorted by
            `unique_id` and then `ds`.
        X_df: pd.DataFrame
            `unique_id`, `ds` and one column per time feature.
        S_df: None
            No static features are produced for this dataset.

        Notes
        -----
        [1] Returns train+val+test sets.
        """
        path = f'{directory}/ecl/datasets'
        file_cache = f'{path}/ECL.p'

        if cache and os.path.exists(file_cache):
            # NOTE(security): unpickling can execute arbitrary code; only
            # point `directory` at locations you trust.
            df, X_df, S_df = pd.read_pickle(file_cache)
            return df, X_df, S_df

        ECL.download(directory)

        # Wide frame: one 'date' column plus one column per series.
        y_df = pd.read_csv(f'{path}/ECL.csv')
        y_df['date'] = pd.to_datetime(y_df['date'])
        y_df = y_df.rename(columns={'date': 'ds'})
        u_ids = y_df.columns.to_list()
        u_ids.remove('ds')

        # Each time-feature object is applied to the `.dt` accessor of `ds`
        # and stored under the name of its class.
        time_cls = time_features_from_frequency_str('h')
        for cls_ in time_cls:
            cls_name = cls_.__class__.__name__
            y_df[cls_name] = cls_(y_df['ds'].dt)

        # Split the wide frame: time features go to X_df, series stay in y_df.
        X_df = y_df.drop(u_ids, axis=1)
        y_df = y_df.filter(items=['ds'] + u_ids)

        # Wide -> long: one row per (ds, unique_id) pair.
        y_df = y_df.set_index('ds').stack()
        y_df = y_df.rename('y').rename_axis(['ds', 'unique_id']).reset_index()
        # Categorical with explicit categories so sorting follows the CSV
        # column order instead of lexicographic order.
        y_df['unique_id'] = pd.Categorical(y_df['unique_id'], u_ids)
        y_df = y_df[['unique_id', 'ds', 'y']].sort_values(['unique_id', 'ds'])

        # Repeat the per-timestamp features for every series.
        X_df = y_df[['unique_id', 'ds']].merge(X_df, how='left', on=['ds'])

        S_df = None
        if cache:
            pd.to_pickle((y_df, X_df, S_df), file_cache)

        return y_df, X_df, S_df

    @staticmethod
    def download(directory: str) -> None:
        """Download ECL Dataset.

        Parameters
        ----------
        directory: str
            Base directory; the file is stored as
            `{directory}/ecl/datasets/ECL.csv`.

        Notes
        -----
        The download is skipped only when the CSV file itself already exists.
        """
        path = f'{directory}/ecl/datasets'
        csv_file = f'{path}/ECL.csv'
        # Check for the file, not the directory: a pre-existing (e.g. left over
        # from an interrupted run) directory must not prevent the download.
        if not os.path.exists(csv_file):
            os.makedirs(path, exist_ok=True)
            gdown.download(ECL.source_url, csv_file)

In [None]:
# Smoke check: load the data for every registered group and print a summary.
for group, meta in ECLInfo:
    y_df, x_df, s_df = ECL.load(directory='data', cache=False)

    # Number of distinct series present in the target frame.
    n_series = len(np.unique(y_df['unique_id'].values))

    # Exogenous variables are every column of x_df except the two key columns.
    ex_vars = list(x_df.columns)
    for key_col in ('unique_id', 'ds'):
        ex_vars.remove(key_col)

    display_str = f'Group: {group} n_series: {n_series} ex_vars: {", ".join(ex_vars)}'
    print(display_str)

Group: ECL n_series: 321 ex_vars: HourOfDay, DayOfWeek, DayOfMonth, DayOfYear


In [None]:
def test_wth(expected_first_ds_y: np.ndarray,
             expected_first_ds_x: np.ndarray) -> None:
    y_df, x_df, _ = ECL.load(directory='data', cache=False)
    first_ds_y = y_df.groupby('unique_id').head(1)['y'].values
    first_ds_x = x_df.groupby('unique_id').head(1).drop(['unique_id', 'ds'], axis=1).values
    expected_x = np.repeat(expected_first_ds_x.reshape(1, -1), first_ds_x.shape[0], axis=0)
    
    np.testing.assert_array_almost_equal(first_ds_y, expected_first_ds_y)
    np.testing.assert_array_almost_equal(first_ds_x, expected_x)

In [None]:
# Regression check against fixed values: `expected_first_ds_y` holds the first
# `y` value of each series (one entry per `unique_id` group, in the order the
# groups appear in the loaded frame) and `expected_first_ds_x` holds the
# exogenous feature values expected in the first row of every series.
test_wth(expected_first_ds_y=np.array([1.4000e+01, 6.9000e+01, 2.3400e+02, 4.1500e+02, 2.1500e+02,
                                       1.0560e+03, 2.9000e+01, 8.4000e+02, 2.2600e+02, 2.6500e+02,
                                       1.7900e+02, 1.4800e+02, 1.1200e+02, 1.7100e+02, 2.2900e+02,
                                       1.0010e+03, 4.9000e+01, 1.6200e+02, 5.9400e+02, 8.8000e+01,
                                       3.4000e+01, 8.8500e+02, 1.2200e+02, 1.0200e+02, 4.2500e+02,
                                       1.8500e+02, 3.6000e+02, 8.3000e+01, 4.4900e+02, 3.7000e+01,
                                       4.0800e+02, 3.4600e+02, 5.2100e+02, 1.7700e+02, 4.0470e+03,
                                       1.5100e+02, 2.1800e+02, 8.1100e+02, 1.1200e+02, 6.8000e+01,
                                       4.1310e+03, 7.8400e+02, 1.8400e+02, 1.6700e+02, 3.2500e+02,
                                       2.7800e+02, 2.2400e+02, 1.5800e+02, 1.2700e+02, 5.1200e+02,
                                       5.0200e+02, 1.2600e+02, 4.0700e+02, 2.9300e+02, 1.3700e+02,
                                       3.9760e+03, 2.8000e+02, 2.6700e+02, 6.8100e+02, 6.7300e+02,
                                       8.3700e+02, 2.1500e+02, 1.9200e+02, 3.9700e+02, 1.1900e+02,
                                       4.5300e+02, 2.8900e+02, 2.9000e+02, 1.2400e+02, 2.4300e+02,
                                       1.3200e+02, 7.8000e+01, 4.4000e+02, 1.5800e+02, 3.8000e+02,
                                       7.9500e+02, 5.9800e+02, 1.8340e+03, 4.3600e+02, 4.0500e+02,
                                       1.0890e+03, 5.2000e+01, 3.5800e+02, 9.0000e+00, 3.0000e+01,
                                       1.3000e+01, 2.8000e+01, 2.9500e+02, 6.5800e+02, 8.8000e+02,
                                       3.8000e+01, 7.5000e+02, 1.7680e+03, 7.4700e+02, 6.0200e+02,
                                       1.6700e+02, 5.2800e+02, 3.3300e+02, 5.2200e+02, 1.9100e+02,
                                       4.3900e+02, 2.7000e+01, 6.9000e+01, 4.5500e+02, 4.0300e+02,
                                       3.8500e+02, 1.4200e+03, 1.5800e+02, 6.3000e+01, 4.2800e+02,
                                       1.1900e+02, 1.7400e+02, 1.4540e+03, 6.1200e+02, 2.0000e+01,
                                       5.1000e+01, 5.0400e+02, 2.5200e+02, 0.0000e+00, 7.9520e+03,
                                       1.0130e+03, 7.8000e+01, 1.0600e+02, 2.2100e+02, 4.1700e+02,
                                       1.0300e+02, 4.2400e+02, 2.6300e+02, 2.7880e+03, 7.6000e+01,
                                       1.0500e+02, 7.3670e+03, 1.4570e+03, 7.0970e+03, 8.1600e+02,
                                       2.4650e+03, 2.5800e+02, 9.2000e+01, 6.9400e+02, 3.3300e+02,
                                       3.9700e+02, 3.5800e+02, 1.8100e+02, 3.0400e+02, 1.2100e+02,
                                       1.8100e+02, 9.0000e+00, 8.0000e+01, 8.4700e+02, 9.2000e+02,
                                       3.4040e+03, 1.1430e+03, 6.9200e+02, 7.0060e+03, 9.0200e+02,
                                       3.5376e+04, 3.2200e+02, 4.4300e+02, 1.0710e+03, 1.4550e+03,
                                       5.8200e+02, 7.1300e+02, 9.7300e+02, 1.9340e+03, 1.0570e+03,
                                       3.2100e+02, 1.4230e+03, 1.4800e+04, 1.1110e+03, 1.8610e+03,
                                       3.5100e+02, 6.5300e+02, 2.9920e+03, 7.7100e+02, 6.9200e+02,
                                       4.4230e+03, 3.8900e+02, 2.6240e+03, 7.8100e+02, 5.0400e+03,
                                       2.8600e+02, 1.6900e+03, 1.9180e+03, 6.4400e+02, 1.2210e+03,
                                       5.8900e+02, 1.1003e+04, 2.2360e+03, 2.2200e+02, 1.2760e+03,
                                       1.6300e+02, 3.2500e+02, 1.7800e+02, 1.6340e+03, 3.5800e+02,
                                       1.8500e+02, 1.0900e+03, 2.4600e+02, 6.8900e+02, 7.1620e+03,
                                       4.3000e+02, 2.6700e+02, 3.0100e+02, 4.5500e+02, 1.4740e+03,
                                       7.8200e+02, 3.0000e+02, 1.8700e+02, 1.7700e+02, 2.0000e+02,
                                       4.6100e+02, 5.7000e+02, 5.3400e+02, 2.9500e+02, 3.6700e+02,
                                       2.4500e+02, 4.5000e+02, 5.3500e+02, 1.3300e+02, 5.9200e+02,
                                       3.3500e+02, 3.6200e+02, 5.0700e+02, 5.4600e+02, 1.3800e+02,
                                       4.6800e+02, 1.9060e+03, 1.2700e+02, 3.5800e+02, 3.3200e+02,
                                       1.0900e+03, 7.2000e+02, 2.9700e+02, 3.8590e+03, 2.2000e+02,
                                       1.7000e+02, 2.9272e+04, 5.2500e+02, 2.9300e+02, 2.5300e+02,
                                       7.7200e+02, 1.4100e+02, 1.9900e+02, 2.1200e+02, 3.5100e+02,
                                       1.3900e+02, 1.1560e+03, 2.2800e+02, 2.1800e+02, 1.9700e+02,
                                       1.4390e+03, 6.5500e+02, 2.0700e+02, 3.1500e+02, 3.0210e+03,
                                       1.3360e+03, 3.4400e+02, 2.7400e+02, 3.0000e+02, 3.3900e+02,
                                       1.3600e+02, 1.4900e+02, 1.3590e+03, 4.7900e+02, 1.4790e+03,
                                       2.8900e+02, 7.1900e+02, 4.4800e+02, 2.9070e+03, 2.2200e+02,
                                       6.5300e+02, 9.8100e+02, 3.6300e+02, 2.0300e+02, 2.0900e+02,
                                       2.1300e+02, 3.1990e+03, 7.2000e+02, 5.5200e+02, 6.4000e+02,
                                       1.0620e+03, 2.7870e+03, 5.1100e+02, 1.5900e+02, 6.7500e+02,
                                       1.1600e+02, 7.6000e+01, 1.1000e+02, 4.5100e+02, 2.9200e+02,
                                       4.7700e+03, 1.2300e+02, 1.6230e+03, 2.3400e+02, 1.3900e+02,
                                       2.9600e+02, 2.4120e+03, 3.1700e+02, 1.6000e+01, 1.6400e+02,
                                       2.4100e+02, 5.7500e+02, 1.8240e+03, 8.9000e+01, 8.2000e+02,
                                       6.6000e+01, 8.4700e+02, 8.1000e+01, 2.0020e+03, 9.3700e+02,
                                       1.9900e+02, 6.7600e+02, 3.7200e+02, 8.0100e+04, 4.7190e+03,
                                       5.0020e+03, 4.8000e+01, 3.8000e+01, 1.5580e+03, 1.8200e+02,
                                       2.1620e+03]),
         expected_first_ds_x=np.array([-0.5,  0.5, -0.5, -0.5]))