In [None]:
#default_exp data.datasets.m3

# M3 dataset

> API details.

In [None]:
#export
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union

import numpy as np
import pandas as pd

from nixtla.data.datasets.utils import download_file, Info, TimeSeriesDataclass

In [None]:
#export
# Remote location of the Excel workbook (M3C.xls) that `M3.download` fetches.
SOURCE_URL = 'https://forecasters.org/data/m3comp/M3C.xls'

M3 meta information

In [None]:
#export
@dataclass
class Yearly:
    """
    Meta information of the 'Yearly' M3 group.

    `horizon`, `sheet_name` and `name` are read by `M3.load`; `seasonality`
    and `freq` are not read anywhere in this notebook.
    """
    seasonality: int = 1
    # Number of trailing observations of each series held out as the test split.
    horizon: int = 6
    # Yearly offset alias, in line with 'Q' (Quarterly) and 'M' (Monthly);
    # the daily alias 'D' does not describe yearly data.
    freq: str = 'Y'
    # Worksheet of M3C.xls that stores this group.
    sheet_name: str = 'M3Year'
    # The first letter is used as prefix of the generated `unique_id`s.
    name: str = 'Yearly'

@dataclass
class Quarterly:
    """
    Meta information of the 'Quarterly' M3 group.

    `horizon`, `sheet_name` and `name` are read by `M3.load`; `seasonality`
    and `freq` are not read anywhere in this notebook.
    """
    # Seasonal period (presumably the four quarters of a year).
    seasonality: int = 4
    # Number of trailing observations of each series held out as the test split.
    horizon: int = 8
    # Presumably a pandas offset alias (quarterly) -- TODO confirm with consumers.
    freq: str = 'Q'
    # Worksheet of M3C.xls that stores this group.
    sheet_name: str = 'M3Quart'
    # The first letter is used as prefix of the generated `unique_id`s.
    name: str = 'Quarterly'

@dataclass
class Monthly:
    """
    Meta information of the 'Monthly' M3 group.

    `horizon`, `sheet_name` and `name` are read by `M3.load`; `seasonality`
    and `freq` are not read anywhere in this notebook.
    """
    # Seasonal period (presumably the twelve months of a year).
    seasonality: int = 12
    # Number of trailing observations of each series held out as the test split.
    horizon: int = 18
    # Presumably a pandas offset alias (monthly) -- TODO confirm with consumers.
    freq: str = 'M'
    # Worksheet of M3C.xls that stores this group.
    sheet_name: str = 'M3Month'
    # The first letter is used as prefix of the generated `unique_id`s.
    name: str = 'Monthly'

@dataclass
class Other:
    """
    Meta information of the 'Other' M3 group.

    `horizon`, `sheet_name` and `name` are read by `M3.load`; `seasonality`
    and `freq` are not read anywhere in this notebook.
    """
    seasonality: int = 1
    # Number of trailing observations of each series held out as the test split.
    horizon: int = 8
    # Presumably a pandas offset alias -- TODO confirm which frequency
    # the 'Other' series are meant to carry.
    freq: str = 'D'
    # Worksheet of M3C.xls that stores this group.
    sheet_name: str = 'M3Other'
    # The first letter is used as prefix of the generated `unique_id`s.
    name: str = 'Other'

@dataclass
class M3Info:
    """
    Registry mapping each M3 group name to its meta-information dataclass.

    `groups` and `class_groups` are parallel tuples: the class stored at
    position i of `class_groups` describes the group named at position i of
    `groups`.

    NOTE(review): the name `M3Info` is rebound to an `Info(...)` instance in
    the next cell, so this class is shadowed by the time `M3.load` runs.
    """
    groups: Tuple = ('Yearly', 'Quarterly', 'Monthly', 'Other')
    class_groups: Tuple = (Yearly, Quarterly, Monthly, Other)

    def get_group(self, group: str):
        """
        Gets dataclass of group.

        Parameters
        ----------
        group: str
            Group name; must be one of `self.groups`.

        Returns
        -------
        The element of `self.class_groups` located at the same position
        as `group` in `self.groups`.

        Raises
        ------
        Exception
            If `group` is not one of `self.groups`.
        """
        if group not in self.groups:
            raise Exception(f'Unknown group {group}')

        return self.class_groups[self.groups.index(group)]

In [None]:
#export
# Group registry queried by `M3.load` through `M3Info.get_group`; both tuples
# list the groups in matching order. This rebinds the name `M3Info`.
M3Info = Info(
    class_groups=(Yearly, Quarterly, Monthly, Other),
    groups=('Yearly', 'Quarterly', 'Monthly', 'Other'),
)

In [None]:
#export
@dataclass
class M3(TimeSeriesDataclass):
    """Downloads the M3 workbook and loads one of its groups in long format."""

    @staticmethod
    def load(directory: str,
             group: str,
             training: bool = True) -> 'M3':
        """
        Downloads and loads M3 data.

        Parameters
        ----------
        directory: str
            Directory where data will be downloaded.
        group: str
            Group name.
            Allowed groups: 'Yearly', 'Quarterly', 'Monthly', 'Other'.
        training: bool
            Whether to return training or testing data. Default True.

        Returns
        -------
        M3
            Instance whose `Y` is a long dataframe with the columns
            'unique_id', 'ds' and 'y'; `S` and `X` are None.

        Notes
        -----
        The testing split holds the last `horizon` observations of every
        series and its 'ds' column is renumbered 1..horizon per series.
        The training split holds all the earlier observations and keeps the
        'ds' labels taken from the worksheet's column headers.
        """
        # Resolve the group first so an invalid name fails before any download.
        class_group = M3Info.get_group(group)
        horizon = class_group.horizon

        M3.download(directory)
        path = Path(directory) / 'm3' / 'datasets'

        df = pd.read_excel(path / 'M3C.xls', sheet_name=class_group.sheet_name)
        df = df.rename(columns={'Series': 'unique_id'})
        # Series ids are regenerated as <group initial><row number>, e.g. 'Y1'.
        prefix = class_group.name[0]
        df['unique_id'] = [f'{prefix}{i}' for i in range(1, len(df) + 1)]

        # The first six columns are treated as identifiers; every remaining
        # column becomes one (ds, y) observation of the series.
        id_vars = list(df.columns[:6])
        df = pd.melt(df, id_vars=id_vars, var_name='ds', value_name='y')

        # dropna() runs before the column filter, so a missing value in any
        # column (identifier columns included) discards the row.
        df = (df.dropna()
                .filter(items=['unique_id', 'ds', 'y'])
                .sort_values(by=['unique_id', 'ds'])
                .reset_index(drop=True))

        # Position of each row counted from the end of its series (last -> 0).
        # A boolean mask replaces groupby.apply(head(-horizon)): it is
        # vectorized and does not depend on how `apply` treats the grouping
        # column across pandas versions.
        from_end = df.groupby('unique_id').cumcount(ascending=False)

        if training:
            df = df[from_end >= horizon].reset_index(drop=True)
        else:
            # copy() so that the 'ds' assignment below writes to an
            # independent frame instead of a slice of `df`.
            df = df[from_end < horizon].copy()
            df['ds'] = df.groupby('unique_id').cumcount() + 1

        return M3(Y=df, S=None, X=None)

    @staticmethod
    def download(directory: Union[str, Path]) -> None:
        """
        Download M3 Dataset.

        Parameters
        ----------
        directory: str or Path
            Base directory; the workbook is looked up in (and downloaded to)
            `<directory>/m3/datasets`.
        """
        path = Path(directory) / 'm3' / 'datasets'
        # Look for the workbook itself rather than its folder, so an existing
        # but empty folder (e.g. after an interrupted download) is retried.
        # Assumes `download_file` stores the file as `path / 'M3C.xls'`, the
        # location `load` reads from -- TODO confirm against download_file.
        if not (path / 'M3C.xls').exists():
            download_file(path, SOURCE_URL)

In [None]:
# Quick check: load the training split (the default) of the Yearly group and preview it.
M3.load(directory='data', group='Yearly').Y.head()

Unnamed: 0,unique_id,ds,y
0,Y1,1,940.66
1,Y1,2,1084.86
2,Y1,3,1244.98
3,Y1,4,1445.02
4,Y1,5,1683.17


In [None]:
# Load the training split (the default) of every registered group and print its first rows.
for group in M3Info.groups:
    print(group)
    m3_dataset = M3.load(directory='data', group=group)
    print(m3_dataset.Y.head())

Yearly
  unique_id ds        y
0        Y1  1   940.66
1        Y1  2  1084.86
2        Y1  3  1244.98
3        Y1  4  1445.02
4        Y1  5  1683.17
Quarterly
  unique_id ds        y
0        Q1  1  3142.63
1        Q1  2  3190.75
2        Q1  3  3178.69
3        Q1  4  3170.94
4        Q1  5  3124.38
Monthly
  unique_id ds       y
0        M1  1  2640.0
1        M1  2  2640.0
2        M1  3  2160.0
3        M1  4  4200.0
4        M1  5  3360.0
Other
  unique_id ds        y
0        O1  1  3060.42
1        O1  2  3021.19
2        O1  3  3301.13
3        O1  4  3287.03
4        O1  5  3080.71
