In [1]:
#default_exp data.datasets.business

# Business G dataset

> Loader for the Business time-series dataset (groups `D1` and `D2`), downloaded from S3 on first use.

In [2]:
#export
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union

import numpy as np
import pandas as pd
import s3fs
from dotenv import load_dotenv

from nixtla.data.datasets.utils import download_file, Info, TimeSeriesDataclass
from nixtla.data.tsdataset import TimeSeriesDataset

In [3]:
#export
# Load variables from a local .env file into the process environment;
# Business.download reads its AWS credentials from os.environ.
load_dotenv()

True

Business meta information

In [8]:
#export
@dataclass
class D1:
    """Meta information for the 'D1' group of the Business dataset.

    Both values default to 7 (presumably one week of daily observations --
    confirm against the data frequency).
    """
    # Seasonal period of the series, in number of observations.
    seasonality: int = 7
    # Forecast horizon, in number of observations.
    horizon: int = 7

@dataclass
class D2:
    """Meta information for the 'D2' group of the Business dataset.

    Both values default to 7 (presumably one week of daily observations --
    confirm against the data frequency).
    """
    # Seasonal period of the series, in number of observations.
    seasonality: int = 7
    # Forecast horizon, in number of observations.
    horizon: int = 7

In [9]:
#export
# Registry pairing each group name with its meta-information dataclass;
# the two tuples are positionally aligned ('D1' -> D1, 'D2' -> D2).
BusinessInfo = Info(
    groups=('D1', 'D2'),
    class_groups=(D1, D2),
)

In [14]:
#export
class Business(TimeSeriesDataclass):
    """Loader for the Business dataset (groups 'D1' and 'D2') hosted on S3."""

    @staticmethod
    def load(directory: str,
             group: str,
             return_tensor: bool = True): #-> Union[TimeSeriesDataset, TimeSeriesDataclass]:
        """
        Downloads (when not already cached) and loads Business data.

        Parameters
        ----------
        directory: str
            Directory where data will be downloaded.
        group: str
            Group name.
            Allowed groups: 'D1', 'D2'.
        return_tensor: bool
            Currently ignored: the raw dataframe is always returned.
            Kept so the signature stays compatible with callers that pass it.

        Returns
        -------
        Tuple[pd.DataFrame, None]
            The dataframe read from ``ts-<group>.csv`` and ``None`` as a
            placeholder second element.

        Notes
        -----
        [1] Returns train+test sets.
        """
        path = Path(directory) / 'business' / 'datasets'

        Business.download(directory)

        # File names are lower-case ('ts-d1.csv', 'ts-d2.csv').
        df = pd.read_csv(path / f'ts-{group.lower()}.csv')

        # TODO: honour `return_tensor` by wrapping `df` in TimeSeriesDataset /
        # TimeSeriesDataclass once the group meta information is wired in.
        return df, None

    @staticmethod
    def download(directory: str) -> None:
        """Downloads Business Dataset.

        Only files that are not already present under
        ``<directory>/business/datasets`` are fetched, so a cached dataset can
        be used without AWS credentials and a partially populated directory
        is completed on the next call.

        Parameters
        ----------
        directory: str
            Directory where data will be downloaded.

        Raises
        ------
        KeyError
            If a download is required and the AWS credentials are not set in
            the environment.
        """
        path = Path(directory) / 'business' / 'datasets'
        remote = 's3://research-storage-orax/business-data/ts-d{number}.csv'

        targets = [(remote.format(number=i), path / f'ts-d{i}.csv') for i in [1, 2]]
        missing = [(src, dst) for src, dst in targets if not dst.exists()]
        if not missing:
            return

        # Credentials are only looked up when something must be downloaded.
        # The historical (misspelled) variable name takes precedence so that
        # existing .env files keep working; the correctly spelled name is
        # accepted as a fallback.
        key = os.environ.get('AWS_ACCES_KEY_ID') or os.environ['AWS_ACCESS_KEY_ID']
        fs = s3fs.S3FileSystem(key=key,
                               secret=os.environ['AWS_SECRET_ACCESS_KEY'])

        path.mkdir(parents=True, exist_ok=True)
        for src, dst in missing:
            fs.download(src, str(dst))

In [16]:
# Smoke check: load every registered group and preview its first rows.
for group in BusinessInfo.groups:
    print(group)
    # Business.load returns a (dataframe, None) pair; only the frame is shown.
    y_df, _ = Business.load(directory='data', group=group)
    print(y_df.head())

D1
           unique_id          ds     y
0  uid_12601_2_31090  2018-05-24  88.0
1  uid_12601_2_31090  2018-05-25  23.0
2  uid_12601_2_31090  2018-05-26  88.0
3  uid_12601_2_31090  2018-05-27   0.0
4  uid_12601_2_31090  2018-05-28   0.0
D2
         unique_id          ds     y
0  uid_3002_122693  2018-01-01   1.0
1  uid_3002_122693  2018-01-02  16.0
2  uid_3002_122693  2018-01-03  14.0
3  uid_3002_122693  2018-01-04  12.0
4  uid_3002_122693  2018-01-05   4.0
