In [None]:
# default_exp data.utils

In [None]:
#hide
%load_ext autoreload
%autoreload 2

## Data Utils
> Collection of utilities to work with Time Series data and tests.

In [None]:
#export
from typing import Tuple

import numpy as np
import pandas as pd

In [None]:
#export
def create_synthetic_tsdata(n_ts: int = 64) -> Tuple[pd.DataFrame,
                                                     pd.DataFrame,
                                                     pd.DataFrame]:
    """Create a small, deterministic panel of synthetic time series.

    Series `uid_i` (for `i` in `0, ..., n_ts - 1`) has `i + 1` daily
    observations ending on 2020-12-31, with target values `1, 2, ..., i + 1`.
    All returned frames have a unique, consecutive integer index.

    Parameters
    ----------
    n_ts: int
        Number of series to generate. Must be at least 1.

    Returns
    -------
    Y_df: pd.DataFrame
        Columns `['unique_id', 'ds', 'y']`, one row per series and date.
    X_df: pd.DataFrame
        Columns `['unique_id', 'ds', 'day_of_week', 'future_1']`, one row per
        series and date; `future_1` is `y + 1`.
    S_df: pd.DataFrame
        Columns `['unique_id', 'id_ts']`, one row per series.

    Raises
    ------
    ValueError
        If `n_ts` is smaller than 1.
    """
    if n_ts < 1:
        # Fail early with a clear message: with no series to build,
        # `pd.concat` would raise an unrelated-looking ValueError instead.
        raise ValueError(f'n_ts must be a positive integer, got {n_ts}.')

    dss = pd.date_range(end='2020-12-31', periods=n_ts)

    series = []
    for idx in range(n_ts):
        n_obs = idx + 1
        # Every series ends on the last date, so take the trailing `n_obs` dates.
        series.append(pd.DataFrame({'unique_id': np.repeat(f'uid_{idx}', n_obs),
                                    'ds': dss[-n_obs:],
                                    'y': 1 + np.arange(n_obs)}))

    # `ignore_index=True` avoids the repeated 0..k index each per-series frame
    # would otherwise contribute to the concatenated result.
    df = pd.concat(series, ignore_index=True)
    df['day_of_week'] = df['ds'].dt.day_of_week
    df['future_1'] = df['y'] + 1
    # Category codes follow the sorted order of the id strings, so they differ
    # from the numeric suffix once n_ts > 10 (e.g. 'uid_2' -> 12 for 64 series).
    df['id_ts'] = df['unique_id'].astype('category').cat.codes

    Y_df = df.filter(items=['unique_id', 'ds', 'y'])
    X_df = df.filter(items=['unique_id', 'ds', 'day_of_week', 'future_1'])
    S_df = (df.filter(items=['unique_id', 'id_ts'])
              .drop_duplicates()
              .reset_index(drop=True))

    return Y_df, X_df, S_df

In [None]:
# Quick look at the first rows of each frame produced by the default call,
# printed in the order the function returns them.
frames = create_synthetic_tsdata()
for frame in frames:
    print(frame.head())

  unique_id         ds  y
0     uid_0 2020-12-31  1
0     uid_1 2020-12-30  1
1     uid_1 2020-12-31  2
0     uid_2 2020-12-29  1
1     uid_2 2020-12-30  2
  unique_id         ds  day_of_week  future_1
0     uid_0 2020-12-31            3         2
0     uid_1 2020-12-30            2         2
1     uid_1 2020-12-31            3         3
0     uid_2 2020-12-29            1         2
1     uid_2 2020-12-30            2         3
  unique_id  id_ts
0     uid_0      0
0     uid_1      1
0     uid_2     12
0     uid_3     23
0     uid_4     34
