# Timeseries Utilities

In [1]:
import calendar

import pandas as pd
import numpy as np

## Decompose Date

In [None]:
def decompose_dates(
    df: pd.DataFrame,
    feature_list: list[str] = _FEATURES_LIST
) -> tuple[pd.DataFrame, list[str]]:
    """Decompose a datetime index into calendar components.

    The components ``year``, ``month``, ``week``, ``day`` (day of year),
    ``dayofmonth``, ``dayofweek`` and ``hour`` are derived from the index and
    written as columns on a copy of ``df``; the input is not modified.

    Parameters
    ----------
    df : dataframe
        Dataframe whose index is a datetime index.
    feature_list : list[str]
        Names of the columns to keep in the result. They may be any of the
        components listed above or columns already present in ``df``.
        Defaults to the module-level ``_FEATURES_LIST``.

    Returns
    -------
    output : dataframe
        Copy of ``df`` restricted to the columns named in ``feature_list``
        (in that order).
    feature_list : list[str]
        The same list that was passed in, returned unchanged.
    """

    output = df.copy()
    index = output.index

    output['year'] = index.year
    output['month'] = index.month  # 1-12

    # ISO week numbers follow the ISO year, not the calendar year, so the
    # days around New Year can carry a week number from the adjacent year.
    # Re-map them so the week number never decreases within a calendar year.
    iso_week = index.isocalendar().week.astype('int64').to_numpy()
    month = np.asarray(index.month)

    # Early January still in the last ISO week (52/53) of the previous year.
    week = np.where((month == 1) & (iso_week >= 52), 0, iso_week)
    # Late December already in ISO week 1 of the next year. 53 cannot collide
    # with a genuine week 53: a 53-week ISO year ends on or after 2 January.
    week = np.where((month == 12) & (iso_week == 1), 53, week)
    output['week'] = week  # 0-53

    output['day'] = index.dayofyear  # 1-366
    output['dayofmonth'] = index.day  # 1-31
    output['dayofweek'] = index.weekday  # 0-6, Monday is 0

    output['hour'] = index.hour  # 0-23

    return output[feature_list], feature_list

## Align day of the year between leap and non-leap years

In [None]:
def fix_day_of_year(df: pd.DataFrame) -> pd.DataFrame:
    """Align the day of the year between leap and non-leap years.

    Leap years have one extra day (29th Feb), so from March onwards the same
    calendar date has a day-of-year that is one higher than in a non-leap
    year. This function adds 1 to ``day`` for every row that is in a non-leap
    year and in March or later, so a given calendar date maps to the same
    ``day`` value in every year. Rows in leap years, and rows in January or
    February, are left untouched.

    Parameters
    ----------
    df : dataframe
        Dataframe with the columns ``year``, ``month`` and ``day``, where
        ``day`` is the day of the year.

    Returns
    -------
    output : dataframe
        Copy of ``df`` with the adjusted ``day`` column. All other columns,
        the index and the dtype of ``day`` are preserved; the input is not
        modified.

    Raises
    ------
    ValueError
        If any of the columns ``year``, ``month``, ``day`` is missing.
    """

    if not {'year', 'month', 'day'}.issubset(df.columns):
        raise ValueError('Dataframe missing one of the following columns: year, month, day')

    output = df.copy()

    # Gregorian leap-year rule, evaluated on the whole column at once:
    # divisible by 4, except centuries that are not divisible by 400.
    year = output['year']
    is_leap = (year % 4 == 0) & ((year % 100 != 0) | (year % 400 == 0))

    # Only non-leap rows from March onwards are behind by one day.
    needs_shift = ~is_leap & (output['month'] >= 3)

    # `where` keeps the value where the condition holds and takes the
    # replacement elsewhere, so this also works on an empty frame and does
    # not change the dtype of `day`.
    day = output['day']
    output['day'] = day.where(~needs_shift, day + 1)

    return output