In [29]:
import pandas as pd
from datetime import datetime, timedelta, date
from workalendar.europe import Netherlands

## Vacations

In [30]:
# Define vacations: one DataFrame per vacation period, each with a single
# 'date' column that lists every calendar day of that period.
# NOTE(review): presumably these are Dutch school vacations (the region is not
# stated in this notebook) — confirm the start dates against the official calendar.
def _vacation_period(start, n_days):
    """Return a DataFrame whose 'date' column holds `n_days` consecutive days from `start`."""
    return pd.DataFrame({'date': pd.date_range(start, periods=n_days, freq='D')})


# Period lengths in days: whole weeks plus two extra days.
_ONE_WEEK = 7 * 1 + 2
_TWO_WEEKS = 7 * 2 + 2
_SIX_WEEKS = 7 * 6 + 2

kerst_19 = _vacation_period(date(2019, 12, 21), _TWO_WEEKS)
voorjaar_20 = _vacation_period(date(2020, 2, 15), _ONE_WEEK)
mei_20 = _vacation_period(date(2020, 4, 25), _ONE_WEEK)
zomer_20 = _vacation_period(date(2020, 7, 4), _SIX_WEEKS)
herfst_20 = _vacation_period(date(2020, 10, 10), _ONE_WEEK)
kerst_20 = _vacation_period(date(2020, 12, 19), _TWO_WEEKS)
voorjaar_21 = _vacation_period(date(2021, 2, 20), _ONE_WEEK)
mei_21 = _vacation_period(date(2021, 5, 1), _ONE_WEEK)
zomer_21 = _vacation_period(date(2021, 7, 10), _SIX_WEEKS)
herfst_21 = _vacation_period(date(2021, 10, 16), _ONE_WEEK)
kerst_21 = _vacation_period(date(2021, 12, 25), _TWO_WEEKS)

In [31]:
# Stack every vacation period into one frame of vacation days.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement. ignore_index=True gives the combined frame a unique 0..n-1 index
# instead of repeating each period's own 0-based index.
vacation_df_raw = pd.concat(
    [
        kerst_19, voorjaar_20, mei_20, zomer_20, herfst_20, kerst_20,
        voorjaar_21, mei_21, zomer_21, herfst_21, kerst_21,
    ],
    ignore_index=True,
)

In [32]:
def preprocess_vacation_data(df_raw):
    """
    Prepare the raw vacation data for modelling.

    Turns a list of vacation days into a gap-free daily 0/1 indicator that
    spans the earliest to the latest day present in the input.

    Parameters
    ----------
    df_raw : pandas.DataFrame
        Frame with a 'date' column holding one timezone-naive entry per
        vacation day. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One integer column 'vacation_dummy' (1 on listed days, 0 on the days
        in between) on a daily index named 'datetime', localized to
        "Europe/Amsterdam".
    """
    # Localize to local midnight without writing a helper column into the
    # caller's frame, so re-running the calling cell sees an untouched input.
    stamps = pd.to_datetime(df_raw['date']).dt.tz_localize("Europe/Amsterdam")
    vacation = pd.Series(
        1,
        index=pd.DatetimeIndex(stamps, name='datetime'),
        name='vacation_dummy',
    )

    # Overlapping periods would list a day twice; upsampling reindexes the
    # data and cannot do so on duplicate labels.
    vacation = vacation[~vacation.index.duplicated(keep='first')]

    # Upsample to one row per calendar day; days that were not listed come
    # back as NaN and become 0.
    daily = vacation.resample('1D').asfreq()
    return daily.fillna(0).astype(int).to_frame()

In [33]:
# Build the daily 0/1 vacation indicator from the combined vacation days.
vacation_df = preprocess_vacation_data(vacation_df_raw)

In [40]:
# Display the vacation indicator frame as the cell output.
vacation_df

Unnamed: 0_level_0,vacation_dummy
datetime,Unnamed: 1_level_1
2019-12-21 00:00:00+01:00,1
2019-12-22 00:00:00+01:00,1
2019-12-23 00:00:00+01:00,1
2019-12-24 00:00:00+01:00,1
2019-12-25 00:00:00+01:00,1
...,...
2022-01-05 00:00:00+01:00,1
2022-01-06 00:00:00+01:00,1
2022-01-07 00:00:00+01:00,1
2022-01-08 00:00:00+01:00,1


## Holidays

In [41]:
# Gather the Netherlands calendar's holiday entries for each year into one
# flat list, in year order.
# NOTE(review): the vacation periods in this notebook start in December 2019,
# while holidays start in 2020 — confirm that leaving out 2019 is intended.
holidays_data_raw = [
    entry
    for year in (2020, 2021, 2022)
    for entry in Netherlands().holidays(year)
]

In [42]:
def preprocess_holidays_data(holidays):
    """
    Prepare the raw holiday data for modelling.

    Turns a list of holiday entries into a gap-free daily 0/1 indicator that
    spans the earliest to the latest holiday present in the input.

    Parameters
    ----------
    holidays : list of tuple
        Entries whose first element is the holiday's date (timezone-naive)
        and whose second element is its label; only the date is used.

    Returns
    -------
    pandas.DataFrame
        One integer column 'holiday_dummy' (1 on holidays, 0 on the days in
        between) on a daily index named 'datetime', localized to
        "Europe/Amsterdam".
    """
    # Put in dataframe
    holiday_df = pd.DataFrame(holidays).rename(columns={0: 'date', 1: 'holiday'})

    # Create datetime index at local midnight
    stamps = pd.to_datetime(holiday_df['date']).dt.tz_localize("Europe/Amsterdam")
    holiday = pd.Series(
        1,
        index=pd.DatetimeIndex(stamps, name='datetime'),
        name='holiday_dummy',
    )

    # Two holidays can fall on the same date; upsampling reindexes the data
    # and cannot do so on duplicate labels, so keep one entry per day.
    holiday = holiday[~holiday.index.duplicated(keep='first')]

    # Create dummy variable: one row per calendar day, non-holidays become 0
    daily = holiday.resample('1D').asfreq()
    return daily.fillna(0).astype(int).to_frame()

In [43]:
# Build the daily 0/1 holiday indicator from the collected holiday entries.
holiday_df = preprocess_holidays_data(holidays_data_raw)

In [44]:
# Display the holiday indicator frame as the cell output.
holiday_df

Unnamed: 0_level_0,holiday_dummy
datetime,Unnamed: 1_level_1
2020-01-01 00:00:00+01:00,1
2020-01-02 00:00:00+01:00,0
2020-01-03 00:00:00+01:00,0
2020-01-04 00:00:00+01:00,0
2020-01-05 00:00:00+01:00,0
...,...
2022-12-22 00:00:00+01:00,0
2022-12-23 00:00:00+01:00,0
2022-12-24 00:00:00+01:00,0
2022-12-25 00:00:00+01:00,1


In [38]:
# Save the vacation indicator; to_csv writes the index by default, so the
# 'datetime' index becomes the first CSV column.
vacation_df.to_csv('../data/vacation.csv')

In [45]:
# Save the holiday indicator; to_csv writes the index by default, so the
# 'datetime' index becomes the first CSV column.
holiday_df.to_csv('../data/holiday.csv')