## Data creation-Days

This notebook builds a function that adds the day of the week/holidays for each calendar day.

The type of day is included as a predictor in the energy price forecast.
The day-type features (exogenous variables) generated by the functions in this notebook are:

- day of the week
- weekend or weekday
- holiday or special event

In [1]:
from datetime import date
import holidays
import pandas as pd

In [2]:
#create a daily datetime range covering the whole of 2019
#ISO (YYYY-MM-DD) strings are used so both endpoints are parsed the same way;
#slash dates such as '1/1/2019' and '31/12/2019' end up read with different day/month orders
dates = pd.date_range(start='2019-01-01', end='2019-12-31')
dates

DatetimeIndex(['2019-01-01', '2019-01-02', '2019-01-03', '2019-01-04',
               '2019-01-05', '2019-01-06', '2019-01-07', '2019-01-08',
               '2019-01-09', '2019-01-10',
               ...
               '2019-12-22', '2019-12-23', '2019-12-24', '2019-12-25',
               '2019-12-26', '2019-12-27', '2019-12-28', '2019-12-29',
               '2019-12-30', '2019-12-31'],
              dtype='datetime64[ns]', length=365, freq='D')

In [3]:
#create an object with all the holidays in Denmark ('DK' is the country code given to the holidays package)
denmark_holidays = holidays.CountryHoliday('DK')

In [4]:
#inspect the holiday names stored so far; no date has been looked up yet and the collection is still empty
denmark_holidays.values()

dict_values([])

In [5]:
#look up a single date: returns the holiday name for that day (25 December 2018 -> 'Juledag')
denmark_holidays.get('2018-12-25')

'Juledag'

In [6]:
def get_holidays(start='1/1/2019', stop='31/12/2019', country='DK', frequency='D'):
    """
    Flags the public holidays for every timestamp in a date range.
    
    start, stop - first and last date of the range (passed to pd.date_range)
    country - country code handed to holidays.CountryHoliday
    frequency - pandas frequency string for the range, daily by default
    
    Produces a dataframe indexed by the generated date range with columns:
    holiday_bool - boolean true or false for holiday
    holiday_name - name of the holiday, None when the date is not a holiday
    
    Returns a dataframe
    """
    
    #timestamps to classify
    timestamps = pd.date_range(start=start, end=stop, freq=frequency)
    
    #holiday lookup object for the requested country
    holiday_calendar = holidays.CountryHoliday(country)
    
    #one [is holiday, holiday name] pair per timestamp, in index order
    rows = [
        [stamp in holiday_calendar, holiday_calendar.get(stamp)]
        for stamp in timestamps
    ]
    
    #assemble the result on the same index the pairs were built from
    return pd.DataFrame(rows, index=timestamps, columns=['holiday_bool', 'holiday_name'])

In [7]:
#build the holiday table for 2015-01-01 through 2020-12-31 using the default country ('DK') and daily frequency
holiday_df = get_holidays(start='2015-01-01', stop='2020-12-31')

In [8]:
#list the distinct holiday names found in the range (None marks days that are not holidays)
holiday_df.holiday_name.unique()

array(['Nytårsdag', None, 'Palmesøndag', 'Skærtorsdag', 'Langfredag',
       'Påskedag', 'Anden påskedag', 'Store bededag',
       'Kristi himmelfartsdag', 'Pinsedag', 'Anden pinsedag', 'Juledag',
       'Anden juledag'], dtype=object)

In [9]:
#preview the first rows of the holiday table
holiday_df.head()

Unnamed: 0,holiday_bool,holiday_name
2015-01-01,True,Nytårsdag
2015-01-02,False,
2015-01-03,False,
2015-01-04,False,
2015-01-05,False,


In [10]:
def get_days_dummies(start='1/1/2019', stop='31/12/2019', frequency='D'):
    """
    Takes in a start and stop date and frequency.
    
    start, stop - first and last date of the range (passed to pd.date_range).
        ISO strings such as '2019-12-31' are the safest input; pandas reads
        ambiguous slash-separated dates month-first by default.
    frequency - pandas frequency string for the date range, daily by default
    
    Produces a dataframe with a date time index at the frequency input and
    seven one-hot columns: mon, tue, wed, thur, fri, sat, sun
    (1 in the column of the row's weekday, 0 everywhere else).
    
    All seven columns are always present, even when the range does not
    contain every weekday (a range shorter than a week, a weekly frequency).
    
    Returns a dataframe
    """
    
    columns = ['mon', 'tue', 'wed', 'thur', 'fri', 'sat', 'sun']
    
    #generate the range of dates at the requested frequency
    dates = pd.date_range(start=start, end=stop, freq=frequency)
    
    #weekday id per date, 0 for monday up to 6 for sunday
    weekday_ids = pd.Series(list(dates.weekday), index=dates, dtype='int64')
    
    #one-hot encode; the dtype is pinned so the values are 0/1 on every pandas version
    days = pd.get_dummies(weekday_ids, dtype='uint8')
    
    #get_dummies only creates columns for weekdays that occur in the range,
    #so add any missing weekday as an all-zero column before naming the columns
    days = days.reindex(columns=range(len(columns)), fill_value=0)
    
    days.columns = columns
    
    return days
    

In [11]:
#run with the default arguments (daily dates for 2019) to preview the one-hot weekday columns
get_days_dummies()

Unnamed: 0,mon,tue,wed,thur,fri,sat,sun
2019-01-01,0,1,0,0,0,0,0
2019-01-02,0,0,1,0,0,0,0
2019-01-03,0,0,0,1,0,0,0
2019-01-04,0,0,0,0,1,0,0
2019-01-05,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...
2019-12-27,0,0,0,0,1,0,0
2019-12-28,0,0,0,0,0,1,0
2019-12-29,0,0,0,0,0,0,1
2019-12-30,1,0,0,0,0,0,0


In [None]:
def get_holidays(start='1/1/2015', stop='31/12/2020', country='DK', frequency='D'):
    """
    Takes in a start and stop date, a country and a frequency.
    
    start, stop - first and last date of the range (passed to pd.date_range)
    country - country code handed to holidays.CountryHoliday
    frequency - pandas frequency string for the range, daily by default
    
    Produces a dataframe indexed by the generated date range with columns:
    weekday_id - numerical day of the week identifier 0 for monday
    holiday_bool - boolean true or false for holiday
    holiday_name - name of the holiday if holiday_bool is true, None otherwise
    
    Note: this notebook defines get_holidays twice. When the cells run in order
    this later definition replaces the earlier one; it returns the same holiday
    columns plus weekday_id.
    
    Returns a dataframe
    """
    
    #generate the range of dates at the requested frequency
    dates = pd.date_range(start=start, end=stop, freq=frequency)
    
    #create the holiday object
    country_holidays = holidays.CountryHoliday(country)
    
    #create a dataframe of weekday categories
    days = pd.DataFrame(list(dates.weekday), index=dates, columns=['weekday_id'])
    
    #one [holiday_bool, holiday_name] pair per date
    #the loop variable is deliberately not called `date`, which is the name of the datetime.date class imported by the notebook
    holiday_list = [[day in country_holidays, country_holidays.get(day)] for day in dates]
    
    #create the holiday dataframe on the same index as the weekday dataframe
    holidays_data = pd.DataFrame(holiday_list, index=dates, columns=['holiday_bool', 'holiday_name'])
    
    #join the days and the holidays_data dataframes side by side
    data = pd.concat([days, holidays_data], axis=1)
    
    return data