In [185]:
import pandas as pd
import numpy as np
import datetime
import holidays

In [186]:
# function for calendar
def cal(start, end):
    """Build a daily calendar table for the inclusive range ``start``..``end``.

    Parameters
    ----------
    start, end
        Range bounds, passed unchanged to ``pd.date_range(start, end, freq='D')``.

    Returns
    -------
    pandas.DataFrame
        One row per day with the columns ``date``, ``year``, ``month``,
        ``month_name``, ``day``, ``daysinmonth``, ``quarter``, ``weekofyear``
        (ISO week number), ``weekday`` (Monday = 0), ``day_name``,
        ``dayofyear`` and the 0/1 integer flags ``is_month_start``,
        ``is_month_end``, ``is_quarter_start``, ``is_quarter_end`` and
        ``is_leap_year``.
    """
    # Named `frame` rather than `cal` so the local does not shadow this function.
    frame = pd.DataFrame({'date': pd.date_range(start, end, freq='D')})
    dates = frame['date'].dt

    frame['year'] = dates.year
    frame['month'] = dates.month
    frame['month_name'] = dates.month_name()
    frame['day'] = dates.day
    frame['daysinmonth'] = dates.daysinmonth
    frame['quarter'] = dates.quarter

    # isocalendar() yields an unsigned nullable dtype; cast to a plain int64
    # column like the other integer fields. ISO weeks mean early-January days
    # can belong to week 52/53 of the previous ISO year.
    frame['weekofyear'] = dates.isocalendar().week.astype('int64')

    frame['weekday'] = dates.weekday  # Mon: 0
    frame['day_name'] = dates.day_name()
    frame['dayofyear'] = dates.dayofyear

    # Convert each boolean indicator to 0/1 explicitly, column by column,
    # instead of a frame-wide value replace that would scan every column and
    # also match existing 0/1 integers (True == 1, False == 0).
    flag_columns = (
        'is_month_start',
        'is_month_end',
        'is_quarter_start',
        'is_quarter_end',
        'is_leap_year',
    )
    for flag in flag_columns:
        frame[flag] = getattr(dates, flag).astype('int64')

    return frame

In [187]:
# Build the daily calendar table covering 2021-01-01 through 2021-12-31.
calendar2021 = cal(start='2021-01-01', end='2021-12-31')

# Preview the first three rows.
calendar2021.head(3)

Unnamed: 0,date,year,month,month_name,day,daysinmonth,quarter,weekofyear,weekday,day_name,dayofyear,is_month_start,is_month_end,is_quarter_start,is_quarter_end,is_leap_year
0,2021-01-01,2021,1,January,1,31,1,53,4,Friday,1,1,0,1,0,0
1,2021-01-02,2021,1,January,2,31,1,53,5,Saturday,2,0,0,0,0,0
2,2021-01-03,2021,1,January,3,31,1,53,6,Sunday,3,0,0,0,0,0


In [188]:
# Print a summary of the 2021 calendar: row count, columns, non-null counts and dtypes.
calendar2021.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 365 entries, 0 to 364
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   date              365 non-null    datetime64[ns]
 1   year              365 non-null    int64         
 2   month             365 non-null    int64         
 3   month_name        365 non-null    object        
 4   day               365 non-null    int64         
 5   daysinmonth       365 non-null    int64         
 6   quarter           365 non-null    int64         
 7   weekofyear        365 non-null    int64         
 8   weekday           365 non-null    int64         
 9   day_name          365 non-null    object        
 10  dayofyear         365 non-null    int64         
 11  is_month_start    365 non-null    int64         
 12  is_month_end      365 non-null    int64         
 13  is_quarter_start  365 non-null    int64         
 14  is_quarter_end    365 non-

In [189]:
# Construct a holiday dataset of Ontario, 2021: one row per (date, holiday name)
# pair taken from the items of holidays.CA(years=2021, prov='ON').
holidayON2021 = pd.DataFrame(
    holidays.CA(years=2021, prov='ON').items(),
    columns=['date', 'holidayName'],
)
# Convert the holiday dates to datetime64[ns] so the column has the same dtype
# as the calendar's 'date' column used as the merge key. pd.to_datetime is used
# because a unit-less .astype('datetime64') is rejected by pandas >= 2.0.
holidayON2021['date'] = pd.to_datetime(holidayON2021['date'])
holidayON2021

Unnamed: 0,date,holidayName
0,2021-01-01,New Year's Day
1,2021-12-31,New Year's Day (Observed)
2,2021-02-15,Family Day
3,2021-04-02,Good Friday
4,2021-05-24,Victoria Day
5,2021-07-01,Canada Day
6,2021-08-02,Civic Holiday
7,2021-09-06,Labour Day
8,2021-10-11,Thanksgiving
9,2021-12-25,Christmas Day


In [190]:
# Print a summary of the holiday table: row count, columns, non-null counts and dtypes.
holidayON2021.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   date         12 non-null     datetime64[ns]
 1   holidayName  12 non-null     object        
dtypes: datetime64[ns](1), object(1)
memory usage: 320.0+ bytes


In [191]:
# Left-join the holiday names onto the calendar by 'date': every calendar day is
# kept, and days without a matching holiday get a missing holidayName.
calendarON2021 = calendar2021.merge(holidayON2021, on='date', how='left')
calendarON2021.head(3)

Unnamed: 0,date,year,month,month_name,day,daysinmonth,quarter,weekofyear,weekday,day_name,dayofyear,is_month_start,is_month_end,is_quarter_start,is_quarter_end,is_leap_year,holidayName
0,2021-01-01,2021,1,January,1,31,1,53,4,Friday,1,1,0,1,0,0,New Year's Day
1,2021-01-02,2021,1,January,2,31,1,53,5,Saturday,2,0,0,0,0,0,
2,2021-01-03,2021,1,January,3,31,1,53,6,Sunday,3,0,0,0,0,0,


In [193]:
# Set up 0/1 flags for holidays and weekends.

# holidayFlag1 is 1 when the row has a holiday name and 0 when it is missing.
# notna() replaces the former `x is np.NaN` test, which depended on the missing
# value being the exact np.nan object and on the np.NaN alias that NumPy 2.0
# removed.
calendarON2021['holidayFlag1'] = calendarON2021['holidayName'].notna().astype('int64')

# NOTE: despite its name, weekdayFlag is 1 on weekends (weekday 5 or 6, i.e.
# Saturday/Sunday with Monday = 0) and 0 on Monday-Friday. The column name is
# kept unchanged for compatibility.
calendarON2021['weekdayFlag'] = calendarON2021['weekday'].isin([5, 6]).astype('int64')

calendarON2021

Unnamed: 0,date,year,month,month_name,day,daysinmonth,quarter,weekofyear,weekday,day_name,dayofyear,is_month_start,is_month_end,is_quarter_start,is_quarter_end,is_leap_year,holidayName,holidayFlag1,weekdayFlag
0,2021-01-01,2021,1,January,1,31,1,53,4,Friday,1,1,0,1,0,0,New Year's Day,1,0
1,2021-01-02,2021,1,January,2,31,1,53,5,Saturday,2,0,0,0,0,0,,0,1
2,2021-01-03,2021,1,January,3,31,1,53,6,Sunday,3,0,0,0,0,0,,0,1
3,2021-01-04,2021,1,January,4,31,1,1,0,Monday,4,0,0,0,0,0,,0,0
4,2021-01-05,2021,1,January,5,31,1,1,1,Tuesday,5,0,0,0,0,0,,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,2021-12-27,2021,12,December,27,31,4,52,0,Monday,361,0,0,0,0,0,Boxing Day (Observed),1,0
361,2021-12-28,2021,12,December,28,31,4,52,1,Tuesday,362,0,0,0,0,0,,0,0
362,2021-12-29,2021,12,December,29,31,4,52,2,Wednesday,363,0,0,0,0,0,,0,0
363,2021-12-30,2021,12,December,30,31,4,52,3,Thursday,364,0,0,0,0,0,,0,0
