In [39]:
import datetime as dt

import pandas as pd
from dateutil.relativedelta import *
from pandas.tseries.holiday import *
# The names HolidayCalendar uses are also imported explicitly, so the class
# does not depend on what the star imports above happen to export.
from pandas.tseries.holiday import (
    SU,
    AbstractHolidayCalendar,
    GoodFriday,
    Holiday,
    USLaborDay,
    USMartinLutherKingJr,
    USMemorialDay,
    USPresidentsDay,
    USThanksgivingDay,
    nearest_workday,
)
from pandas.tseries.offsets import DateOffset, Easter

class HolidayCalendar(AbstractHolidayCalendar):
    """Holiday calendar mixing US federal holidays with a few popular observances.

    ``rules`` is the list pandas evaluates to generate the holiday dates.
    Fixed-date rules declared with ``observance=nearest_workday`` are moved off
    weekends; the other fixed-date rules stay on their calendar date.
    """

    rules = [
        Holiday('NewYearsDay', month=1, day=1, observance=nearest_workday),
        USMartinLutherKingJr,
        # First Sunday on or after February 1st.
        Holiday('SuperBowl', month=2, day=1, offset=DateOffset(weekday=SU(1))),
        USPresidentsDay,
        Holiday('StPatricksDay', month=3, day=17),
        GoodFriday,
        # Easter Sunday: January 1st rolled forward by the Easter offset.
        Holiday('Easter', month=1, day=1, offset=Easter()),
        USMemorialDay,
        Holiday('USIndependenceDay', month=7, day=4, observance=nearest_workday),
        USLaborDay,
        Holiday('Halloween', month=10, day=31),
        USThanksgivingDay,
        Holiday('Christmas', month=12, day=25, observance=nearest_workday),
        Holiday('NewYearsEve', month=12, day=31, observance=nearest_workday)
    ]

    @staticmethod
    def get_holidays(start, end):
        """Return the holiday dates of ``HolidayCalendar`` between ``start`` and ``end``.

        Declared as a static method so it can be called both on the class
        (``HolidayCalendar.get_holidays(...)``) and on an instance.

        Args:
            start: First day of the window. Anything ``pd.to_datetime`` parses
                to a single timestamp, e.g. ``'2016'`` or ``'2016/9/20'``; a
                missing month or day defaults to 1. Only the calendar date is
                used, any time of day is dropped.
            end: Last day of the window, parsed the same way as ``start``.

        Returns:
            pandas.DatetimeIndex of holiday dates in ascending order, with each
            date listed at most once.
        """
        first = pd.to_datetime(start)
        last = pd.to_datetime(end)

        # Rebuild plain midnight datetimes so that only year/month/day of the
        # parsed inputs take part in the lookup.
        window_start = dt.datetime(first.year, first.month, first.day)
        window_end = dt.datetime(last.year, last.month, last.day)

        found = HolidayCalendar().holidays(window_start, window_end)

        # Two rules can resolve to the same day (New Year's Eve 2017 is a
        # Sunday, so it is observed on Monday 2018-01-01, which is also New
        # Year's Day). Report such a date once so it cannot duplicate rows
        # when the result is joined onto a calendar.
        return found.drop_duplicates()
    
def to_dataframe(index, col='date'):
    """Expand datetime values into year/month/day columns and flag every row as a holiday.

    Note: a later cell in this notebook redefines ``to_dataframe`` without the
    ``is_holiday`` column; whichever definition ran last is the one in effect.

    Args:
        index: Datetime values (e.g. a ``DatetimeIndex``) to expand, one row each.
        col: Name of the temporary column that holds the dates while the parts
            are extracted; it is dropped from the result.

    Returns:
        DataFrame with ``year``, ``month`` and ``day`` columns plus an
        ``is_holiday`` column set to 1 on every row.
    """
    frame = pd.DataFrame(index, columns=[col])

    # Pull each date component out of the temporary datetime column in turn.
    for part in ('year', 'month', 'day'):
        frame[part] = getattr(frame[col].dt, part)

    frame = frame.drop(columns=col)
    frame['is_holiday'] = 1
    return frame

# Build the holiday table for the window '2016' .. '2017'; per the get_holidays
# docstring, a year-only string defaults month and day to 1.
holidays = to_dataframe(HolidayCalendar.get_holidays(start='2016', end='2017'))

holidays

Unnamed: 0,year,month,day,is_holiday
0,2016,1,1,1
1,2016,1,18,1
2,2016,2,7,1
3,2016,2,15,1
4,2016,3,17,1
5,2016,3,25,1
6,2016,3,27,1
7,2016,5,30,1
8,2016,7,4,1
9,2016,9,5,1


In [29]:
# Rebuild `holidays` for '2016/9/20' .. '2019/11/15', the same window the daily
# `calendar` frame is built for further down.
holidays = to_dataframe(HolidayCalendar.get_holidays(start='2016/9/20', end='2019/11/15'))

In [31]:
# Persist the holiday table. index=False is not passed, so the row index is
# written too and shows up as an unnamed first column when the file is read back.
holidays.to_csv('holidays.csv')

In [130]:
# Scratch value: a year-only date string, used below to check what
# pd.to_datetime fills in for the missing month.
start = "2012"

In [131]:
# start = pd.to_datetime(start)

In [132]:
# Month that pd.to_datetime assigns to `start`; for a year-only string the
# get_holidays docstring expects this to default to 1.
month = pd.to_datetime(start).month

In [40]:
def to_dataframe(index, col='date'):
    """Expand datetime values into a frame of year/month/day columns.

    Note: this redefines the ``to_dataframe`` from the first cell, which also
    added an ``is_holiday`` column. Once this cell has run, that flag column is
    no longer produced.

    Args:
        index: Datetime values (e.g. a ``DatetimeIndex``) to expand, one row each.
        col: Name of the temporary column that holds the dates while the parts
            are extracted; it is dropped from the result.

    Returns:
        DataFrame with ``year``, ``month`` and ``day`` columns.
    """
    table = pd.DataFrame(index, columns=[col])

    # Read each date component from the temporary datetime column in turn.
    for field in ('year', 'month', 'day'):
        table[field] = getattr(table[col].dt, field)

    return table.drop(columns=col)

In [41]:
# Daily calendar: one row per day from 2016/9/20 through 2019/11/15, split into
# year/month/day columns (uses the to_dataframe variant without is_holiday).
calendar = to_dataframe(pd.date_range(start='2016/9/20', end='2019/11/15'))

In [56]:
# NOTE(review): the output recorded below starts at 2016-01-01, i.e. it matches
# the '2016' .. '2017' window rather than the later '2016/9/20' .. '2019/11/15'
# reassignment. The cells appear to have been run out of order; restart and
# run all to refresh it.
holidays

Unnamed: 0,year,month,day,is_holiday
0,2016,1,1,1
1,2016,1,18,1
2,2016,2,7,1
3,2016,2,15,1
4,2016,3,17,1
5,2016,3,25,1
6,2016,3,27,1
7,2016,5,30,1
8,2016,7,4,1
9,2016,9,5,1


In [42]:
# Preview the daily calendar frame (year/month/day, one row per day).
calendar

Unnamed: 0,year,month,day
0,2016,9,20
1,2016,9,21
2,2016,9,22
3,2016,9,23
4,2016,9,24
...,...,...,...
1147,2019,11,11
1148,2019,11,12
1149,2019,11,13
1150,2019,11,14


In [54]:
# Attach the holiday flag to every calendar day with a left join on explicit
# date keys, instead of letting pandas infer the keys from whatever columns
# the two frames happen to share.
# Holiday rows are de-duplicated on those keys first, so a date that occurs
# more than once in `holidays` cannot duplicate a calendar row.
# Days without a matching holiday keep is_holiday = NaN.
date_keys = ['year', 'month', 'day']
merged = pd.merge(
    calendar,
    holidays.drop_duplicates(subset=date_keys),
    how='left',
    on=date_keys,
)

In [52]:
# Count rows per flag value. value_counts skips NaN by default, so calendar
# days with no matching holiday are not counted here.
merged.is_holiday.value_counts()

1.0    14
Name: is_holiday, dtype: int64

In [55]:
# Inspect the joined frame; is_holiday is empty (NaN) on days that matched no
# holiday row.
merged

Unnamed: 0,year,month,day,is_holiday
0,2016,9,20,
1,2016,9,21,
2,2016,9,22,
3,2016,9,23,
4,2016,9,24,
...,...,...,...,...
1147,2019,11,11,
1148,2019,11,12,
1149,2019,11,13,
1150,2019,11,14,
