# Handling holidays in pandas time series

In [2]:
import pandas as pd
# Load the AAPL price table (Open/High/Low/Close/Volume). The file has no
# date column, so the frame starts out with pandas' default integer index.
df = pd.read_csv(filepath_or_buffer="aapl_ts_no_dates_tutorial3.csv")
df.head(n=5)

Unnamed: 0,Open,High,Low,Close,Volume
0,144.88,145.3,143.1,143.5,14277848
1,143.69,144.79,142.72,144.09,21569557
2,143.02,143.5,142.41,142.73,24128782
3,142.9,144.75,142.9,144.18,19201712
4,144.11,145.95,143.37,145.06,21090636


In [4]:
# freq="B" keeps Monday-Friday only: weekends are dropped, but public
# holidays such as 2017-07-04 are still part of the range.
pd.date_range(
    start="7/1/2017",
    end="7/21/2017",
    freq="B",
)

DatetimeIndex(['2017-07-03', '2017-07-04', '2017-07-05', '2017-07-06',
               '2017-07-07', '2017-07-10', '2017-07-11', '2017-07-12',
               '2017-07-13', '2017-07-14', '2017-07-17', '2017-07-18',
               '2017-07-19', '2017-07-20', '2017-07-21'],
              dtype='datetime64[ns]', freq='B')

In [6]:
# Build a business-day frequency that also skips US federal holidays
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay


# CustomBusinessDay acts like a plain business day, except that every date
# produced by the supplied holiday calendar is skipped as well.
usb = CustomBusinessDay(
    calendar=USFederalHolidayCalendar(),
)
usb

<CustomBusinessDay>

In [7]:
# Pass the offset object itself as the frequency: besides weekends, the
# range now also leaves out US federal holidays (here 2017-07-04).
pd.date_range(
    start="7/1/2017",
    end="7/21/2017",
    freq=usb,
)

DatetimeIndex(['2017-07-03', '2017-07-05', '2017-07-06', '2017-07-07',
               '2017-07-10', '2017-07-11', '2017-07-12', '2017-07-13',
               '2017-07-14', '2017-07-17', '2017-07-18', '2017-07-19',
               '2017-07-20', '2017-07-21'],
              dtype='datetime64[ns]', freq='C')

In [9]:
# Attach real trading dates to the price rows. The index needs exactly one
# date per row; set_index raises ValueError if the lengths differ.
rng = pd.date_range(start="7/1/2017", end="7/21/2017", freq=usb)
# Rebind the name instead of passing inplace=True, so the frame object that an
# earlier cell loaded and displayed is not mutated in place.
df = df.set_index(rng)
df

Unnamed: 0,Open,High,Low,Close,Volume
2017-07-03,144.88,145.3,143.1,143.5,14277848
2017-07-05,143.69,144.79,142.72,144.09,21569557
2017-07-06,143.02,143.5,142.41,142.73,24128782
2017-07-07,142.9,144.75,142.9,144.18,19201712
2017-07-10,144.11,145.95,143.37,145.06,21090636
2017-07-11,144.73,145.85,144.38,145.53,19781836
2017-07-12,145.87,146.18,144.82,145.74,24884478
2017-07-13,145.5,148.49,145.44,147.77,25199373
2017-07-14,147.97,149.33,147.33,149.04,20132061
2017-07-17,148.82,150.9,148.57,149.56,23793456


# Defining a custom holiday calendar

In [17]:
from pandas.tseries.holiday import AbstractHolidayCalendar, nearest_workday, Holiday

class myBirthDayCalendar(AbstractHolidayCalendar):
    """Holiday calendar with a single rule: Kimani's birthday on 12 December.

    The ``nearest_workday`` observance moves the holiday off weekends:
    a Saturday date is observed on the preceding Friday and a Sunday date
    on the following Monday.
    """

    rules = [
        Holiday(
            name="Kimani's Birthday",
            month=12,
            day=12,
            observance=nearest_workday,
        ),
    ]

# Business-day offset that skips weekends plus the birthday holiday; because of
# nearest_workday, a birthday landing on a weekend is observed on the closest weekday.
myc = CustomBusinessDay(
    calendar=myBirthDayCalendar(),
)
myc

<CustomBusinessDay>

In [18]:
# 2021-12-12 is a Sunday, so the birthday is observed on Monday 2021-12-13,
# which is why that date is absent from the range below.
pd.date_range(
    start="12/1/2021",
    end="12/30/2021",
    freq=myc,
)

DatetimeIndex(['2021-12-01', '2021-12-02', '2021-12-03', '2021-12-06',
               '2021-12-07', '2021-12-08', '2021-12-09', '2021-12-10',
               '2021-12-14', '2021-12-15', '2021-12-16', '2021-12-17',
               '2021-12-20', '2021-12-21', '2021-12-22', '2021-12-23',
               '2021-12-24', '2021-12-27', '2021-12-28', '2021-12-29',
               '2021-12-30'],
              dtype='datetime64[ns]', freq='C')

In [31]:
# Work week running Sunday-Thursday (Friday and Saturday form the weekend).
# weekmask lists the working days; holidays lists extra dates to skip.
b = CustomBusinessDay(
    weekmask='Sun Mon Tue Wed Thu',
    holidays=["2021-12-12", "2021-12-25", "2021-12-26"],
)
pd.date_range(start="12/1/2021", end="12/30/2021", freq=b)

DatetimeIndex(['2021-12-01', '2021-12-02', '2021-12-05', '2021-12-06',
               '2021-12-07', '2021-12-08', '2021-12-09', '2021-12-13',
               '2021-12-14', '2021-12-15', '2021-12-16', '2021-12-19',
               '2021-12-20', '2021-12-21', '2021-12-22', '2021-12-23',
               '2021-12-27', '2021-12-28', '2021-12-29', '2021-12-30'],
              dtype='datetime64[ns]', freq='C')