# Finding holidays in India for next year

## install the holidays lib

In [52]:
# Uncomment and run once if the `holidays` package is not yet installed in this kernel.
#!pip install holidays

### import libs

In [53]:
import calendar
import datetime
from datetime import date

import holidays
import numpy as np
import pandas as pd

Specify the country and its subdivision (for India, the subdivision is the state).

In [54]:
# Holiday calendar for country 'IN', subdivision 'MH', limited to the year 2023.
mh_calendar = holidays.country_holidays('IN', subdiv='MH', years=2023, observed=True)

# Keep the (date, name) pairs: the DataFrame further down is built from them.
IN_holidays = mh_calendar.items()
for holiday_entry in IN_holidays:
    print(holiday_entry)

(datetime.date(2023, 1, 14), 'Makar Sankranti / Pongal')
(datetime.date(2023, 1, 26), 'Republic Day')
(datetime.date(2023, 8, 15), 'Independence Day')
(datetime.date(2023, 10, 2), 'Gandhi Jayanti')
(datetime.date(2023, 5, 1), 'Labour Day, Maharashtra Day')
(datetime.date(2023, 12, 25), 'Christmas')
(datetime.date(2023, 4, 14), "Dr. B. R. Ambedkar's Jayanti")
(datetime.date(2023, 10, 15), 'Dussehra')
(datetime.date(2023, 10, 12), 'Diwali')
(datetime.date(2023, 3, 7), 'Holi')


Note that a few regional holidays were added for the chosen state. Try neighbouring states such as 'KA' or 'MP' to compare.

### create dataframe from holidays 

In [55]:
# One row per holiday: each (date, name) pair fills the two columns in order.
# The 'Occassion' spelling is kept as-is because it is the column's runtime name.
holiday_records = list(IN_holidays)
df = pd.DataFrame(holiday_records, columns=['Date', 'Occassion'])
df

Unnamed: 0,Date,Occassion
0,2023-01-14,Makar Sankranti / Pongal
1,2023-01-26,Republic Day
2,2023-08-15,Independence Day
3,2023-10-02,Gandhi Jayanti
4,2023-05-01,"Labour Day, Maharashtra Day"
5,2023-12-25,Christmas
6,2023-04-14,Dr. B. R. Ambedkar's Jayanti
7,2023-10-15,Dussehra
8,2023-10-12,Diwali
9,2023-03-07,Holi


### Find out the day of the week and sort the dataframe by Date

In [56]:
# Parse the dates, then derive the weekday name each holiday falls on.
parsed_dates = pd.to_datetime(df['Date'])
df['Day'] = parsed_dates.dt.day_name()
df

Unnamed: 0,Date,Occassion,Day
0,2023-01-14,Makar Sankranti / Pongal,Saturday
1,2023-01-26,Republic Day,Thursday
2,2023-08-15,Independence Day,Tuesday
3,2023-10-02,Gandhi Jayanti,Monday
4,2023-05-01,"Labour Day, Maharashtra Day",Monday
5,2023-12-25,Christmas,Monday
6,2023-04-14,Dr. B. R. Ambedkar's Jayanti,Friday
7,2023-10-15,Dussehra,Sunday
8,2023-10-12,Diwali,Thursday
9,2023-03-07,Holi,Tuesday


In [57]:
# Put the holidays in chronological order; the original row labels are kept.
df = df.sort_values('Date')

### Find what kind of a Leave it is 

    Weekend: Sat, Sun 
    LongWeekend1 : extended weekend - Mon, Fri
    LongWeekend2 : long weekend if one leave is added - Tue, Thu
    
Using the apply method to create these 'Notes' for each holiday

In [58]:
# Label each holiday by the weekday it falls on. Weekdays that are not listed
# in the table (i.e. Wednesday) get an empty note.
note_by_day = {
    'Saturday': 'Weekend',
    'Sunday': 'Weekend',
    'Monday': 'LongWeekend1',
    'Friday': 'LongWeekend1',
    'Thursday': 'LongWeekend2',
    'Tuesday': 'LongWeekend2',
}
df['Notes'] = df['Day'].apply(lambda day_name: note_by_day.get(day_name, ""))
df

Unnamed: 0,Date,Occassion,Day,Notes
0,2023-01-14,Makar Sankranti / Pongal,Saturday,Weekend
1,2023-01-26,Republic Day,Thursday,LongWeekend2
9,2023-03-07,Holi,Tuesday,LongWeekend2
6,2023-04-14,Dr. B. R. Ambedkar's Jayanti,Friday,LongWeekend1
4,2023-05-01,"Labour Day, Maharashtra Day",Monday,LongWeekend1
2,2023-08-15,Independence Day,Tuesday,LongWeekend2
3,2023-10-02,Gandhi Jayanti,Monday,LongWeekend1
8,2023-10-12,Diwali,Thursday,LongWeekend2
7,2023-10-15,Dussehra,Sunday,Weekend
5,2023-12-25,Christmas,Monday,LongWeekend1


    Weekend: 0 leaves needed
    LongWeekend1 : 0 leaves needed
    LongWeekend2 : one leave needed
    
Using the map method to create these 'Leaves' for each holiday, can also be done using apply method as shown above

In [59]:
# Leave days needed to turn each holiday into a long weekend, keyed by its note.
# A blank note is what a Wednesday holiday receives; bridging it to the weekend
# takes two leaves (Thu + Fri), the same figure the 'LeavesInternal' table uses.
# Without that entry such rows would map to NaN, turning the column into floats
# and making every later total of 'Leaves' come out as NaN.
leaves_by_note = {'Weekend': 0, 'LongWeekend1': 0, 'LongWeekend2': 1, '': 2}
df['Leaves'] = df['Notes'].map(leaves_by_note)
df

Unnamed: 0,Date,Occassion,Day,Notes,Leaves
0,2023-01-14,Makar Sankranti / Pongal,Saturday,Weekend,0
1,2023-01-26,Republic Day,Thursday,LongWeekend2,1
9,2023-03-07,Holi,Tuesday,LongWeekend2,1
6,2023-04-14,Dr. B. R. Ambedkar's Jayanti,Friday,LongWeekend1,0
4,2023-05-01,"Labour Day, Maharashtra Day",Monday,LongWeekend1,0
2,2023-08-15,Independence Day,Tuesday,LongWeekend2,1
3,2023-10-02,Gandhi Jayanti,Monday,LongWeekend1,0
8,2023-10-12,Diwali,Thursday,LongWeekend2,1
7,2023-10-15,Dussehra,Sunday,Weekend,0
5,2023-12-25,Christmas,Monday,LongWeekend1,0


Creating LeavesInternal - only used in calculating the next column

    Sat, Sun, Mon, Fri : 0
    Tue : -1 as Leave needs to be applied for previous day
    Wed : 2 as Leave would be applied for next 2 days 
    Thu : 1 as Leave would be applied for next 1 day

In [60]:
# Signed leave count per weekday: 0 when the holiday already touches a weekend,
# negative when the leave falls before the holiday, positive when it falls after.
internal_leaves_by_day = {
    'Saturday': 0,
    'Sunday': 0,
    'Monday': 0,
    'Friday': 0,
    'Tuesday': -1,
    'Wednesday': 2,
    'Thursday': 1,
}
df['LeavesInternal'] = df['Day'].map(internal_leaves_by_day)
df

Unnamed: 0,Date,Occassion,Day,Notes,Leaves,LeavesInternal
0,2023-01-14,Makar Sankranti / Pongal,Saturday,Weekend,0,0
1,2023-01-26,Republic Day,Thursday,LongWeekend2,1,1
9,2023-03-07,Holi,Tuesday,LongWeekend2,1,-1
6,2023-04-14,Dr. B. R. Ambedkar's Jayanti,Friday,LongWeekend1,0,0
4,2023-05-01,"Labour Day, Maharashtra Day",Monday,LongWeekend1,0,0
2,2023-08-15,Independence Day,Tuesday,LongWeekend2,1,-1
3,2023-10-02,Gandhi Jayanti,Monday,LongWeekend1,0,0
8,2023-10-12,Diwali,Thursday,LongWeekend2,1,1
7,2023-10-15,Dussehra,Sunday,Weekend,0,0
5,2023-12-25,Christmas,Monday,LongWeekend1,0,0


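Counting the span of days that can be taken off around each holiday. "TotalDays" is a signed offset, in days, from the holiday to the far end of the break: positive values run forward to the following weekend, negative values run back to the preceding one. Note that it is mapped directly from "Day"; the "LeavesInternal" column is not read here.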

In [61]:
# Signed day offset from the holiday to the far end of its break, per weekday.
# A later cell steps this many days away from the holiday, in the direction of
# the sign, to list the days off.
day_offset_by_weekday = {
    'Monday': -2,
    'Tuesday': -3,
    'Wednesday': 4,
    'Thursday': 3,
    'Friday': 2,
    'Saturday': 1,
    'Sunday': -1,
}
df['TotalDays'] = df['Day'].map(day_offset_by_weekday)
df

Unnamed: 0,Date,Occassion,Day,Notes,Leaves,LeavesInternal,TotalDays
0,2023-01-14,Makar Sankranti / Pongal,Saturday,Weekend,0,0,1
1,2023-01-26,Republic Day,Thursday,LongWeekend2,1,1,3
9,2023-03-07,Holi,Tuesday,LongWeekend2,1,-1,-3
6,2023-04-14,Dr. B. R. Ambedkar's Jayanti,Friday,LongWeekend1,0,0,2
4,2023-05-01,"Labour Day, Maharashtra Day",Monday,LongWeekend1,0,0,-2
2,2023-08-15,Independence Day,Tuesday,LongWeekend2,1,-1,-3
3,2023-10-02,Gandhi Jayanti,Monday,LongWeekend1,0,0,-2
8,2023-10-12,Diwali,Thursday,LongWeekend2,1,1,3
7,2023-10-15,Dussehra,Sunday,Weekend,0,0,-1
5,2023-12-25,Christmas,Monday,LongWeekend1,0,0,-2


Adding a 'Month' column (the month name) derived from 'Date'.

In [62]:
# Month name of every holiday, used for the per-month grouping.
month_labels = pd.to_datetime(df['Date']).dt.month_name()
df['Month'] = month_labels
df

Unnamed: 0,Date,Occassion,Day,Notes,Leaves,LeavesInternal,TotalDays,Month
0,2023-01-14,Makar Sankranti / Pongal,Saturday,Weekend,0,0,1,January
1,2023-01-26,Republic Day,Thursday,LongWeekend2,1,1,3,January
9,2023-03-07,Holi,Tuesday,LongWeekend2,1,-1,-3,March
6,2023-04-14,Dr. B. R. Ambedkar's Jayanti,Friday,LongWeekend1,0,0,2,April
4,2023-05-01,"Labour Day, Maharashtra Day",Monday,LongWeekend1,0,0,-2,May
2,2023-08-15,Independence Day,Tuesday,LongWeekend2,1,-1,-3,August
3,2023-10-02,Gandhi Jayanti,Monday,LongWeekend1,0,0,-2,October
8,2023-10-12,Diwali,Thursday,LongWeekend2,1,1,3,October
7,2023-10-15,Dussehra,Sunday,Weekend,0,0,-1,October
5,2023-12-25,Christmas,Monday,LongWeekend1,0,0,-2,December


#### Find number of vacations that can be planned each month

In [63]:
# A holiday counts as a vacation opportunity when its offset spans more than
# one day in either direction; tally those flags per month.
spans_beyond_one_day = df['TotalDays'].abs().gt(1)
vac_per_month = spans_beyond_one_day.groupby(df['Month']).sum()
vac_per_month

Month
April       1
August      1
December    1
January     1
March       1
May         1
October     2
Name: TotalDays, dtype: int64

#### Creating a dataframe for only those holidays that do not fall on a weekend

In [64]:
# Keep only the holidays whose offset is larger than one day in magnitude
# (this drops the Saturday/Sunday rows) and renumber the rows from 0.
is_long_break = df['TotalDays'].abs() > 1
df1 = df.loc[is_long_break].reset_index(drop=True)
df1

Unnamed: 0,Date,Occassion,Day,Notes,Leaves,LeavesInternal,TotalDays,Month
0,2023-01-26,Republic Day,Thursday,LongWeekend2,1,1,3,January
1,2023-03-07,Holi,Tuesday,LongWeekend2,1,-1,-3,March
2,2023-04-14,Dr. B. R. Ambedkar's Jayanti,Friday,LongWeekend1,0,0,2,April
3,2023-05-01,"Labour Day, Maharashtra Day",Monday,LongWeekend1,0,0,-2,May
4,2023-08-15,Independence Day,Tuesday,LongWeekend2,1,-1,-3,August
5,2023-10-02,Gandhi Jayanti,Monday,LongWeekend1,0,0,-2,October
6,2023-10-12,Diwali,Thursday,LongWeekend2,1,1,3,October
7,2023-12-25,Christmas,Monday,LongWeekend1,0,0,-2,December


Adding a 'HolidayList' column holding, for each holiday, the consolidated list of consecutive days off.

In [65]:
# Expand every long-weekend holiday into its full run of consecutive days off.
# 'TotalDays' is a signed offset: the break runs from the holiday itself to
# that many days later (positive) or earlier (negative).
# Rows are walked positionally with zip, so this does not depend on df1's
# index labels being 0..n-1.
hh = []
for holiday_date, day_offset in zip(df1['Date'], df1['TotalDays']):
    anchor = pd.Timestamp(holiday_date)
    direction = int(np.sign(day_offset))
    hh.append(sorted(
        anchor + pd.Timedelta(days=direction * step)
        for step in range(abs(int(day_offset)) + 1)
    ))

# One DatetimeIndex per row. Each index is placed into an object array
# individually so it is stored whole rather than unpacked into elements.
holiday_spans = np.empty(len(hh), dtype=object)
for row, days_off in enumerate(hh):
    holiday_spans[row] = pd.DatetimeIndex(days_off)
df1['HolidayList'] = holiday_spans
df1

Unnamed: 0,Date,Occassion,Day,Notes,Leaves,LeavesInternal,TotalDays,Month,HolidayList
0,2023-01-26,Republic Day,Thursday,LongWeekend2,1,1,3,January,"DatetimeIndex(['2023-01-26', '2023-01-27', '20..."
1,2023-03-07,Holi,Tuesday,LongWeekend2,1,-1,-3,March,"DatetimeIndex(['2023-03-04', '2023-03-05', '20..."
2,2023-04-14,Dr. B. R. Ambedkar's Jayanti,Friday,LongWeekend1,0,0,2,April,"DatetimeIndex(['2023-04-14', '2023-04-15', '20..."
3,2023-05-01,"Labour Day, Maharashtra Day",Monday,LongWeekend1,0,0,-2,May,"DatetimeIndex(['2023-04-29', '2023-04-30', '20..."
4,2023-08-15,Independence Day,Tuesday,LongWeekend2,1,-1,-3,August,"DatetimeIndex(['2023-08-12', '2023-08-13', '20..."
5,2023-10-02,Gandhi Jayanti,Monday,LongWeekend1,0,0,-2,October,"DatetimeIndex(['2023-09-30', '2023-10-01', '20..."
6,2023-10-12,Diwali,Thursday,LongWeekend2,1,1,3,October,"DatetimeIndex(['2023-10-12', '2023-10-13', '20..."
7,2023-12-25,Christmas,Monday,LongWeekend1,0,0,-2,December,"DatetimeIndex(['2023-12-23', '2023-12-24', '20..."


### Calculating some stats for these holidays

In [66]:
# Leave budgets. Only CL is drawn on below; EL and Cool are defined but not
# read in this cell (presumably other leave categories - confirm before use).
EL = 12
CL = 8
Cool = 8

# Aggregates over the full holiday table (weekend rows included).
leaves_spent = sum(df['Leaves'])
vacation_plan_count = sum(df['TotalDays'].abs() > 1)
offset_day_total = sum(df['TotalDays'].abs())

print("Total ", leaves_spent, " leaves spent", sep="")
print("Total ", CL - leaves_spent, " leaves left", sep="")
print("Num Vacation plans = ", vacation_plan_count, sep="")
print("Total days off = ", offset_day_total, sep="")

Total 4 leaves spent
Total 4 leaves left
Num Vacation plans = 8
Total days off = 22


### Consolidated list of all days off

In [67]:
# Show the per-holiday lists of days off that were collected in `hh`.
hh

[[Timestamp('2023-01-26 00:00:00'),
  Timestamp('2023-01-27 00:00:00'),
  Timestamp('2023-01-28 00:00:00'),
  Timestamp('2023-01-29 00:00:00')],
 [Timestamp('2023-03-04 00:00:00'),
  Timestamp('2023-03-05 00:00:00'),
  Timestamp('2023-03-06 00:00:00'),
  Timestamp('2023-03-07 00:00:00')],
 [Timestamp('2023-04-14 00:00:00'),
  Timestamp('2023-04-15 00:00:00'),
  Timestamp('2023-04-16 00:00:00')],
 [Timestamp('2023-04-29 00:00:00'),
  Timestamp('2023-04-30 00:00:00'),
  Timestamp('2023-05-01 00:00:00')],
 [Timestamp('2023-08-12 00:00:00'),
  Timestamp('2023-08-13 00:00:00'),
  Timestamp('2023-08-14 00:00:00'),
  Timestamp('2023-08-15 00:00:00')],
 [Timestamp('2023-09-30 00:00:00'),
  Timestamp('2023-10-01 00:00:00'),
  Timestamp('2023-10-02 00:00:00')],
 [Timestamp('2023-10-12 00:00:00'),
  Timestamp('2023-10-13 00:00:00'),
  Timestamp('2023-10-14 00:00:00'),
  Timestamp('2023-10-15 00:00:00')],
 [Timestamp('2023-12-23 00:00:00'),
  Timestamp('2023-12-24 00:00:00'),
  Timestamp('2023-12-