# Holidays (2022–2025) for selected countries

We will study the holiday calendar dataset, focusing on the following countries:

- India
- United States
- Canada
- Australia
- United Kingdom
- New Zealand


# Imports

In [15]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from dotenv import load_dotenv, dotenv_values
from sqlalchemy import create_engine, types
from sqlalchemy import text # to be able to pass string
from sqlalchemy import Integer, String, Float, DateTime, Date
from sqlalchemy.engine import URL

# Load table from database

In [16]:
# Raw contents of the .env file, kept available as a dict
config = dotenv_values()

# Load the .env values into the process environment so os.getenv can see them
load_dotenv()

# Define variables for the login
user = os.getenv("DB_USER")
password = os.getenv("DB_PASSWORD")
host = os.getenv("DB_HOST")
port = os.getenv("DB_PORT")
dbname = os.getenv("DB_NAME")
schema = os.getenv("DB_SCHEMA")

# Fail early with a readable message: an unset variable would otherwise be
# interpolated as the text "None" and only surface later as a connection error
db_settings = {
    "DB_USER": user,
    "DB_PASSWORD": password,
    "DB_HOST": host,
    "DB_PORT": port,
    "DB_NAME": dbname,
    "DB_SCHEMA": schema,
}
missing_settings = [name for name, value in db_settings.items() if value is None]
if missing_settings:
    raise RuntimeError(
        f"Missing database settings in the environment/.env: {', '.join(missing_settings)}"
    )

# PostgreSQL URL creation. Building it from its parts (instead of an f-string)
# escapes special characters such as "@", ":" or "/" in the credentials.
url = URL.create(
    "postgresql",
    username=user,
    password=password,
    host=host,
    port=int(port),
    database=dbname,
)

# Create engine
engine = create_engine(url)

# Load table
with engine.connect() as conn:
    # The schema name comes from the local .env configuration, not from user input
    conn.execute(text(f"SET search_path TO {schema}"))
    holidays_raw = pd.read_sql(
        text("SELECT * FROM jl_holiday_calendar"),
        conn
    )

In [9]:
# Display the table loaded from the database as the cell output
holidays_raw

Unnamed: 0,Country,Country Code,Day,Date,Holiday Name,Type,Comments
0,india,IN,Saturday,2022-04-02,1st Navratra,Regional Holiday,Jammu and Kashmir
1,india,IN,Wednesday,2022-10-26,Accession Day,Regional Holiday,Jammu and Kashmir
2,australia,AU,Monday,2022-03-14,Adelaide Cup,Regional Holiday,2nd Monday in March. SA Only
3,india,IN,Tuesday,2022-08-09,Adivasi Divas,Regional Holiday,
4,india,IN,Monday,2022-09-26,Agarsain Jayanti,Regional Holiday,Several states
...,...,...,...,...,...,...,...
379,new-zealand,NZ,Monday,2022-11-28,Westland Anniversary Day,Regional Holiday,Monday nearest to December 1 (Greymouth)
380,india,IN,Saturday,2022-12-03,World Disabled Day,Regional Holiday,Tripura only
381,india,IN,Friday,2022-03-18,Yaosang,Regional Holiday,Manipur
382,india,IN,Saturday,2022-03-19,Yaosang 2nd Day,Regional Holiday,Manipur


# Focus on selected countries

In [17]:
# Country codes of interest
countries = ["GB", "NZ", "IN", "US", "CA", "AU"]

# Restrict the table to rows whose country code is in the selection,
# taking an independent copy of the result
holidays_raw = holidays_raw.loc[
    holidays_raw["Country Code"].isin(countries)
].copy()

# Show the filtered table
holidays_raw

Unnamed: 0,Country,Country Code,Day,Date,Holiday Name,Type,Comments
0,india,IN,Saturday,2022-04-02,1st Navratra,Regional Holiday,Jammu and Kashmir
1,india,IN,Wednesday,2022-10-26,Accession Day,Regional Holiday,Jammu and Kashmir
2,australia,AU,Monday,2022-03-14,Adelaide Cup,Regional Holiday,2nd Monday in March. SA Only
3,india,IN,Tuesday,2022-08-09,Adivasi Divas,Regional Holiday,
4,india,IN,Monday,2022-09-26,Agarsain Jayanti,Regional Holiday,Several states
...,...,...,...,...,...,...,...
379,new-zealand,NZ,Monday,2022-11-28,Westland Anniversary Day,Regional Holiday,Monday nearest to December 1 (Greymouth)
380,india,IN,Saturday,2022-12-03,World Disabled Day,Regional Holiday,Tripura only
381,india,IN,Friday,2022-03-18,Yaosang,Regional Holiday,Manipur
382,india,IN,Saturday,2022-03-19,Yaosang 2nd Day,Regional Holiday,Manipur


The dataset is already restricted to the six countries of interest, so the filter leaves it unchanged.

# Quick checks

In [18]:
# Quick descriptive summary of the dataset
# (the output reports count, unique, top and freq for each column)
holidays_raw.describe()

Unnamed: 0,Country,Country Code,Day,Date,Holiday Name,Type,Comments
count,384,384,384,384,384,384,384.0
unique,6,6,7,171,285,8,186.0
top,india,IN,Monday,2022-12-26,Christmas Holiday,Regional Holiday,
freq,243,243,106,11,8,304,69.0


With this, we understand that:

- All six countries are represented in the table
- The most frequent holiday name is "Christmas Holiday" (8 occurrences)
- Most of the holidays are regional ones (304 of 384)
- Monday is the most common weekday for a holiday in the year of the dataset (2022), with 106 of 384 entries

In [20]:
# Basic stats per country: the same descriptive summary, computed separately
# for each value of "Country Code"
holidays_raw.groupby("Country Code").describe()

Unnamed: 0_level_0,Country,Country,Country,Country,Day,Day,Day,Day,Date,Date,...,Holiday Name,Holiday Name,Type,Type,Type,Type,Comments,Comments,Comments,Comments
Unnamed: 0_level_1,count,unique,top,freq,count,unique,top,freq,count,unique,...,top,freq,count,unique,top,freq,count,unique,top,freq
Country Code,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AU,36,1,australia,36,36,6,Monday,18,36,28,...,Labour Day,4,36,4,Regional Holiday,24,36,29,,4
CA,35,1,canada,35,35,5,Monday,22,35,25,...,Boxing Day,2,35,4,Regional Holiday,22,35,26,Newfoundland and Labrador,5
GB,23,1,united-kingdom,23,23,7,Monday,6,23,23,...,August Bank Holiday,1,23,3,Bank Holiday,9,23,16,,8
IN,243,1,india,243,243,7,Tuesday,45,243,142,...,Christmas Holiday,8,243,4,Regional Holiday,236,243,97,,37
NZ,29,1,new-zealand,29,29,5,Monday,16,29,27,...,Anzac Day,1,29,3,National Holiday,15,29,19,,10
US,18,1,usa,18,18,5,Monday,10,18,16,...,Christmas Day,1,18,3,Federal Holiday,13,18,11,,7


India has by far the most holidays (243 of 384). Most of them are regional holidays (236 of 243), which inflates the total, so for a fairer comparison we may want to look only at national/federal-level holidays.

To do this, we keep only the holidays whose type is "National Holiday", "Federal Holiday", "Bank Holiday", "Statutory Holiday" or "Public Holiday".

In [23]:
# List the distinct values of the "Type" column to decide which ones to keep
holidays_raw["Type"].unique()


array(['Regional Holiday', 'National Holiday', 'Government Holiday',
       'Not A Public Holiday', 'Bank Holiday', 'Statutory Holiday',
       'Federal Holiday', 'Public Holiday'], dtype=object)

In [27]:
# Holiday types left out of the comparison (listed for reference; the filter
# below is driven by keep_list only)
remove_list = ['Regional Holiday', 'Government Holiday', 'Not A Public Holiday']

# Holiday types kept for the comparison
keep_list = ["National Holiday", "Federal Holiday", "Bank Holiday", "Statutory Holiday", "Public Holiday"]

# Take an independent copy, as done for the country filter, so that later
# changes to filtered_holidays never write back into holidays_raw
filtered_holidays = holidays_raw[holidays_raw['Type'].isin(keep_list)].copy()

# Holidays per country
filtered_holidays.groupby("Country Code").describe()
                                 



Unnamed: 0_level_0,Country,Country,Country,Country,Day,Day,Day,Day,Date,Date,...,Holiday Name,Holiday Name,Type,Type,Type,Type,Comments,Comments,Comments,Comments
Unnamed: 0_level_1,count,unique,top,freq,count,unique,top,freq,count,unique,...,top,freq,count,unique,top,freq,count,unique,top,freq
Country Code,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AU,8,1,australia,8,8,6,Monday,3,8,8,...,Anzac Day,1,8,1,National Holiday,8,8,3,,4
CA,4,1,canada,4,4,4,Friday,1,4,4,...,Canada Day,1,4,1,Statutory Holiday,4,4,3,,2
GB,9,1,united-kingdom,9,9,6,Monday,3,9,9,...,Boxing Day,1,9,1,Bank Holiday,9,9,3,,7
IN,3,1,india,3,3,3,Monday,1,3,3,...,Independence Day,1,3,1,Public Holiday,3,3,2,,2
NZ,15,1,new-zealand,15,15,5,Monday,7,15,15,...,Anzac Day,1,15,1,National Holiday,15,15,5,,10
US,13,1,usa,13,13,5,Monday,8,13,13,...,Christmas Day,1,13,1,Federal Holiday,13,13,8,,5
