In [1]:
# import dependencies
import pandas as pd
import requests
import numpy as np

In [2]:

# Holidays 2019-2023 links:
url_holiday_2019 = 'https://www.boxofficemojo.com/holiday/by-year/2019/?grossesOption=calendarGrosses'
url_holiday_2020 = 'https://www.boxofficemojo.com/holiday/by-year/2020/?grossesOption=calendarGrosses'
url_holiday_2021 = 'https://www.boxofficemojo.com/holiday/by-year/2021/?grossesOption=calendarGrosses'
url_holiday_2022 = 'https://www.boxofficemojo.com/holiday/by-year/2022/?grossesOption=calendarGrosses'
url_holiday_2023 = 'https://www.boxofficemojo.com/holiday/by-year/2023/?grossesOption=calendarGrosses'

# retrieve html for links
response1 = requests.get(url_holiday_2019)
response2 = requests.get(url_holiday_2020)
response3 = requests.get(url_holiday_2021)
response4 = requests.get(url_holiday_2022)
response5 = requests.get(url_holiday_2023)

# Read responses into tables
holidays_2019 = pd.read_html(response1.text)
holidays_2020 = pd.read_html(response2.text)
holidays_2021 = pd.read_html(response3.text)
holidays_2022 = pd.read_html(response4.text)
holidays_2023 = pd.read_html(response5.text)

## Holiday Tables 2019-2023
---

In [3]:
# Create Dataframes
holidays_2019_df = holidays_2019[0]
holidays_2020_df = holidays_2020[0]
holidays_2021_df = holidays_2021[0]
holidays_2022_df = holidays_2022[0]
holidays_2023_df = holidays_2023[0]

display(len(holidays_2019_df), len(holidays_2020_df), len(holidays_2021_df), len(holidays_2022_df), len(holidays_2023_df))

22

22

23

25

23

In [4]:
# The classification of holidays is different year to year
display(holidays_2019_df['Holiday'].values)
display(holidays_2020_df['Holiday'].values)
display(holidays_2021_df['Holiday'].values)
display(holidays_2022_df['Holiday'].values)
display(holidays_2023_df['Holiday'].values)

array(["New Year's Eve", 'Christmas Day', 'Post-Thanksgiving Weekend',
       'Thanksgiving Weekend', 'Thanksgiving 4-Day Weekend',
       'Thanksgiving', 'Thanksgiving 5-Day Weekend', 'Halloween',
       "Indigenous People's Day", "Indigenous People's Day Weekend",
       'Labor Day', 'Labor Day Weekend', 'Independence Day',
       'Memorial Day', 'Memorial Day Weekend', 'Easter', 'Easter Weekend',
       "President's Day", "President's Day Weekend", 'MLK Day',
       'MLK Day Weekend', "New Year's Day"], dtype=object)

array(["New Year's Eve", 'Christmas Day', 'Post-Thanksgiving Weekend',
       'Thanksgiving Weekend', 'Thanksgiving 4-Day Weekend',
       'Thanksgiving', 'Thanksgiving 5-Day Weekend', 'Halloween',
       "Indigenous People's Day", "Indigenous People's Day Weekend",
       'Labor Day', 'Labor Day Weekend', 'Independence Day',
       'Memorial Day', 'Memorial Day Weekend', 'Easter', 'Easter Weekend',
       "President's Day", "President's Day Weekend", 'MLK Day',
       'MLK Day Weekend', "New Year's Day"], dtype=object)

array(["New Year's Eve", 'Christmas Day', 'Post-Thanksgiving Weekend',
       'Thanksgiving Weekend', 'Thanksgiving 4-Day Weekend',
       'Thanksgiving', 'Thanksgiving 5-Day Weekend', 'Halloween',
       "Indigenous People's Day", "Indigenous People's Day Weekend",
       'Labor Day', 'Labor Day Weekend', 'Independence Day',
       'Independence Day Weekend', 'Memorial Day', 'Memorial Day Weekend',
       'Easter', 'Easter Weekend', "President's Day",
       "President's Day Weekend", 'MLK Day', 'MLK Day Weekend',
       "New Year's Day"], dtype=object)

array(["New Year's Eve", 'Christmas Day', 'Christmas Weekend',
       'Post-Thanksgiving Weekend', 'Thanksgiving Weekend',
       'Thanksgiving 4-Day Weekend', 'Thanksgiving',
       'Thanksgiving 5-Day Weekend', 'Halloween',
       "Indigenous People's Day", "Indigenous People's Day Weekend",
       'Labor Day', 'Labor Day Weekend', 'Independence Day',
       'Independence Day Weekend', 'Memorial Day', 'Memorial Day Weekend',
       'Easter', 'Easter Weekend', "President's Day",
       "President's Day Weekend", 'MLK Day', 'MLK Day Weekend',
       "New Year's Day", 'New Year Weekend'], dtype=object)

array(['Christmas Day', 'Christmas Weekend', 'Post-Thanksgiving Weekend',
       'Thanksgiving Weekend', 'Thanksgiving 4-Day Weekend',
       'Thanksgiving', 'Thanksgiving 5-Day Weekend', 'Halloween',
       "Indigenous People's Day", "Indigenous People's Day Weekend",
       'Labor Day', 'Labor Day Weekend', 'Independence Day',
       'Independence Day Weekend', 'Memorial Day', 'Memorial Day Weekend',
       'Easter', 'Easter Weekend', "President's Day",
       "President's Day Weekend", 'MLK Day', 'MLK Day Weekend',
       "New Year's Day"], dtype=object)

In [5]:
# Trim the columns to Holiday, Cumulative Gross, Releases
holidays_2019_df = holidays_2019_df[['Holiday', 'Cumulative Gross', 'Releases']]
holidays_2020_df = holidays_2020_df[['Holiday', 'Cumulative Gross', 'Releases']]
holidays_2021_df = holidays_2021_df[['Holiday', 'Cumulative Gross', 'Releases']]
holidays_2022_df = holidays_2022_df[['Holiday', 'Cumulative Gross', 'Releases']]
holidays_2023_df = holidays_2023_df[['Holiday', 'Cumulative Gross', 'Releases']]

### 2019 Holiday Table 

In [6]:
df_2019 = holidays_2019_df.copy()

In [7]:
# Convert each Table
df_2019['Cumulative Gross'] = df_2019['Cumulative Gross'].str.replace(',', '')
df_2019['Cumulative Gross'] = df_2019['Cumulative Gross'].str.replace('$', '')
df_2019['Cumulative Gross'] = df_2019['Cumulative Gross'].astype('int64', copy=True)

In [8]:
df_2019

Unnamed: 0,Holiday,Cumulative Gross,Releases
0,New Year's Eve,43962854,54
1,Christmas Day,78632563,54
2,Post-Thanksgiving Weekend,90326095,87
3,Thanksgiving Weekend,180855568,90
4,Thanksgiving 4-Day Weekend,215903776,54
5,Thanksgiving,35370976,52
6,Thanksgiving 5-Day Weekend,262269314,58
7,Halloween,9536976,54
8,Indigenous People's Day,20973935,56
9,Indigenous People's Day Weekend,161052554,59


In [9]:
# Drop redundant holiday rows
df_2019_trim = df_2019.drop([2,3,4,5,8,10,13,15,17,19])
# Reset Index
df_2019_trim.reset_index(inplace=True)
# Trim the columns
df_2019_trim = df_2019_trim[['Holiday', 'Cumulative Gross', 'Releases']]

df_2019_trim

Unnamed: 0,Holiday,Cumulative Gross,Releases
0,New Year's Eve,43962854,54
1,Christmas Day,78632563,54
2,Thanksgiving 5-Day Weekend,262269314,58
3,Halloween,9536976,54
4,Indigenous People's Day Weekend,161052554,59
5,Labor Day Weekend,120764144,82
6,Independence Day,46565065,54
7,Memorial Day Weekend,230819376,97
8,Easter Weekend,121560823,74
9,President's Day Weekend,152447001,86


In [10]:
# Encode each holiday as a month
# holiday month list
month_name_list = []

# loop thru list
for i in range(len(df_2019_trim)):
    if df_2019_trim['Holiday'].iloc[i] in ["New Year's Eve", "Christmas Day"]:
        month_name_list.append('December')
    elif df_2019_trim['Holiday'].iloc[i] in ["Thanksgiving 5-Day Weekend"]:
        month_name_list.append('November')
    elif df_2019_trim['Holiday'].iloc[i] in ["Halloween", "Indigenous People's Day Weekend"]:
        month_name_list.append('October')
    elif df_2019_trim['Holiday'].iloc[i] in ["Labor Day Weekend"]:
        month_name_list.append('September')
    elif df_2019_trim['Holiday'].iloc[i] in ['Independence Day']:
        month_name_list.append('July')
    elif df_2019_trim['Holiday'].iloc[i] in ['Memorial Day Weekend']: 
        month_name_list.append('May')
    elif df_2019_trim['Holiday'].iloc[i] in ['Easter Weekend']: 
        month_name_list.append('April')
    elif df_2019_trim['Holiday'].iloc[i] in ["President's Day Weekend"]: 
        month_name_list.append('February')
    else:
        month_name_list.append('January')
        
display(len(month_name_list))

# Add lists to df as columns
df_2019_trim['Month'] = month_name_list
df_2019_trim['Year'] = [int(2020) for x in range(len(df_2019_trim))]

df_2019_trim = df_2019_trim[['Month', 'Year', 'Holiday', 'Cumulative Gross', 'Releases']]

df_2019_trim

12

Unnamed: 0,Month,Year,Holiday,Cumulative Gross,Releases
0,December,2020,New Year's Eve,43962854,54
1,December,2020,Christmas Day,78632563,54
2,November,2020,Thanksgiving 5-Day Weekend,262269314,58
3,October,2020,Halloween,9536976,54
4,October,2020,Indigenous People's Day Weekend,161052554,59
5,September,2020,Labor Day Weekend,120764144,82
6,July,2020,Independence Day,46565065,54
7,May,2020,Memorial Day Weekend,230819376,97
8,April,2020,Easter Weekend,121560823,74
9,February,2020,President's Day Weekend,152447001,86


In [11]:
# Sum up the Gross Rev and # of Releases for each month
df_month_2019 = df_2019_trim[['Month', 'Cumulative Gross', 'Releases']].groupby(by='Month').sum()

# Add a Year Column
df_month_2019['Year'] = [int(2019) for x in range(len(df_month_2019))]

# Reset the Index and Trim the Dataframe
df_month_2019 = df_month_2019.reset_index()
df_month_2019 = df_month_2019[['Month', 'Year', 'Cumulative Gross', 'Releases']]

# Rename columns
df_month_2019 = df_month_2019.rename(columns = {'Cumulative Gross': 'Holiday Gross', 'Releases': 'Holiday Releases'})

df_month_2019

Unnamed: 0,Month,Year,Holiday Gross,Holiday Releases
0,April,2019,121560823,74
1,December,2019,122595417,108
2,February,2019,152447001,86
3,January,2019,221945359,122
4,July,2019,46565065,54
5,May,2019,230819376,97
6,November,2019,262269314,58
7,October,2019,170589530,113
8,September,2019,120764144,82


### 2020 Holiday Table

In [12]:
holidays_2020_df

Unnamed: 0,Holiday,Cumulative Gross,Releases
0,New Year's Eve,"$2,933,369",17
1,Christmas Day,"$10,214,882",27
2,Post-Thanksgiving Weekend,"$8,397,393",41
3,Thanksgiving Weekend,"$14,152,936",41
4,Thanksgiving 4-Day Weekend,"$17,660,248",29
5,Thanksgiving,"$3,570,668",27
6,Thanksgiving 5-Day Weekend,"$20,251,599",36
7,Halloween,"$3,791,211",35
8,Indigenous People's Day,"$1,001,260",29
9,Indigenous People's Day Weekend,"$11,259,812",32


In [13]:
# Drop redundant holiday rows
df_2020_trim = holidays_2020_df.drop([2,3,4,5,8,10,13,15,17,19])
# Reset Index
df_2020_trim.reset_index(inplace=True)
# Trim the columns
df_2020_trim = df_2020_trim[['Holiday', 'Cumulative Gross', 'Releases']]

df_2020_trim

Unnamed: 0,Holiday,Cumulative Gross,Releases
0,New Year's Eve,"$2,933,369",17
1,Christmas Day,"$10,214,882",27
2,Thanksgiving 5-Day Weekend,"$20,251,599",36
3,Halloween,"$3,791,211",35
4,Indigenous People's Day Weekend,"$11,259,812",32
5,Labor Day Weekend,"$19,858,242",25
6,Independence Day,"$74,261",9
7,Memorial Day Weekend,"$240,390",6
8,Easter Weekend,"$3,675",2
9,President's Day Weekend,"$182,920,720",78


In [14]:
# Convert Table
df_2020_trim['Cumulative Gross'] = df_2020_trim['Cumulative Gross'].str.replace(',', '')
df_2020_trim['Cumulative Gross'] = df_2020_trim['Cumulative Gross'].str.replace('$', '')
df_2020_trim['Cumulative Gross'] = df_2020_trim['Cumulative Gross'].astype('int64', copy=True)

In [15]:
# Encode each holiday as a month
# holiday month list
month_name_list = []

# loop thru list
for i in range(len(df_2020_trim)):
    if df_2020_trim['Holiday'].iloc[i] in ["New Year's Eve", "Christmas Day"]:
        month_name_list.append('December')
    elif df_2020_trim['Holiday'].iloc[i] in ["Thanksgiving 5-Day Weekend"]:
        month_name_list.append('November')
    elif df_2020_trim['Holiday'].iloc[i] in ["Halloween", "Indigenous People's Day Weekend"]:
        month_name_list.append('October')
    elif df_2020_trim['Holiday'].iloc[i] in ["Labor Day Weekend"]:
        month_name_list.append('September')
    elif df_2020_trim['Holiday'].iloc[i] in ['Independence Day']:
        month_name_list.append('July')
    elif df_2020_trim['Holiday'].iloc[i] in ['Memorial Day Weekend']: 
        month_name_list.append('May')
    elif df_2020_trim['Holiday'].iloc[i] in ['Easter Weekend']: 
        month_name_list.append('April')
    elif df_2020_trim['Holiday'].iloc[i] in ["President's Day Weekend"]: 
        month_name_list.append('February')
    else:
        month_name_list.append('January')
        
display(len(month_name_list))

# Add lists to df as columns
df_2020_trim['Month'] = month_name_list
df_2020_trim['Year'] = [int(2020) for x in range(len(df_2020_trim))]

df_2020_trim = df_2020_trim[['Month', 'Year', 'Holiday', 'Cumulative Gross', 'Releases']]

df_2020_trim

12

Unnamed: 0,Month,Year,Holiday,Cumulative Gross,Releases
0,December,2020,New Year's Eve,2933369,17
1,December,2020,Christmas Day,10214882,27
2,November,2020,Thanksgiving 5-Day Weekend,20251599,36
3,October,2020,Halloween,3791211,35
4,October,2020,Indigenous People's Day Weekend,11259812,32
5,September,2020,Labor Day Weekend,19858242,25
6,July,2020,Independence Day,74261,9
7,May,2020,Memorial Day Weekend,240390,6
8,April,2020,Easter Weekend,3675,2
9,February,2020,President's Day Weekend,182920720,78


In [16]:
# Sum up the Gross Rev and # of Releases for each month
df_month_2020 = df_2020_trim[['Month', 'Cumulative Gross', 'Releases']].groupby(by='Month').sum()

# Add a Year Column
df_month_2020['Year'] = [int(2020) for x in range(len(df_month_2020))]

# Reset the Index and Trim the Dataframe
df_month_2020 = df_month_2020.reset_index()
df_month_2020 = df_month_2020[['Month', 'Year', 'Cumulative Gross', 'Releases']]

# Rename columns
df_month_2020 = df_month_2020.rename(columns = {'Cumulative Gross': 'Holiday Gross', 'Releases': 'Holiday Releases'})

df_month_2020

Unnamed: 0,Month,Year,Holiday Gross,Holiday Releases
0,April,2020,3675,2
1,December,2020,13148251,44
2,February,2020,182920720,78
3,January,2020,261809863,115
4,July,2020,74261,9
5,May,2020,240390,6
6,November,2020,20251599,36
7,October,2020,15051023,67
8,September,2020,19858242,25


### 2021 Holiday Table

In [17]:
holidays_2021_df

Unnamed: 0,Holiday,Cumulative Gross,Releases
0,New Year's Eve,"$28,345,913",25
1,Christmas Day,"$58,100,508",26
2,Post-Thanksgiving Weekend,"$52,704,939",47
3,Thanksgiving Weekend,"$96,526,140",51
4,Thanksgiving 4-Day Weekend,"$116,694,008",38
5,Thanksgiving,"$20,380,609",35
6,Thanksgiving 5-Day Weekend,"$142,082,464",42
7,Halloween,"$16,292,807",34
8,Indigenous People's Day,"$14,257,686",29
9,Indigenous People's Day Weekend,"$122,533,028",34


In [18]:
# Drop redundant holiday rows
df_2021_trim = holidays_2021_df.drop([2,3,4,5,8,10,12,14,16,18,20])
# Reset Index
df_2021_trim.reset_index(inplace=True)
# Trim the columns
df_2021_trim = df_2021_trim[['Holiday', 'Cumulative Gross', 'Releases']]

df_2021_trim

Unnamed: 0,Holiday,Cumulative Gross,Releases
0,New Year's Eve,"$28,345,913",25
1,Christmas Day,"$58,100,508",26
2,Thanksgiving 5-Day Weekend,"$142,082,464",42
3,Halloween,"$16,292,807",34
4,Indigenous People's Day Weekend,"$122,533,028",34
5,Labor Day Weekend,"$139,020,923",48
6,Independence Day Weekend,"$87,171,208",46
7,Memorial Day Weekend,"$97,874,228",45
8,Easter Weekend,"$47,518,581",28
9,President's Day Weekend,"$13,132,939",32


In [19]:
# Convert Table
df_2021_trim['Cumulative Gross'] = df_2021_trim['Cumulative Gross'].str.replace(',', '')
df_2021_trim['Cumulative Gross'] = df_2021_trim['Cumulative Gross'].str.replace('$', '')
df_2021_trim['Cumulative Gross'] = df_2021_trim['Cumulative Gross'].astype('int64', copy=True)

In [20]:
# Encode each holiday as a month
# holiday month list
month_name_list = []

# loop thru list
for i in range(len(df_2020_trim)):
    if df_2021_trim['Holiday'].iloc[i] in ["New Year's Eve", "Christmas Day"]:
        month_name_list.append('December')
    elif df_2021_trim['Holiday'].iloc[i] in ["Thanksgiving 5-Day Weekend"]:
        month_name_list.append('November')
    elif df_2021_trim['Holiday'].iloc[i] in ["Halloween", "Indigenous People's Day Weekend"]:
        month_name_list.append('October')
    elif df_2021_trim['Holiday'].iloc[i] in ["Labor Day Weekend"]:
        month_name_list.append('September')
    elif df_2021_trim['Holiday'].iloc[i] in ['Independence Day', 'Independence Day Weekend']:
        month_name_list.append('July')
    elif df_2021_trim['Holiday'].iloc[i] in ['Memorial Day Weekend']: 
        month_name_list.append('May')
    elif df_2021_trim['Holiday'].iloc[i] in ['Easter Weekend']: 
        month_name_list.append('April')
    elif df_2021_trim['Holiday'].iloc[i] in ["President's Day Weekend"]: 
        month_name_list.append('February')
    else:
        month_name_list.append('January')
        
display(len(month_name_list))

# Add lists to df as columns
df_2021_trim['Month'] = month_name_list
df_2021_trim['Year'] = [int(2021) for x in range(len(df_2021_trim))]

df_2021_trim = df_2021_trim[['Month', 'Year', 'Holiday', 'Cumulative Gross', 'Releases']]

df_2021_trim

12

Unnamed: 0,Month,Year,Holiday,Cumulative Gross,Releases
0,December,2021,New Year's Eve,28345913,25
1,December,2021,Christmas Day,58100508,26
2,November,2021,Thanksgiving 5-Day Weekend,142082464,42
3,October,2021,Halloween,16292807,34
4,October,2021,Indigenous People's Day Weekend,122533028,34
5,September,2021,Labor Day Weekend,139020923,48
6,July,2021,Independence Day Weekend,87171208,46
7,May,2021,Memorial Day Weekend,97874228,45
8,April,2021,Easter Weekend,47518581,28
9,February,2021,President's Day Weekend,13132939,32


In [21]:
# Sum up the Gross Rev and # of Releases for each month
df_month_2021 = df_2021_trim[['Month', 'Cumulative Gross', 'Releases']].groupby(by='Month').sum()

# Add a Year Column
df_month_2021['Year'] = [int(2021) for x in range(len(df_month_2021))]

# Reset the Index and Trim the Dataframe
df_month_2021 = df_month_2021.reset_index()
df_month_2021 = df_month_2021[['Month', 'Year', 'Cumulative Gross', 'Releases']]

# Rename columns
df_month_2021 = df_month_2021.rename(columns = {'Cumulative Gross': 'Holiday Gross', 'Releases': 'Holiday Releases'})

df_month_2021

Unnamed: 0,Month,Year,Holiday Gross,Holiday Releases
0,April,2021,47518581,28
1,December,2021,86446421,51
2,February,2021,13132939,32
3,January,2021,17576635,43
4,July,2021,87171208,46
5,May,2021,97874228,45
6,November,2021,142082464,42
7,October,2021,138825835,68
8,September,2021,139020923,48


### 2022 Holiday Table

In [22]:
holidays_2022_df

Unnamed: 0,Holiday,Cumulative Gross,Releases
0,New Year's Eve,"$28,295,543",31
1,Christmas Day,"$43,796,821",28
2,Christmas Weekend,"$142,814,390",33
3,Post-Thanksgiving Weekend,"$52,669,526",54
4,Thanksgiving Weekend,"$93,780,231",49
5,Thanksgiving 4-Day Weekend,"$111,394,577",36
6,Thanksgiving,"$17,723,816",31
7,Thanksgiving 5-Day Weekend,"$134,030,714",37
8,Halloween,"$7,440,404",34
9,Indigenous People's Day,"$8,616,769",39


In [23]:
# Drop redundant holiday rows
df_2022_trim = holidays_2022_df.drop([1,3,4,5,6,9,11,13,15,17,19,21,24])
# Reset Index
df_2022_trim.reset_index(inplace=True)
# Trim the columns
df_2022_trim = df_2022_trim[['Holiday', 'Cumulative Gross', 'Releases']]

df_2022_trim

Unnamed: 0,Holiday,Cumulative Gross,Releases
0,New Year's Eve,"$28,295,543",31
1,Christmas Weekend,"$142,814,390",33
2,Thanksgiving 5-Day Weekend,"$134,030,714",37
3,Halloween,"$7,440,404",34
4,Indigenous People's Day Weekend,"$67,265,387",45
5,Labor Day Weekend,"$71,172,772",55
6,Independence Day Weekend,"$223,580,059",40
7,Memorial Day Weekend,"$223,364,859",47
8,Easter Weekend,"$119,498,996",28
9,President's Day Weekend,"$111,455,709",51


In [24]:
# Convert Table
df_2022_trim['Cumulative Gross'] = df_2022_trim['Cumulative Gross'].str.replace(',', '')
df_2022_trim['Cumulative Gross'] = df_2022_trim['Cumulative Gross'].str.replace('$', '')
df_2022_trim['Cumulative Gross'] = df_2022_trim['Cumulative Gross'].astype('int64', copy=True)

In [25]:
# Encode each holiday as a month
# holiday month list
month_name_list = []

# loop thru list
for i in range(len(df_2020_trim)):
    if df_2022_trim['Holiday'].iloc[i] in ["New Year's Eve", "Christmas Day", "Christmas Weekend"]:
        month_name_list.append('December')
    elif df_2022_trim['Holiday'].iloc[i] in ["Thanksgiving 5-Day Weekend"]:
        month_name_list.append('November')
    elif df_2022_trim['Holiday'].iloc[i] in ["Halloween", "Indigenous People's Day Weekend"]:
        month_name_list.append('October')
    elif df_2022_trim['Holiday'].iloc[i] in ["Labor Day Weekend"]:
        month_name_list.append('September')
    elif df_2022_trim['Holiday'].iloc[i] in ['Independence Day', 'Independence Day Weekend']:
        month_name_list.append('July')
    elif df_2022_trim['Holiday'].iloc[i] in ['Memorial Day Weekend']: 
        month_name_list.append('May')
    elif df_2022_trim['Holiday'].iloc[i] in ['Easter Weekend']: 
        month_name_list.append('April')
    elif df_2022_trim['Holiday'].iloc[i] in ["President's Day Weekend"]: 
        month_name_list.append('February')
    else:
        month_name_list.append('January')
        
display(len(month_name_list))

# Add lists to df as columns
df_2022_trim['Month'] = month_name_list
df_2022_trim['Year'] = [int(2022) for x in range(len(df_2022_trim))]

df_2022_trim = df_2022_trim[['Month', 'Year', 'Holiday', 'Cumulative Gross', 'Releases']]

df_2022_trim

12

Unnamed: 0,Month,Year,Holiday,Cumulative Gross,Releases
0,December,2022,New Year's Eve,28295543,31
1,December,2022,Christmas Weekend,142814390,33
2,November,2022,Thanksgiving 5-Day Weekend,134030714,37
3,October,2022,Halloween,7440404,34
4,October,2022,Indigenous People's Day Weekend,67265387,45
5,September,2022,Labor Day Weekend,71172772,55
6,July,2022,Independence Day Weekend,223580059,40
7,May,2022,Memorial Day Weekend,223364859,47
8,April,2022,Easter Weekend,119498996,28
9,February,2022,President's Day Weekend,111455709,51


In [26]:
# Sum up the Gross Rev and # of Releases for each month
df_month_2022 = df_2022_trim[['Month', 'Cumulative Gross', 'Releases']].groupby(by='Month').sum()

# Add a Year Column
df_month_2022['Year'] = [int(2022) for x in range(len(df_month_2022))]

# Reset the Index and Trim the Dataframe
df_month_2022 = df_month_2022.reset_index()
df_month_2022 = df_month_2022[['Month', 'Year', 'Cumulative Gross', 'Releases']]

# Rename columns
df_month_2022 = df_month_2022.rename(columns = {'Cumulative Gross': 'Holiday Gross', 'Releases': 'Holiday Releases'})

df_month_2022

Unnamed: 0,Month,Year,Holiday Gross,Holiday Releases
0,April,2022,119498996,28
1,December,2022,171109933,64
2,February,2022,111455709,51
3,January,2022,125403205,63
4,July,2022,223580059,40
5,May,2022,223364859,47
6,November,2022,134030714,37
7,October,2022,74705791,79
8,September,2022,71172772,55


### 2023 Holiday Table

In [27]:
holidays_2023_df

Unnamed: 0,Holiday,Cumulative Gross,Releases
0,Christmas Day,"$62,526,953",34
1,Christmas Weekend,"$146,273,143",36
2,Post-Thanksgiving Weekend,"$97,203,446",52
3,Thanksgiving Weekend,"$116,005,630",54
4,Thanksgiving 4-Day Weekend,"$138,550,022",37
5,Thanksgiving,"$22,728,673",31
6,Thanksgiving 5-Day Weekend,"$173,203,005",40
7,Halloween,"$10,315,133",35
8,Indigenous People's Day,"$10,021,464",44
9,Indigenous People's Day Weekend,"$82,594,224",48


In [35]:
# Drop redundant holiday rows
df_2023_trim = holidays_2023_df.drop([0,2,3,4,5,8,10,12,14,16,18,20])
# Reset Index
df_2023_trim.reset_index(inplace=True)
# Trim the columns
df_2023_trim = df_2023_trim[['Holiday', 'Cumulative Gross', 'Releases']]

df_2023_trim

Unnamed: 0,Holiday,Cumulative Gross,Releases
0,Christmas Weekend,"$146,273,143",36
1,Thanksgiving 5-Day Weekend,"$173,203,005",40
2,Halloween,"$10,315,133",35
3,Indigenous People's Day Weekend,"$82,594,224",48
4,Labor Day Weekend,"$115,070,634",60
5,Independence Day Weekend,"$196,321,885",28
6,Memorial Day Weekend,"$203,750,914",61
7,Easter Weekend,"$231,916,311",39
8,President's Day Weekend,"$166,970,765",57
9,MLK Day Weekend,"$124,779,112",43


In [36]:
# Convert Table
df_2023_trim['Cumulative Gross'] = df_2023_trim['Cumulative Gross'].str.replace(',', '')
df_2023_trim['Cumulative Gross'] = df_2023_trim['Cumulative Gross'].str.replace('$', '')
df_2023_trim['Cumulative Gross'] = df_2023_trim['Cumulative Gross'].astype('int64', copy=True)

In [37]:
# Encode each holiday as a month
# holiday month list
month_name_list = []

# loop thru list
for i in range(len(df_2023_trim)):
    if df_2023_trim['Holiday'].iloc[i] in ["New Year's Eve", "Christmas Day", "Christmas Weekend"]:
        month_name_list.append('December')
    elif df_2023_trim['Holiday'].iloc[i] in ["Thanksgiving 5-Day Weekend"]:
        month_name_list.append('November')
    elif df_2023_trim['Holiday'].iloc[i] in ["Halloween", "Indigenous People's Day Weekend"]:
        month_name_list.append('October')
    elif df_2023_trim['Holiday'].iloc[i] in ["Labor Day Weekend"]:
        month_name_list.append('September')
    elif df_2023_trim['Holiday'].iloc[i] in ['Independence Day', 'Independence Day Weekend']:
        month_name_list.append('July')
    elif df_2023_trim['Holiday'].iloc[i] in ['Memorial Day Weekend']: 
        month_name_list.append('May')
    elif df_2023_trim['Holiday'].iloc[i] in ['Easter Weekend']: 
        month_name_list.append('April')
    elif df_2023_trim['Holiday'].iloc[i] in ["President's Day Weekend"]: 
        month_name_list.append('February')
    else:
        month_name_list.append('January')
        
display(len(month_name_list))

# Add lists to df as columns
df_2023_trim['Month'] = month_name_list
df_2023_trim['Year'] = [int(2023) for x in range(len(df_2023_trim))]

df_2023_trim = df_2023_trim[['Month', 'Year', 'Holiday', 'Cumulative Gross', 'Releases']]

df_2023_trim

11

Unnamed: 0,Month,Year,Holiday,Cumulative Gross,Releases
0,December,2023,Christmas Weekend,146273143,36
1,November,2023,Thanksgiving 5-Day Weekend,173203005,40
2,October,2023,Halloween,10315133,35
3,October,2023,Indigenous People's Day Weekend,82594224,48
4,September,2023,Labor Day Weekend,115070634,60
5,July,2023,Independence Day Weekend,196321885,28
6,May,2023,Memorial Day Weekend,203750914,61
7,April,2023,Easter Weekend,231916311,39
8,February,2023,President's Day Weekend,166970765,57
9,January,2023,MLK Day Weekend,124779112,43


In [38]:
# Sum up the Gross Rev and # of Releases for each month
df_month_2023 = df_2023_trim[['Month', 'Cumulative Gross', 'Releases']].groupby(by='Month').sum()

# Add back the Year Column
df_month_2023['Year'] = [int(2023) for x in range(len(df_month_2023))]

# Reset the Index and Trim the Dataframe
df_month_2023 = df_month_2023.reset_index()
df_month_2023 = df_month_2023[['Month', 'Year', 'Cumulative Gross', 'Releases']]

# Rename columns
df_month_2023 = df_month_2023.rename(columns = {'Cumulative Gross': 'Holiday Gross', 'Releases': 'Holiday Releases'})

df_month_2023

Unnamed: 0,Month,Year,Holiday Gross,Holiday Releases
0,April,2023,231916311,39
1,December,2023,146273143,36
2,February,2023,166970765,57
3,January,2023,161313013,74
4,July,2023,196321885,28
5,May,2023,203750914,61
6,November,2023,173203005,40
7,October,2023,92909357,83
8,September,2023,115070634,60


### Combined Holiday Table 2019-2023

In [None]:
# Looking to make a combined table csv file named master_holiday_monthly_revenue.csv

In [39]:
display(df_month_2019.columns, df_month_2020.columns, df_month_2021.columns, df_month_2022.columns, df_month_2023.columns)
display(df_month_2019.head(1), df_month_2020.head(1), df_month_2021.head(1), df_month_2022.head(1), df_month_2023.head(1))

Index(['Month', 'Year', 'Holiday Gross', 'Holiday Releases'], dtype='object')

Index(['Month', 'Year', 'Holiday Gross', 'Holiday Releases'], dtype='object')

Index(['Month', 'Year', 'Holiday Gross', 'Holiday Releases'], dtype='object')

Index(['Month', 'Year', 'Holiday Gross', 'Holiday Releases'], dtype='object')

Index(['Month', 'Year', 'Holiday Gross', 'Holiday Releases'], dtype='object')

Unnamed: 0,Month,Year,Holiday Gross,Holiday Releases
0,April,2019,121560823,74


Unnamed: 0,Month,Year,Holiday Gross,Holiday Releases
0,April,2020,3675,2


Unnamed: 0,Month,Year,Holiday Gross,Holiday Releases
0,April,2021,47518581,28


Unnamed: 0,Month,Year,Holiday Gross,Holiday Releases
0,April,2022,119498996,28


Unnamed: 0,Month,Year,Holiday Gross,Holiday Releases
0,April,2023,231916311,39


In [43]:
# Merge all the holiday tables
holiday_merged_df = pd.concat(
                              [df_month_2019, 
                               df_month_2020,
                               df_month_2021, 
                               df_month_2022, 
                               df_month_2023
                              ], 
                              axis=0
                             )

display(len(holiday_merged_df))
holiday_merged_df

45

Unnamed: 0,Month,Year,Holiday Gross,Holiday Releases
0,April,2019,121560823,74
1,December,2019,122595417,108
2,February,2019,152447001,86
3,January,2019,221945359,122
4,July,2019,46565065,54
5,May,2019,230819376,97
6,November,2019,262269314,58
7,October,2019,170589530,113
8,September,2019,120764144,82
0,April,2020,3675,2


In [44]:
holiday_merged_df.to_csv('master_holiday_monthly_revenue.csv')