This program scrapes data from boxofficemojo.com, puts it into a dataframe and cleans the dataframe to answer these questions:

1. What is the top-grossing movie in each season per year?

2. What is the top-grossing domestic movie for the whole year, per year?

3. What is the top-grossing movie of all time?

4. In which season do people watch the most movies?

In [139]:
## Imports
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
from IPython.display import display, HTML

In [140]:
## Main function
def main():
    """Build the seasonal dataframe, show a preview of it, and print the answers.

    The dataframe comes from get_seasonal_data(); its first ten rows are
    rendered as an HTML table and the full frame is then handed to
    answer_questions().
    """
    frame = get_seasonal_data()

    # Swap the literal backslash-n sequences in the generated markup for
    # <br> tags before handing the HTML to the notebook for display.
    preview_markup = frame.head(10).to_html()
    preview_markup = preview_markup.replace("\\n", "<br>")
    display(HTML(preview_markup))

    # Answer questions
    answer_questions(frame)
    

In [141]:
## Method to get soup
def get_soup(url):
    """Download *url* and return its parsed HTML, or None when the fetch fails.

    Args:
        url: Address of the page to download.

    Returns:
        A BeautifulSoup tree built with the "html.parser" backend when the
        server answers with status 200. Otherwise None, after an error
        message has been printed.
    """
    try:
        # Without a timeout a stalled connection would hang the whole scrape.
        response = requests.get(url, timeout=30)
    except requests.RequestException as error:
        # Report network-level failures the same way as bad status codes so
        # callers only ever have to handle the None case.
        print("Error: Request failed:", error)
        return None

    # Check response
    if response.status_code != 200:
        print("Error: Response code", response.status_code)
        return None

    return BeautifulSoup(response.content, "html.parser")

In [142]:
## Method to get seasonal data
def _cell_texts(soup, css_class):
    """Return the text of every <td> that find_all matches for *css_class*."""
    return [cell.getText() for cell in soup.find_all("td", class_=css_class)]


def get_seasonal_data():
    """Scrape the yearly table of every season into a single dataframe.

    Each season page is fetched through get_soup(); seasons whose page could
    not be fetched (get_soup() returned None) are left out of the result.

    Returns:
        A DataFrame with the columns Season, Year, Movie, Cummulative Gross,
        Releases and Average Gross. There is one row per season, ordered by
        season name. Every column other than Season holds a single string
        with that season's per-year values joined by newlines.
    """
    seasons = ["winter", "spring", "summer", "fall", "holiday"]
    columns = ["Season", "Year", "Movie", "Cummulative Gross", "Releases", "Average Gross"]
    records = []

    for season in seasons:
        url = "https://www.boxofficemojo.com/season/" + season + "/?grossesOption=calendarGrosses"
        soup = get_soup(url)
        if soup is None:
            continue

        years = _cell_texts(soup, "a-text-left mojo-header-column mojo-truncate mojo-field-type-year mojo-sort-column")

        # All three money columns share one class, so the cells arrive
        # interleaved: cumulative, average, top-release, cumulative, ...
        money = _cell_texts(soup, "a-text-right mojo-field-type-money")
        cumulative_gross = money[0::3]
        average_gross = money[1::3]

        releases = _cell_texts(soup, "a-text-right mojo-field-type-positive_integer")
        movies = _cell_texts(soup, "a-text-left mojo-field-type-release mojo-cell-wide")

        # A column with fewer cells than there are years cannot be lined up
        # with them; report it and skip the season instead of failing later.
        per_year_columns = [movies, cumulative_gross, releases, average_gross]
        if any(len(values) < len(years) for values in per_year_columns):
            print("Error: Incomplete table for season", season)
            continue

        # Pair the columns positionally rather than looking up each year's
        # index, which is quadratic and wrong when a year value repeats.
        label = season.capitalize()
        records.extend(
            [label, year, movie, gross, count, average]
            for year, movie, gross, count, average in zip(years, *per_year_columns)
        )

    seasonal_df = pd.DataFrame(records, columns=columns)
    if seasonal_df.empty:
        return seasonal_df

    # Collapse the per-year rows into one newline-joined row per season,
    # once, after every season has been collected.
    joiners = {column: "\n".join for column in columns[1:]}
    return seasonal_df.groupby("Season", as_index=False).agg(joiners)

In [143]:
## Method to answer questions
def _parse_gross(amount):
    """Convert a dollar string such as "$1,234" into the integer 1234."""
    return int(amount.replace("$", "").replace(",", ""))


def _season_entries(seasonal_df):
    """Yield (season, year, movie, gross_text) for each year of each season row."""
    for _, row in seasonal_df.iterrows():
        years = row["Year"].split("\n")
        movies = row["Movie"].split("\n")
        grosses = row["Cummulative Gross"].split("\n")
        # Pair the columns positionally; looking up years.index(year) would
        # return the first match and mis-assign rows when a year repeats.
        for year, movie, gross_text in zip(years, movies, grosses):
            yield row["Season"], year, movie, gross_text


def answer_questions(seasonal_df):
    """Print the answers to the four box-office questions.

    Args:
        seasonal_df: DataFrame with one row per season whose "Year", "Movie"
            and "Cummulative Gross" columns are newline-joined strings, plus
            a "Season" column naming the season.

    Returns:
        None. Everything is written to standard output.
    """
    ## 1. What is the top-grossing movie in each season per year?
    print("The seasonal dataframe shows the top-grossing movie in each season per year.")

    entries = list(_season_entries(seasonal_df))

    ## 2. What is the top-grossing domestic movie for the whole year, per year?
    # NOTE(review): the ranking value is the season's cumulative gross, since
    # the dataframe carries no per-movie gross - confirm this proxy is intended.
    # best_by_year = {year: (movie, gross_text, gross)}; on a tie the entry
    # seen first is kept.
    best_by_year = {}
    for _season, year, movie, gross_text in entries:
        gross = _parse_gross(gross_text)
        if year not in best_by_year or gross > best_by_year[year][2]:
            best_by_year[year] = (movie, gross_text, gross)

    print("The highest grossing movie for each year is:")
    for year, (movie, _gross_text, _gross) in best_by_year.items():
        print(year, ":", movie)

    ## 3. What is the top-grossing movie of all time?
    print("\n")
    if best_by_year:
        # max() keeps the first of several equal maxima.
        movie, gross_text, _gross = max(best_by_year.values(), key=lambda entry: entry[2])
        print("The highest grossing movie of all time is", movie, "with a gross of", gross_text)
    else:
        print("No data available to determine the highest grossing movie of all time.")
    print("\n")

    ## 4. Which season do people watch the most movies?
    # gross_per_season = {season: total gross over all of its years}
    gross_per_season = {}
    for season, _year, _movie, gross_text in entries:
        gross_per_season[season] = gross_per_season.get(season, 0) + _parse_gross(gross_text)

    if gross_per_season:
        top_season = max(gross_per_season, key=gross_per_season.get)
        top_total = "${:,}".format(gross_per_season[top_season])
        print("The season with the highest gross is", top_season, "with a gross of", top_total)
    else:
        print("No data available to determine the season with the highest gross.")
        


In [144]:
## Run main
# Entry point: run the scrape-and-report pipeline only when this code is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main() 

Unnamed: 0,Season,Year,Movie,Cummulative Gross,Releases,Average Gross
0,Fall,2023 2022 2021 2020 2019 2018 2017 2016 2015 2014 2013 2012 2011 2010 2009 2008 2007 2006 2005 2004 2003 2002 2001 2000 1999 1998 1997 1996 1995 1994 1993 1992 1991 1990 1989 1988 1987 1986 1985 1984 1983 1982 1981 1980 1979 1977,Taylor Swift: The Eras Tour Black Adam Venom: Let There Be Carnage Tenet Joker Venom It Sully The Martian Gone Girl Gravity Hotel Transylvania The Lion King2011 3D Release Jackass 3D Cloudy with a Chance of Meatballs Eagle Eye The Game Plan The Departed Flightplan Shark Tale Scary Movie 3 Sweet Home Alabama Training Day Meet the Parents Double Jeopardy Rush Hour In & Out The First Wives Club Se7en Forrest Gump The Fugitive The Last of the Mohicans Freddy's Dead: The Final Nightmare Ghost Sea of Love Cocktail Fatal Attraction Crocodile Dundee Back to the Future All of Me Never Say Never Again An Officer and a Gentleman Continental Divide Private Benjamin 10 Star Wars: Episode IV - A New Hope,"$926,845,054 $734,082,949 $866,355,282 $117,551,002 $1,403,820,286 $1,403,497,899 $1,173,481,576 $1,129,541,691 $1,249,071,635 $1,352,668,265 $1,124,107,841 $1,150,604,864 $1,091,570,660 $1,116,827,351 $1,145,260,534 $1,179,402,588 $978,043,143 $1,028,137,150 $983,490,411 $998,403,401 $1,226,694,885 $993,140,869 $852,184,926 $774,209,652 $871,900,675 $773,358,197 $794,845,805 $618,573,133 $627,037,580 $623,613,655 $630,374,473 $540,866,352 $461,128,534 $519,477,461 $480,394,696 $377,212,756 $409,716,267 $419,778,490 $309,058,591 $358,034,853 $312,308,675 $279,397,701 $32,113,438 $279,299,713 $118,888,887 $109,135,486",160 164 131 100 272 318 310 297 275 308 300 281 287 253 243 271 263 262 260 255 273 258 127 146 142 82 78 87 80 71 69 68 66 63 54 62 59 51 43 46 38 37 6 17 4 3,"$5,792,781 $4,476,115 $6,613,399 $1,175,510 $5,161,103 $4,413,515 $3,785,424 $3,803,170 $4,542,078 $4,391,780 $3,747,026 $4,094,679 $3,803,382 $4,414,337 $4,713,006 $4,352,039 $3,718,795 $3,924,187 $3,782,655 $3,915,307 $4,493,387 $3,849,383 $6,710,117 $5,302,805 $6,140,145 
$9,431,197 $10,190,330 $7,110,036 $7,837,969 $8,783,290 $9,135,861 $7,953,916 $6,986,795 $8,245,673 $8,896,198 $6,084,076 $6,944,343 $8,230,950 $7,187,409 $7,783,366 $8,218,649 $7,551,289 $5,352,239 $16,429,394 $29,722,221 $36,378,495"
1,Holiday,2023 2022 2021 2020 2019 2018 2017 2016 2015 2014 2013 2012 2011 2010 2009 2008 2007 2006 2005 2004 2003 2002 2001 2000 1999 1998 1997 1996 1995 1994 1993 1992 1991 1990 1989 1988 1987 1986 1985 1984 1983 1982 1981 1980 1979 1978 1977 1976 1975 1972,The Hunger Games: The Ballad of Songbirds & Snakes Avatar: The Way of Water Spider-Man: No Way Home The Croods: A New Age Frozen II The Grinch Star Wars: Episode VIII - The Last Jedi Rogue One: A Star Wars Story Star Wars: Episode VII - The Force Awakens The Hunger Games: Mockingjay - Part 1 The Hunger Games: Catching Fire Skyfall The Twilight Saga: Breaking Dawn - Part 1 Harry Potter and the Deathly Hallows: Part 1 Avatar Madagascar: Escape 2 Africa I Am Legend Happy Feet Harry Potter and the Goblet of Fire The Incredibles The Lord of the Rings: The Return of the King Harry Potter and the Chamber of Secrets Harry Potter and the Sorcerer's Stone How the Grinch Stole Christmas Toy Story 2 The Waterboy Titanic Ransom Toy Story The Santa Clause Mrs. 
Doubtfire Home Alone 2: Lost in New York The Addams Family Home Alone Back to the Future Part II Twins Three Men and a Baby Star Trek IV: The Voyage Home Rocky IV Beverly Hills Cop Sudden Impact Tootsie Raiders of the Lost Ark 9 to 5 Star Trek: The Motion Picture Superman Close Encounters of the Third Kind King Kong Jaws The Poseidon Adventure,"$681,436,646 $1,399,001,102 $1,529,945,428 $141,313,215 $2,198,128,197 $2,227,125,300 $2,464,252,488 $2,316,973,910 $2,433,852,535 $1,817,994,796 $2,205,634,407 $2,148,217,840 $1,856,106,353 $1,916,622,857 $2,283,063,082 $1,656,471,144 $1,828,892,856 $1,811,536,356 $1,859,715,946 $1,777,761,628 $1,703,043,868 $1,884,610,262 $1,738,682,033 $1,636,532,301 $1,411,875,839 $1,330,785,803 $1,110,700,414 $1,100,350,344 $1,037,668,875 $975,520,103 $838,454,602 $957,926,493 $807,633,170 $830,366,349 $711,548,809 $632,939,999 $626,684,657 $538,182,417 $509,956,585 $489,684,428 $405,780,845 $424,519,938 $316,246,351 $525,968,293 $315,149,888 $279,843,614 $104,644,068 $52,614,445 $190,274,624 $84,563,118",121 142 130 110 245 280 247 258 242 228 244 244 233 216 214 217 242 246 236 232 237 258 134 146 130 86 66 75 74 69 70 54 65 61 62 62 50 51 48 47 39 35 17 13 12 5 4 1 1 1,"$5,631,707 $9,852,120 $11,768,810 $1,284,665 $8,971,951 $7,954,018 $9,976,730 $8,980,519 $10,057,241 $7,973,661 $9,039,485 $8,804,171 $7,966,121 $8,873,253 $10,668,519 $7,633,507 $7,557,408 $7,363,968 $7,880,152 $7,662,765 $7,185,839 $7,304,690 $12,975,239 $11,209,125 $10,860,583 $15,474,253 $16,828,794 $14,671,337 $14,022,552 $14,137,972 $11,977,922 $17,739,379 $12,425,125 $13,612,563 $11,476,593 $10,208,709 $12,533,693 $10,552,596 $10,624,095 $10,418,817 $10,404,637 $12,129,141 $18,602,726 $40,459,099 $26,262,490 $55,968,722 $26,161,017 $52,614,445 $190,274,624 $84,563,118"
2,Spring,2023 2022 2021 2020 2019 2018 2017 2016 2015 2014 2013 2012 2011 2010 2009 2008 2007 2006 2005 2004 2003 2002 2001 2000 1999 1998 1997 1996 1995 1994 1993 1992 1991 1990 1989 1988 1987 1986 1985 1984 1983 1982 1981 1980 1979 1978 1972,The Super Mario Bros. Movie The Batman Godzilla vs. Kong Onward Avengers: Endgame Avengers: Infinity War Beauty and the Beast Batman v Superman: Dawn of Justice Furious 7 Captain America: The Winter Soldier Oz the Great and Powerful The Hunger Games Rango Alice in Wonderland Monsters vs. Aliens Horton Hears a Who! 300 Ice Age: The Meltdown Robots The Passion of the Christ Bringing Down the House Ice Age Spy Kids Erin Brockovich The Matrix Titanic Liar Liar The Birdcage Outbreak Naked Gun 33 1/3: The Final Insult Indecent Proposal Basic Instinct The Silence of the Lambs The Hunt for Red October Rain Man Beetlejuice Lethal Weapon Police Academy 3: Back in Training Police Academy 2: Their First Assignment Police Academy Tootsie Porky's Excalibur Kramer vs. 
Kramer Manhattan Close Encounters of the Third Kind The Godfather,"$1,555,222,346 $1,184,894,657 $329,616,242 $194,341,297 $2,051,648,178 $1,982,216,289 $1,992,805,194 $1,763,173,107 $1,424,573,612 $1,463,083,028 $1,475,033,746 $1,680,418,130 $1,533,323,373 $1,637,629,395 $1,284,430,292 $1,093,528,925 $1,414,003,686 $1,315,784,734 $1,201,734,803 $1,397,820,769 $1,077,842,864 $1,328,789,651 $1,014,874,660 $1,044,595,967 $950,115,292 $867,181,032 $859,478,910 $788,637,045 $674,275,900 $631,324,060 $635,820,738 $665,811,485 $652,671,035 $695,887,131 $528,495,350 $534,469,608 $455,496,292 $378,584,881 $479,111,870 $481,364,863 $377,150,374 $365,559,677 $103,099,333 $55,923,180 $133,823,770 $40,055,891 $133,698,921",182 138 91 129 266 300 244 259 227 255 251 266 236 219 206 237 241 258 235 235 231 134 123 132 138 89 79 80 71 66 65 72 60 61 64 66 48 45 44 40 42 27 11 9 8 2 1,"$8,545,177 $8,586,193 $3,622,156 $1,506,521 $7,712,963 $6,607,387 $8,167,234 $6,807,618 $6,275,654 $5,737,580 $5,876,628 $6,317,361 $6,497,132 $7,477,759 $6,235,098 $4,614,046 $5,867,235 $5,099,940 $5,113,765 $5,948,173 $4,665,986 $9,916,340 $8,251,013 $7,913,605 $6,884,893 $9,743,607 $10,879,479 $9,857,963 $9,496,843 $9,565,516 $9,781,857 $9,247,381 $10,877,850 $11,407,985 $8,257,739 $8,098,024 $9,489,506 $8,412,997 $10,888,906 $12,034,121 $8,979,770 $13,539,247 $9,372,666 $6,213,686 $16,727,971 $20,027,945 $133,698,921"
3,Summer,2023 2022 2021 2020 2019 2018 2017 2016 2015 2014 2013 2012 2011 2010 2009 2008 2007 2006 2005 2004 2003 2002 2001 2000 1999 1998 1997 1996 1995 1994 1993 1992 1991 1990 1989 1988 1987 1986 1985 1984 1983 1982 1981 1980 1979 1978 1977 1976 1975,Barbie Top Gun: Maverick Black Widow Tenet The Lion King Incredibles 2 Wonder Woman Finding Dory Jurassic World Guardians of the Galaxy Iron Man 3 The Avengers Harry Potter and the Deathly Hallows: Part 2 Toy Story 3 Transformers: Revenge of the Fallen The Dark Knight Spider-Man 3 Pirates of the Caribbean: Dead Man's Chest Star Wars: Episode III - Revenge of the Sith Shrek 2 Finding Nemo Spider-Man Shrek Mission: Impossible II Star Wars: Episode I - The Phantom Menace Armageddon Men in Black Independence Day Batman Forever The Lion King Jurassic Park Batman Returns Terminator 2: Judgment Day Ghost Batman Who Framed Roger Rabbit Beverly Hills Cop II Top Gun Rambo: First Blood Part II Ghostbusters Star Wars: Episode VI - Return of the Jedi E.T. 
the Extra-Terrestrial Superman II Star Wars: Episode V - The Empire Strikes Back The Amityville Horror Grease Star Wars: Episode IV - A New Hope The Omen Jaws,"$4,039,421,938 $3,391,665,529 $1,745,734,965 $71,117,769 $4,320,749,661 $4,412,728,849 $3,824,448,891 $4,491,890,332 $4,476,142,880 $4,062,705,153 $4,754,082,341 $4,291,816,905 $4,402,553,608 $4,213,490,001 $4,326,804,910 $4,129,682,368 $4,164,268,696 $3,750,369,368 $3,530,160,138 $3,876,367,758 $3,785,461,068 $3,715,824,440 $3,304,753,341 $3,050,878,545 $3,211,296,010 $2,721,911,156 $2,326,022,557 $2,268,748,161 $2,194,490,165 $2,151,163,748 $2,075,815,978 $1,710,353,382 $1,704,239,123 $1,766,312,192 $1,841,291,888 $1,489,693,241 $1,433,373,914 $1,225,644,621 $1,306,633,399 $1,430,034,362 $1,292,569,570 $1,297,124,548 $689,091,956 $678,552,323 $554,780,367 $465,965,898 $249,398,378 $60,922,980 $69,725,376",207 204 197 87 403 426 369 394 384 367 374 379 347 320 308 350 352 331 338 361 356 287 185 189 188 125 116 107 108 97 98 101 103 82 89 96 94 74 72 61 66 54 24 27 17 5 4 1 1,"$19,514,115 $16,625,811 $8,861,598 $817,445 $10,721,463 $10,358,518 $10,364,360 $11,400,736 $11,656,622 $11,070,041 $12,711,450 $11,324,055 $12,687,474 $13,167,156 $14,048,067 $11,799,092 $11,830,308 $11,330,421 $10,444,260 $10,737,860 $10,633,317 $12,947,123 $17,863,531 $16,142,214 $17,081,361 $21,775,289 $20,051,918 $21,203,253 $20,319,353 $22,176,945 $21,181,795 $16,934,191 $16,546,010 $21,540,392 $20,688,672 $15,517,637 $15,248,658 $16,562,765 $18,147,686 $23,443,186 $19,584,387 $24,020,824 $28,712,164 $25,131,567 $32,634,139 $93,193,179 $62,349,594 $60,922,980 $69,725,376"
4,Winter,2023 2022 2021 2020 2019 2018 2017 2016 2015 2014 2013 2012 2011 2010 2009 2008 2007 2006 2005 2004 2003 2002 2001 2000 1999 1998 1997 1996 1995 1994 1993 1992 1991 1990 1989 1988 1987 1986 1985 1984 1983 1982 1981 1980 1979 1978,"Avatar: The Way of Water Spider-Man: No Way Home The Croods: A New Age Bad Boys for Life Glass Black Panther Hidden Figures Deadpool American Sniper The Lego Movie Identity Thief The Vow The Green Hornet Avatar Gran Torino Juno Night at the Museum Big Momma's House 2 Hitch The Passion of the Christ Chicago A Beautiful Mind Hannibal Scream 3 Payback Titanic Star Wars: Episode IV - A New Hope1997 Special Edition Mr. Holland's Opus Legends of the Fall Mrs. Doubtfire Aladdin The Hand That Rocks the Cradle Home Alone Born on the Fourth of July Rain Man Good Morning, Vietnam Platoon The Color Purple Beverly Hills Cop Terms of Endearment Tootsie Arthur The Incredible Shrinking Woman Kramer vs. Kramer The Warriors Saturday Night Fever","$988,194,743 $661,066,924 $111,175,629 $1,504,156,401 $1,323,177,659 $1,833,913,655 $1,573,307,580 $1,574,337,691 $1,775,193,282 $1,632,094,517 $1,339,107,335 $1,515,408,914 $1,266,845,880 $1,562,819,621 $1,779,967,719 $1,531,520,035 $1,261,241,362 $1,274,579,280 $1,257,197,005 $1,410,917,542 $1,394,186,085 $1,233,942,113 $1,205,276,250 $980,306,373 $921,159,154 $1,131,769,680 $1,032,076,617 $828,682,036 $664,469,322 $716,830,195 $663,118,243 $735,623,263 $723,885,584 $560,575,443 $542,521,648 $494,294,932 $538,751,137 $528,305,068 $423,291,845 $339,011,395 $352,893,290 $452,029,099 $22,282,732 $116,522,486 $35,517,896 $120,714,492",132 94 77 208 201 220 197 191 224 228 204 209 192 175 177 205 193 204 193 221 227 112 108 114 112 71 78 66 62 61 61 52 50 53 53 48 50 52 51 35 30 31 2 10 2 3,"$7,486,323 $7,032,626 $1,443,839 $7,231,521 $6,582,973 $8,335,971 $7,986,332 $8,242,605 $7,924,970 $7,158,309 $6,564,251 $7,250,760 $6,598,155 $8,930,397 $10,056,314 $7,470,829 $6,534,929 $6,247,937 $6,513,974 $6,384,242 
$6,141,788 $11,017,340 $11,159,965 $8,599,178 $8,224,635 $15,940,418 $13,231,751 $12,555,788 $10,717,247 $11,751,314 $10,870,790 $14,146,601 $14,477,711 $10,576,895 $10,236,257 $10,297,811 $10,775,022 $10,159,712 $8,299,840 $9,686,039 $11,763,109 $14,581,583 $11,141,366 $11,652,248 $17,758,948 $40,238,164"


The seasonal dataframe shows the top-grossing movie in each season per year.
The highest grossing movie for each year is:
2023 : Barbie
2022 : Top Gun: Maverick
2021 : Black Widow
2020 : Bad Boys for Life
2019 : The Lion King
2018 : Incredibles 2
2017 : Wonder Woman
2016 : Finding Dory
2015 : Jurassic World
2014 : Guardians of the Galaxy
2013 : Iron Man 3
2012 : The Avengers
2011 : Harry Potter and the Deathly Hallows: Part 2
2010 : Toy Story 3
2009 : Transformers: Revenge of the Fallen
2008 : The Dark Knight
2007 : Spider-Man 3
2006 : Pirates of the Caribbean: Dead Man's Chest
2005 : Star Wars: Episode III - Revenge of the Sith
2004 : Shrek 2
2003 : Finding Nemo
2002 : Spider-Man
2001 : Shrek
2000 : Mission: Impossible II
1999 : Star Wars: Episode I - The Phantom Menace
1998 : Armageddon
1997 : Men in Black
1996 : Independence Day
1995 : Batman Forever
1994 : The Lion King
1993 : Jurassic Park
1992 : Batman Returns
1991 : Terminator 2: Judgment Day
1990 : Ghost
1989 : Batman
1988 : Wh