In [1]:
# 26th July 2025 
# Web Scraping Script 

import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Shared HTTP session with automatic retries.
# The Retry policy allows up to 5 retries with backoff_factor=1 and retries
# responses whose status code is listed in status_forcelist.
# raise_on_status=False makes an exhausted retry return the last response
# instead of raising.
retry = Retry(
    total=5,
    backoff_factor=1,
    status_forcelist=[429, 500, 502, 503, 504],
    raise_on_status=False,
)
adapter = HTTPAdapter(max_retries=retry)

session = requests.Session()
# Use the same retrying adapter for both URL schemes.
for scheme in ("https://", "http://"):
    session.mount(scheme, adapter)

# Years to scrape: 2010 through 2024 inclusive (range() excludes its stop value).
years = [*range(2010, 2025)]

# Accumulates the scraped records; starts out empty.
data = []

# Scrape one box-office listing page per year and append one record per movie
# row to `data`. A failure on one year is reported and skipped so that the
# remaining years are still attempted.
for year in years:
    url = f"https://www.bollywoodhungama.com/box-office-collections/filterbycountry/IND/{year}"
    print(f"🔄 Scraping year {year} → {url}")

    try:
        response = session.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=10)
        # The session's Retry policy is built with raise_on_status=False, so a
        # request that still fails after every retry comes back as an ordinary
        # response. Raise here so an HTTP error page is reported as an error
        # instead of being parsed and misreported as "no data table".
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        rows = soup.find_all("tr", class_="table-row")
        if not rows:
            print(f"⚠️ No data table found for year {year}")
            continue

        scraped = 0
        for row in rows:
            cells = row.find_all("td", class_="table-cell")
            if len(cells) < 6:
                # Not enough cells to fill every column; skip this row.
                continue
            data.append({
                "year": year,
                "movie_name": cells[0].get_text(strip=True),
                "release_date": cells[1].get_text(strip=True),
                "opening_day": cells[2].get_text(strip=True),
                "opening_weekend": cells[3].get_text(strip=True),
                "week_1": cells[4].get_text(strip=True),
                "lifetime_collection": cells[5].get_text(strip=True),
            })
            scraped += 1

        # Report the records actually stored, not the number of <tr> elements
        # matched (rows with fewer than six cells are skipped above).
        print(f"✅ {scraped} movies scraped for year {year}")

    except Exception as e:
        # Deliberately broad: the collected records live only in `data` until
        # the loop finishes, so one bad year must not abort the whole run.
        print(f"❌ Error scraping year {year}: {e}")

    finally:
        # Pause between requests whether or not this year succeeded, so a
        # failure is not followed immediately by the next request.
        time.sleep(2)

# Save to CSV
# The column order is pinned explicitly so the file always gets a header row,
# even when `data` is empty; otherwise an empty run writes a header-less file.
# The names and order match the keys of the records appended to `data`.
columns = [
    "year",
    "movie_name",
    "release_date",
    "opening_day",
    "opening_weekend",
    "week_1",
    "lifetime_collection",
]
df = pd.DataFrame(data, columns=columns)
df.to_csv("bollywood_box_office_2010_2024.csv", index=False)
print("📁 Data saved to 'bollywood_box_office_2010_2024.csv'")

🔄 Scraping year 2010 → https://www.bollywoodhungama.com/box-office-collections/filterbycountry/IND/2010
✅ 141 movies scraped for year 2010
🔄 Scraping year 2011 → https://www.bollywoodhungama.com/box-office-collections/filterbycountry/IND/2011
✅ 125 movies scraped for year 2011
🔄 Scraping year 2012 → https://www.bollywoodhungama.com/box-office-collections/filterbycountry/IND/2012
✅ 134 movies scraped for year 2012
🔄 Scraping year 2013 → https://www.bollywoodhungama.com/box-office-collections/filterbycountry/IND/2013
✅ 139 movies scraped for year 2013
🔄 Scraping year 2014 → https://www.bollywoodhungama.com/box-office-collections/filterbycountry/IND/2014
✅ 146 movies scraped for year 2014
🔄 Scraping year 2015 → https://www.bollywoodhungama.com/box-office-collections/filterbycountry/IND/2015
✅ 167 movies scraped for year 2015
🔄 Scraping year 2016 → https://www.bollywoodhungama.com/box-office-collections/filterbycountry/IND/2016
✅ 216 movies 