In [0]:
%pip install lxml

In [0]:
import io
import os
import urllib.request

import pandas as pd

# Folder to save Billboard CSVs
output_folder = "billboard_year_end_charts"

# Years to scrape
years = list(range(2000, 2026))

# Sent with every page request so the fetch does not go out under urllib's
# generic default agent string. Add contact details here if you run this
# regularly.
USER_AGENT = "billboard-year-end-scraper/1.0 (pandas.read_html; educational use)"

# Seconds to wait on a single page before giving up on that year.
REQUEST_TIMEOUT = 30


def normalize_columns(columns):
    """Return snake_case string labels for an iterable of column labels.

    Plain string labels are stripped, lower-cased and have spaces replaced
    by underscores. Tuple labels (as produced by a multi-row table header)
    are flattened first: each level is stripped, pandas' ``Unnamed:``
    placeholders and repeated levels are dropped, and the rest is joined
    with a space. Any other label type is converted with ``str``.
    """
    names = []
    for col in columns:
        if isinstance(col, tuple):
            parts = []
            for level in col:
                text = str(level).strip()
                if text and not text.startswith("Unnamed:") and text not in parts:
                    parts.append(text)
            label = " ".join(parts)
        else:
            label = str(col).strip()
        names.append(label.lower().replace(" ", "_"))
    return names


def pick_chart_table(tables):
    """Return the first table that has a ``title`` column.

    Falls back to ``tables[0]`` when no table has one, so an empty list
    raises ``IndexError``.
    """
    for table in tables:
        if "title" in normalize_columns(table.columns):
            return table
    return tables[0]


def fetch_tables(url):
    """Download *url* and return every HTML table on it as a DataFrame list.

    The page is fetched with an explicit User-Agent and a timeout, then
    handed to ``pd.read_html`` through a ``StringIO`` wrapper rather than as
    a raw string. Network errors and ``pd.read_html`` errors propagate to
    the caller.
    """
    request = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
    with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as response:
        charset = response.headers.get_content_charset() or "utf-8"
        html = response.read().decode(charset, errors="replace")
    return pd.read_html(io.StringIO(html))


if __name__ == "__main__":
    os.makedirs(output_folder, exist_ok=True)

    for year in years:
        # Best-effort per year: a missing page or an unexpected layout is
        # reported and skipped so the remaining years are still scraped.
        try:
            url = f"https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_{year}"
            print(f"Scraping {url} ...")

            # Read all tables from the page
            tables = fetch_tables(url)

            # Prefer the table that actually looks like the chart
            df = pick_chart_table(tables)

            # Standardize column names
            df.columns = normalize_columns(df.columns)

            # Add year column
            df['year'] = year

            # Optional: put the key columns first.
            # NOTE(review): this only applies when the page's headers
            # normalize to exactly rank/title/artist; with any other headers
            # the columns stay in page order with 'year' last.
            lead = ['rank', 'title', 'artist', 'year']
            if all(name in df.columns for name in lead):
                df = df[lead + [c for c in df.columns if c not in lead]]

            # Save CSV
            file_path = os.path.join(output_folder, f'billboard_year_end_{year}.csv')
            df.to_csv(file_path, index=False)

            print(f"Saved CSV for {year} → {file_path}")

        except Exception as e:
            print(f"Error scraping {year}: {e}")

    print("All done!")