In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta

In [2]:
# 1. Navigate to https://www.billboard.com/charts/hot-100/. Using BeautifulSoup, extract out the This Week, artist, song, Last Week, Peak Position, 
# and Weeks on Chart values into a pandas DataFrame. Hint: The HTML for the number one ranked song is slightly different from that of the rest of the songs.
url = 'https://www.billboard.com/charts/hot-100/'

# GET request
response = requests.get(url)

# Convert response to  BeautifulSoup object
soup = BeautifulSoup(response.content, 'html.parser')

In [3]:
this_week = []
artists = []
songs = []
last_week = []
peak_position = []
weeks_on_chart = []

# Find all chart items
chart_items = soup.find_all('div', class_='o-chart-results-list-row-container')

for item in chart_items:
    # Extract This Week (position)
    position = item.find('span', class_='c-label').get_text(strip=True)
    this_week.append(position)
    
    # Extract Song title
    song = item.find('h3', class_='c-title').get_text(strip=True)
    songs.append(song)
    
    # Extract Artist
    artist = item.find('span', class_='c-label').find_next('span', class_='c-label').get_text(strip=True)
    artists.append(artist)
    
    # Extract Last Week, Peak Position, Weeks on Chart
    meta_data = item.find_all('li', class_='o-chart-results-list__item')
    # Last Week is the third last li, Peak Position is the second last, Weeks on Chart is the last
    last_week.append(meta_data[-3].find('span', class_='c-label').get_text(strip=True))
    peak_position.append(meta_data[-2].find('span', class_='c-label').get_text(strip=True))
    weeks_on_chart.append(meta_data[-1].find('span', class_='c-label').get_text(strip=True))

# Create DataFrame
df = pd.DataFrame({
    'This Week': this_week,
    'Artist': artists,
    'Song': songs,
    'Last Week': last_week,
    'Peak Position': peak_position,
    'Weeks on Chart': weeks_on_chart
})

print(df)

   This Week                  Artist                  Song Last Week  \
0          1    Kendrick Lamar & SZA                Luther         1   
1          2  Lady Gaga & Bruno Mars      Die With A Smile         3   
2          3               Shaboozey    A Bar Song (Tipsy)         6   
3          4                     NEW          Just In Case         -   
4          5           Chappell Roan        Pink Pony Club         9   
..       ...                     ...                   ...       ...   
95        96              RE-\nENTRY          Push 2 Start         -   
96        97                     NEW           Sunset Blvd         -   
97        98           Playboi Carti             Fine Shit        33   
98        99              RE-\nENTRY        Que Pasaria...         -   
99       100              RE-\nENTRY  Voy A Llevarte Pa PR         -   

   Peak Position Weeks on Chart  
0              1             18  
1              1             32  
2              1             50  

In [4]:
# 2. After getting the code working for the current chart, navigate to last week's chart. Notice how the url for the page changes. 
# Write a function which will, given a date, return a pandas DataFrame containing the Billboard chart data for that date.
def get_billboard_chart(date):
    url = f'https://www.billboard.com/charts/hot-100/{date}/'
    headers = {"User-Agent": "Mozilla/5.0"}  # Bypass potential blocking
    response = requests.get(url, headers=headers)
    
    if response.status_code != 200:
        print("Failed to retrieve data")
        return pd.DataFrame()
    
    soup = BeautifulSoup(response.text, 'html.parser')
    
    this_week = []
    artists = []
    songs = []
    last_week = []
    peak_position = []
    weeks_on_chart = []

    chart_items = soup.find_all('div', class_='o-chart-results-list-row-container')
    
    for item in chart_items:
        position_tag = item.find('span', class_='c-label')
        song_tag = item.find('h3', class_='c-title')
        artist_tag = item.find('span', class_='c-label').find_next('span', class_='c-label')
        meta_data = item.find_all('li', class_='o-chart-results-list__item')
        
        if position_tag and song_tag and artist_tag and len(meta_data) >= 3:
            this_week.append(position_tag.get_text(strip=True))
            songs.append(song_tag.get_text(strip=True))
            artists.append(artist_tag.get_text(strip=True))
            last_week.append(meta_data[-3].find('span', class_='c-label').get_text(strip=True) if meta_data[-3].find('span', class_='c-label') else 'N/A')
            peak_position.append(meta_data[-2].find('span', class_='c-label').get_text(strip=True) if meta_data[-2].find('span', class_='c-label') else 'N/A')
            weeks_on_chart.append(meta_data[-1].find('span', class_='c-label').get_text(strip=True) if meta_data[-1].find('span', class_='c-label') else 'N/A')
    
    df = pd.DataFrame({
        'This Week': this_week,
        'Artist': artists,
        'Song': songs,
        'Last Week': last_week,
        'Peak Position': peak_position,
        'Weeks on Chart': weeks_on_chart
    })
    
    return df

# usage
df = get_billboard_chart('2024-03-30')
print(df)

   This Week                                             Artist  \
0          1                                        Teddy Swims   
1          2                                       Benson Boone   
2          3                                      Ariana Grande   
3          4                                        Jack Harlow   
4          5  ¥$: Ye & Ty Dolla $ign Featuring Rich The Kid ...   
..       ...                                                ...   
95        96                                                NEW   
96        97                                         Zach Bryan   
97        98                                                NEW   
98        99                            Kali Uchis & Peso Pluma   
99       100                                         RE-\nENTRY   

                                        Song Last Week Peak Position  \
0                               Lose Control         2             1   
1                           Beautiful Things       

In [5]:
# 3. Write a loop to retrieve the Billboard chart data for the last 10 weeks.

In [13]:
def get_billboard_chart(date):
    url = f'https://www.billboard.com/charts/hot-100/{date}/'
    headers = {"User-Agent": "Mozilla/5.0"}  # Bypass potential blocking
    response = requests.get(url, headers=headers)
    
    if response.status_code != 200:
        print(f"Failed to retrieve data for {date}. Status code: {response.status_code}")
        return pd.DataFrame()
    
    soup = BeautifulSoup(response.text, 'html.parser')
    
    this_week, artists, songs = [], [], []
    last_week, peak_position, weeks_on_chart = [], [], []

    chart_items = soup.find_all('div', class_='o-chart-results-list-row-container')
    
    for item in chart_items:
        position_tag = item.find('span', class_='c-label')
        song_tag = item.find('h3', class_='c-title')
        artist_tag = item.find('span', class_='c-label').find_next('span', class_='c-label')
        meta_data = item.find_all('li', class_='o-chart-results-list__item')
        
        this_week.append(position_tag.get_text(strip=True) if position_tag else 'N/A')
        songs.append(song_tag.get_text(strip=True) if song_tag else 'N/A')
        artists.append(artist_tag.get_text(strip=True) if artist_tag else 'N/A')
        last_week.append(meta_data[-3].find('span', class_='c-label').get_text(strip=True) if len(meta_data) >= 3 and meta_data[-3].find('span', class_='c-label') else 'N/A')
        peak_position.append(meta_data[-2].find('span', class_='c-label').get_text(strip=True) if len(meta_data) >= 2 and meta_data[-2].find('span', class_='c-label') else 'N/A')
        weeks_on_chart.append(meta_data[-1].find('span', class_='c-label').get_text(strip=True) if len(meta_data) >= 1 and meta_data[-1].find('span', class_='c-label') else 'N/A')
    
    df = pd.DataFrame({
        'Date': date,
        'This Week': this_week,
        'Artist': artists,
        'Song': songs,
        'Last Week': last_week,
        'Peak Position': peak_position,
        'Weeks on Chart': weeks_on_chart
    })
    
    return df

def get_last_n_weeks(n):
    all_charts = []
    current_date = datetime.today()
    for i in range(n):
        chart_date = (current_date - timedelta(weeks=i)).strftime('%Y-%m-%d')
        print(f"Fetching data for {chart_date}...")
        df = get_billboard_chart(chart_date)
        if not df.empty:
            all_charts.append(df)
    return pd.concat(all_charts, ignore_index=True) if all_charts else pd.DataFrame()


bb10 = get_last_n_weeks(10)
print(bb10)

Fetching data for 2025-04-07...
Fetching data for 2025-03-31...
Fetching data for 2025-03-24...
Fetching data for 2025-03-17...
Fetching data for 2025-03-10...
Fetching data for 2025-03-03...
Fetching data for 2025-02-24...
Fetching data for 2025-02-17...
Fetching data for 2025-02-10...
Fetching data for 2025-02-03...
           Date This Week                         Artist                Song  \
0    2025-03-31         1           Kendrick Lamar & SZA              Luther   
1    2025-03-31         2         Lady Gaga & Bruno Mars    Die With A Smile   
2    2025-03-31         3                      Shaboozey  A Bar Song (Tipsy)   
3    2025-03-31         4                            NEW        Just In Case   
4    2025-03-31         5                  Chappell Roan      Pink Pony Club   
..          ...       ...                            ...                 ...   
895  2025-02-03        96  Rauw Alejandro & Romeo Santos                Khe?   
896  2025-02-03        97               