# Scrape the top 100 songs from the Billboard Hot 100 chart

In [2]:
import re
import warnings

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Chart page to scrape.
url = "https://www.billboard.com/charts/hot-100/"

# Always pass a timeout: requests applies none by default, so a stalled
# connection would otherwise block this cell indefinitely.
response = requests.get(url, timeout=30)
# Raise on HTTP error statuses (4xx/5xx) rather than parsing an error page.
response.raise_for_status()

# Parse the returned HTML with the standard-library-backed parser.
soup = BeautifulSoup(response.text, "html.parser")

# Helper to normalise whitespace in scraped text
def clean_text(text):
    """Return ``text`` trimmed, with every whitespace run collapsed to one space.

    Leading and trailing whitespace is stripped first; after that, each run of
    one or more whitespace characters (spaces, tabs, newlines, ...) is replaced
    by a single space.
    """
    return re.sub(r'\s+', ' ', text.strip())

# ---- H3 TITLE CLASSES ----
# Every class listed here must be present on an <h3> for it to be selected.
h3_classes = [
    "c-title", "a-font-basic", "u-letter-spacing-0010",
    "u-max-width-397", "lrv-u-font-size-16", "u-line-height-22px",
    "u-word-spacing-0063", "a-truncate-ellipsis-2line", "lrv-u-margin-b-025",
]

# Compound CSS selector of the form "h3.class1.class2...".
h3_selector = "h3" + "".join("." + css_class for css_class in h3_classes)

# Whitespace-normalised text of each matching <h3>, in document order.
song_names = list(
    map(clean_text, (heading.get_text() for heading in soup.select(h3_selector)))
)

# ---- SPAN ARTIST CLASSES ----
# Every class listed here must be present on a <span> for it to be selected.
span_classes = [
    "c-label", "a-no-trucate", "a-font-secondary",
    "u-font-size-15", "u-letter-spacing-0010", "u-line-height-21px",
    "a-children-link-color-black", "lrv-u-display-block",
    "a-truncate-ellipsis-2line", "u-max-width-397",
]

# Compound CSS selector of the form "span.class1.class2...".
span_selector = "span" + "".join("." + css_class for css_class in span_classes)

# Whitespace-normalised text of each matching <span>, in document order.
artist_names = list(
    map(clean_text, (label.get_text() for label in soup.select(span_selector)))
)

# ---- Create DataFrame ----
# Titles and artists are scraped by two independent selectors and paired purely
# by position. If the counts differ, at least one selector missed (or
# over-matched) elements and the pairing may be misaligned, so report it
# instead of truncating silently.
if len(song_names) != len(artist_names):
    warnings.warn(
        f"Scraped {len(song_names)} song titles but {len(artist_names)} artist names; "
        "keeping only the leading rows common to both, which may be misaligned."
    )

# Keep only as many rows as both lists can supply.
min_len = min(len(song_names), len(artist_names))
df = pd.DataFrame({
    "song_title": song_names[:min_len],
    "artist_name": artist_names[:min_len]
})

print(df)


                           song_title  \
0     All I Want For Christmas Is You   
1                      Last Christmas   
2   Rockin' Around The Christmas Tree   
3                    Jingle Bell Rock   
4                              Golden   
..                                ...   
95                           For Good   
96                       No Good Deed   
97         Let Alone The One You Love   
98             The Life Of A Showgirl   
99              When Did You Get Hot?   

                                 artist_name  
0                               Mariah Carey  
1                                      Wham!  
2                                 Brenda Lee  
3                                Bobby Helms  
4       HUNTR/X: EJAE, Audrey Nuna & REI AMI  
..                                       ...  
95             Cynthia Erivo & Ariana Grande  
96                             Cynthia Erivo  
97                               Olivia Dean  
98  Taylor Swift Featuring Sabrina Ca

# Get the Spotify track ID for every song

In [3]:
import spotipy
import pandas as pd
import json
from spotipy.oauth2 import SpotifyClientCredentials
import config

# Build the Spotipy client, authenticating with the client ID and secret
# read from the local `config` module.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=config.client_id,
        client_secret=config.client_secret,
    )
)


# One entry per DataFrame row, in row order; None where the search found nothing.
track_ids = []

# Walk the title and artist columns in parallel, one search per row.
for song_title, artist_name in zip(df['song_title'], df['artist_name']):
    query = f"{song_title} by {artist_name}"
    # limit=1 asks for only the top-ranked match.
    results = sp.search(q=query, limit=1, market="GB")
    print("query", query)

    # Use the first returned track's ID, or None when the result list is empty.
    matches = results['tracks']['items']
    track_id = matches[0]['id'] if matches else None
    print("track_id", track_id)
    track_ids.append(track_id)

# Attach the collected IDs as a new column aligned with the existing rows.
df['track_id'] = track_ids

query All I Want For Christmas Is You by Mariah Carey
track_id 1dsU1IcJ4MTZXeApbwuu4B
query Last Christmas by Wham!
track_id 3KaotdNHy44p4Ah9Yt0yKb
query Rockin' Around The Christmas Tree by Brenda Lee
track_id 0zr8GA2HroEZ1mtncgW3xI
query Jingle Bell Rock by Bobby Helms
track_id 5u54rqcQl2UYJgTyNLuTHv
query Golden by HUNTR/X: EJAE, Audrey Nuna & REI AMI
track_id 5tVnzVWIKrmmuOPGbXUbT4
query The Fate Of Ophelia by Taylor Swift
track_id 1dosSsKwYF7hoQA2kuu6wI
query Ordinary by Alex Warren
track_id 6qqrTXSdwiJaq8SO0X2lSe
query Santa Tell Me by Ariana Grande
track_id 26yZeDdPKAnPQrmxaXoqk3
query The Christmas Song (Merry Christmas To You) by Nat "King" Cole
track_id 4PK1GubWVTmaOi4Og0Phr4
query It's The Most Wonderful Time Of The Year by Andy Williams
track_id 4zSvqMbMKsyEVuiKXMY44O
query Underneath The Tree by Kelly Clarkson
track_id 45GY7KeQBmWgXvTlU6zFkU
query Man I Need by Olivia Dean
track_id 1qbmS6ep2hbBRaEZFpn7BX
query Let It Snow! Let It Snow! Let It Snow! by Dean Martin
track_id 

# Save the results to CSV

In [24]:
# Write the current contents of df to disk; index=False leaves out the row-index column.
df.to_csv(path_or_buf="trending_songs.csv", index=False)