<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Import-libraries" data-toc-modified-id="Import-libraries-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Import libraries</a></span></li><li><span><a href="#Store-the-hot-100-songs-list-from-billboard-url-in-a-variable" data-toc-modified-id="Store-the-hot-100-songs-list-from-billboard-url-in-a-variable-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Store the hot-100 songs list from billboard url in a variable</a></span></li><li><span><a href="#Download-html-with-a-get-request" data-toc-modified-id="Download-html-with-a-get-request-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Download html with a get request</a></span></li><li><span><a href="#Parse-html-(create-the-'soup')" data-toc-modified-id="Parse-html-(create-the-'soup')-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Parse html (create the 'soup')</a></span></li><li><span><a href="#Retrieve/extract-the-desired-info" data-toc-modified-id="Retrieve/extract-the-desired-info-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Retrieve/extract the desired info</a></span></li><li><span><a href="#Get-the-text" data-toc-modified-id="Get-the-text-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Get the text</a></span></li><li><span><a href="#Build-a-dataframe" data-toc-modified-id="Build-a-dataframe-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>Build a dataframe</a></span></li><li><span><a href="#Extra-questions" data-toc-modified-id="Extra-questions-8"><span class="toc-item-num">8&nbsp;&nbsp;</span>Extra questions</a></span></li></ul></div>

## Import libraries

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

# Billboard Hot 100 data

## Store the hot-100 songs list from billboard url in a variable

In [2]:
# Billboard Hot 100 chart page; this is the address requested in the download step.
url = "https://www.billboard.com/charts/hot-100"

## Download html with a get request

In [3]:
# Download the chart page. `requests` applies no timeout unless one is given,
# so an unresponsive server would block this cell indefinitely; cap the wait
# (in seconds) so the cell fails fast with a Timeout error instead.
response = requests.get(url, timeout=30)
# Show the HTTP status code as the cell output.
response.status_code
# 200 status code means OK!

200

## Parse html (create the 'soup')

In [4]:
# Parse the raw response bytes into a searchable tree using the "html.parser" backend.
soup = BeautifulSoup(response.content, "html.parser")

## Retrieve/extract the desired info

In [5]:
# Each chart column is stored in <span> elements that carry a column-specific
# class string. Run one search per column; the order of the class strings
# below must match the order of the names being unpacked.
songs, artists, last_week_ranks, peak_ranks, weeks_on_chart = (
    soup.find_all("span", class_=css_class)
    for css_class in (
        # song title
        "chart-element__information__song text--truncate color--primary",
        # artist
        "chart-element__information__artist text--truncate color--secondary",
        # last week's rank
        "chart-element__meta text--center color--secondary text--last",
        # peak rank
        "chart-element__meta text--center color--secondary text--peak",
        # weeks on chart
        "chart-element__meta text--center color--secondary text--week",
    )
)


In [6]:
# Inspect the matches: at this point each entry is still a <span> tag, not plain text.
songs

[<span class="chart-element__information__song text--truncate color--primary">My Universe</span>,
 <span class="chart-element__information__song text--truncate color--primary">Stay</span>,
 <span class="chart-element__information__song text--truncate color--primary">Industry Baby</span>,
 <span class="chart-element__information__song text--truncate color--primary">Way 2 Sexy</span>,
 <span class="chart-element__information__song text--truncate color--primary">Fancy Like</span>,
 <span class="chart-element__information__song text--truncate color--primary">Bad Habits</span>,
 <span class="chart-element__information__song text--truncate color--primary">Good 4 U</span>,
 <span class="chart-element__information__song text--truncate color--primary">Kiss Me More</span>,
 <span class="chart-element__information__song text--truncate color--primary">Knife Talk</span>,
 <span class="chart-element__information__song text--truncate color--primary">Levitating</span>,
 <span class="chart-element__inf

## Get the text

In [7]:
# Replace every scraped tag with its text content, in place, so the five lists
# keep their names for the cells that follow.
# Entries that are already plain strings are left as they are: a str has no
# .get_text(), so converting unconditionally would raise AttributeError the
# second time this cell is executed. The guard makes the cell safe to re-run.
for tags in (songs, artists, last_week_ranks, peak_ranks, weeks_on_chart):
    tags[:] = [tag if isinstance(tag, str) else tag.get_text() for tag in tags]

In [8]:
# Inspect again: after the text-extraction step the list holds plain strings.
songs

['My Universe',
 'Stay',
 'Industry Baby',
 'Way 2 Sexy',
 'Fancy Like',
 'Bad Habits',
 'Good 4 U',
 'Kiss Me More',
 'Knife Talk',
 'Levitating',
 'Essence',
 'Save Your Tears',
 'Montero (Call Me By Your Name)',
 'Shivers',
 'Heat Waves',
 'Need To Know',
 'Girls Want Girls',
 'You Right',
 "Beggin'",
 'Wockesha',
 "If I Didn't Love You",
 'Take My Breath',
 'Fair Trade',
 'Thats What I Want',
 'Traitor',
 'Cold Beer Calling My Name',
 'Chasing After You',
 'Bad Morning',
 'Pepas',
 'Deja Vu',
 'Happier Than Ever',
 'Your Heart',
 'Leave The Door Open',
 'Hurricane',
 'A-O-K',
 'Butter',
 'On My Side',
 'Too Easy',
 'Leave Before You Love Me',
 'No Where',
 'Peaches',
 'Family Ties',
 "Memory I Don't Mess With",
 'Late At Night',
 'Things A Man Oughta Know',
 'Cold Heart (PNAU Remix)',
 'Meet Me At Our Spot',
 'Life Support',
 'I Was On A Boat That Day',
 'Love Nwantiti (Ah Ah Ah)',
 'Wild Side',
 'Whole Lotta Money',
 'Hold Me Down',
 'Champagne Poetry',
 'Buy Dirt',
 'Have Mercy',

## Build a dataframe

In [9]:
# Assemble the scraped columns into a single table. Column names and value
# lists are paired positionally, so the two tuples must stay in the same order.
hot100 = pd.DataFrame(
    dict(
        zip(
            ("song", "artist", "last_week_rank", "peak_rank", "weeks_on_chart"),
            (songs, artists, last_week_ranks, peak_ranks, weeks_on_chart),
        )
    )
)

In [10]:
# Preview the first rows to check that the columns line up as expected.
hot100.head()

Unnamed: 0,song,artist,last_week_rank,peak_rank,weeks_on_chart
0,My Universe,Coldplay x BTS,-,1,1
1,Stay,The Kid LAROI & Justin Bieber,1,1,12
2,Industry Baby,Lil Nas X & Jack Harlow,2,2,10
3,Way 2 Sexy,Drake Featuring Future & Young Thug,3,1,4
4,Fancy Like,Walker Hayes,5,5,15


# World Singles Top 100 data

In [11]:
# World Singles Top 100 chart page on top40-charts.com.
url_ws100 = "https://top40-charts.com/chart.php?cid=35"

In [12]:
# Download the World Singles chart page. `requests` applies no timeout unless
# one is given, so cap the wait (in seconds) to keep an unresponsive server
# from blocking this cell indefinitely.
# NOTE: this rebinds `response`, replacing the Billboard response fetched earlier.
response = requests.get(url_ws100, timeout=30)
# Show the HTTP status code as the cell output (200 means OK).
response.status_code

200

In [53]:
# Parse the page held in `response` (whatever the most recent request cell
# stored there), then collect the chart rows: every <tr> carrying the
# "latc_song" class.
soup_ws100 = BeautifulSoup(response.content, "html.parser")
ws100_page = soup_ws100.find_all('tr', class_='latc_song')

In [54]:
# One accumulator list per output column.
ws100_songs = []
ws100_artists = []
ws100_last_week_ranks = []
ws100_peak_ranks = []
ws100_weeks_on_chart = []

for song in ws100_page:
    # Title and artist are read from the third and fourth links in the row.
    row_links = song.find_all('a')
    ws100_songs.append(row_links[2].get_text())
    ws100_artists.append(row_links[3].get_text())

    # Last week's rank is the third centred, non-wrapping cell.
    centred_cells = song.find_all('td', attrs={'class': 'text-nowrap text-center'})
    ws100_last_week_ranks.append(centred_cells[2].get_text())

    # Peak rank and weeks on chart sit inside <font> tags in the fourth and
    # fifth right-aligned cells.
    right_aligned_cells = song.find_all('td', attrs={'align': 'right'})
    ws100_peak_ranks.append(right_aligned_cells[3].find('font').get_text())
    ws100_weeks_on_chart.append(right_aligned_cells[4].find('font').get_text())
    

In [56]:
# Assemble the World Singles columns into a table, using the same column
# names as the Billboard frame. Pairs are listed in output column order.
ws100 = pd.DataFrame(
    dict(
        [
            ("song", ws100_songs),
            ("artist", ws100_artists),
            ("last_week_rank", ws100_last_week_ranks),
            ("peak_rank", ws100_peak_ranks),
            ("weeks_on_chart", ws100_weeks_on_chart),
        ]
    )
)

In [57]:
# Display the assembled frame; pandas elides the middle rows in the rendered output.
ws100

Unnamed: 0,song,artist,last_week_rank,peak_rank,weeks_on_chart
0,Stay,Kid Laroi & Justin Bieber,1,1,12
1,Bad Habits,Ed Sheeran,6,1,14
2,Industry Baby,Lil Nas X & Jack Harlow,5,2,10
3,Shivers,Ed Sheeran,2,2,3
4,Thats What I Want,Lil Nas X,3,3,2
...,...,...,...,...,...
95,Remote Control,Kanye West,62,62,2
96,Tu Mi Hai Capito,Madame & Sfera Ebbasta,RE,2,14
97,Todo De Ti,Rauw Alejandro,100,57,7
98,Ik Ga Zwemmen,Mart Hoogkamer,RE,6,24
