## Lab | Web Scraping Single Page

In [1]:
!pip install bs4



In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

# Column labels reused for every song/artist DataFrame built in this notebook.
columns = [
    "Song",
    "Artist",
]

In [3]:
# Download the Billboard Hot 100 chart page and parse it into a BeautifulSoup tree.
url = "https://www.billboard.com/charts/hot-100/"
# Without a timeout, requests waits indefinitely if the server never answers.
page = requests.get(url, timeout=30)
# Fail here on a 4xx/5xx reply instead of parsing an error page and only
# noticing when a later cell finds no matching tags.
page.raise_for_status()
# "html" is a feature name, not a specific parser: Beautiful Soup would pick
# whichever installed parser it ranks best, so the tree could differ between
# machines. Naming the standard-library parser keeps the result reproducible.
soup = BeautifulSoup(page.text, "html.parser")

In [4]:
# The #1 entry is styled differently from entries 2-100, so each group is
# looked up with its own `class` value. The strings are copied verbatim from the
# page markup (including the site's own "a-no-trucate" spelling) and are passed
# as the full attribute value, so they must not be reordered or "corrected".
story_title_id = "title-of-a-story"

song_top1_class = "c-title a-no-trucate a-font-primary-bold-s u-letter-spacing-0021 u-font-size-23@tablet lrv-u-font-size-16 u-line-height-125 u-line-height-normal@mobile-max a-truncate-ellipsis u-max-width-245 u-max-width-230@tablet-only u-letter-spacing-0028@tablet"
artist_top1_class = "c-label a-no-trucate a-font-primary-s lrv-u-font-size-14@mobile-max u-line-height-normal@mobile-max u-letter-spacing-0021 lrv-u-display-block a-truncate-ellipsis-2line u-max-width-330 u-max-width-230@tablet-only u-font-size-20@tablet"
song_top2_100_class = "c-title a-no-trucate a-font-primary-bold-s u-letter-spacing-0021 lrv-u-font-size-18@tablet lrv-u-font-size-16 u-line-height-125 u-line-height-normal@mobile-max a-truncate-ellipsis u-max-width-330 u-max-width-230@tablet-only"
artist_top2_100_class = "c-label a-no-trucate a-font-primary-s lrv-u-font-size-14@mobile-max u-line-height-normal@mobile-max u-letter-spacing-0021 lrv-u-display-block a-truncate-ellipsis-2line u-max-width-330 u-max-width-230@tablet-only"

# Chart position 1: song title (<h3>) and artist (<span>).
song_top1 = soup.find_all(
    "h3",
    attrs={"id": story_title_id, "class": song_top1_class},
)
artist_top1 = soup.find_all(
    "span",
    attrs={"class": artist_top1_class},
)

# Chart positions 2-100: same tags, different class lists.
song_top2_100 = soup.find_all(
    "h3",
    attrs={"id": story_title_id, "class": song_top2_100_class},
)
artist_top2_100 = soup.find_all(
    "span",
    attrs={"class": artist_top2_100_class},
)

In [5]:
# Sanity-check the scrape before songs are paired with artists. Each condition
# has its own assert and message so a failure shows which lookup stopped
# matching the page, instead of one opaque AssertionError for all four.
assert len(song_top1) == 1, (
    f"expected exactly 1 title for the #1 entry, found {len(song_top1)}"
)
assert len(artist_top1) == len(song_top1), (
    f"#1 entry: {len(song_top1)} title(s) but {len(artist_top1)} artist(s)"
)
assert len(song_top2_100) == 99, (
    f"expected 99 titles for entries 2-100, found {len(song_top2_100)}"
)
assert len(artist_top2_100) == len(song_top2_100), (
    f"entries 2-100: {len(song_top2_100)} title(s) but {len(artist_top2_100)} artist(s)"
)

In [6]:
# Build one (song, artist) tuple per chart entry: the #1 entry first, followed
# by entries 2-100 in page order. get_text(strip=True) removes the whitespace
# surrounding the text inside each tag.
song_artist_list = [
    (song_tag.get_text(strip=True), artist_tag.get_text(strip=True))
    for song_tag, artist_tag in zip(
        [song_top1[0], *song_top2_100],
        [artist_top1[0], *artist_top2_100],
    )
]
song_artist_list

[('Paint The Town Red', 'Doja Cat'),
 ('I Remember Everything', 'Zach Bryan Featuring Kacey Musgraves'),
 ('Fast Car', 'Luke Combs'),
 ('Cruel Summer', 'Taylor Swift'),
 ('Last Night', 'Morgan Wallen'),
 ('Dance The Night', 'Dua Lipa'),
 ('Snooze', 'SZA'),
 ('Fukumean', 'Gunna'),
 ('Vampire', 'Olivia Rodrigo'),
 ('Calm Down', 'Rema & Selena Gomez'),
 ('Rich Men North Of Richmond', 'Oliver Anthony Music'),
 ('Barbie World', 'Nicki Minaj & Ice Spice With Aqua'),
 ('Flowers', 'Miley Cyrus'),
 ('All My Life', 'Lil Durk Featuring J. Cole'),
 ('Religiously', 'Bailey Zimmerman'),
 ('Used To Be Young', 'Miley Cyrus'),
 ('Hey Driver', 'Zach Bryan Featuring The War And Treaty'),
 ('Need A Favor', 'Jelly Roll'),
 ("Thinkin' Bout Me", 'Morgan Wallen'),
 ('Anti-Hero', 'Taylor Swift'),
 ('Kill Bill', 'SZA'),
 ('What Was I Made For?', 'Billie Eilish'),
 ('Last Time I Saw You', 'Nicki Minaj'),
 ('Karma', 'Taylor Swift Featuring Ice Spice'),
 ("Creepin'", 'Metro Boomin, The Weeknd & 21 Savage'),
 ('Bad

In [7]:
# Tabulate the scraped pairs: one row per chart entry, labelled with the shared
# Song/Artist column names.
song_artist_df = pd.DataFrame(
    song_artist_list,
    columns=columns,
)
song_artist_df

Unnamed: 0,Song,Artist
0,Paint The Town Red,Doja Cat
1,I Remember Everything,Zach Bryan Featuring Kacey Musgraves
2,Fast Car,Luke Combs
3,Cruel Summer,Taylor Swift
4,Last Night,Morgan Wallen
...,...,...
95,Stand By Me,Lil Durk Featuring Morgan Wallen
96,Call Your Friends,Rod Wave
97,Your Heart Or Mine,Jon Pardi
98,Primera Cita,Carin Leon


## Lab | Web Scraping Multiple Pages

In [8]:
# Download the Time Out song list page and parse it into a BeautifulSoup tree.
url_nm = "https://www.timeout.com/music/best-sexy-songs"
# Without a timeout, requests waits indefinitely if the server never answers.
page_nm = requests.get(url_nm, timeout=30)
# Fail here on a 4xx/5xx reply instead of parsing an error page and only
# noticing when a later cell finds no matching tags.
page_nm.raise_for_status()
# "html" is a feature name, not a specific parser: Beautiful Soup would pick
# whichever installed parser it ranks best, so the tree could differ between
# machines. Naming the standard-library parser keeps the result reproducible.
soup_nm = BeautifulSoup(page_nm.text, "html.parser")

In [9]:
# CSS path from the page's main container down to each song heading, written as
# one step per line and joined with the child combinator. The joined string is
# the selector passed to select(); every step must match a direct child.
song_heading_selector = " > ".join(
    [
        "#main-container",
        "main",
        "div:nth-child(1)",
        "section._main_content_1p4mq_7",
        "div.zone._zoneFirst_abr0c_5._zone_abr0c_1",
        "div._zoneItems_4w5ul_1.zoneItems",
        "article",
        "div.articleContent._articleContent_vapn8_229",
        "div._title_vapn8_9",
        "div",
        "h3",
    ]
)
song_top_nm = soup_nm.select(song_heading_selector)
song_top_nm

[<h3 class="_h3_cuogz_1" data-testid="tile-title_testID"><span>1.</span> ‘Sexual Healing’ by Marvin Gaye</h3>,
 <h3 class="_h3_cuogz_1" data-testid="tile-title_testID"><span>2.</span> ‘I’m Gonna Love You Just A Little More Baby"’by Barry White</h3>,
 <h3 class="_h3_cuogz_1" data-testid="tile-title_testID"><span>3.</span> ‘Wicked Game’ by Chris Isaak</h3>,
 <h3 class="_h3_cuogz_1" data-testid="tile-title_testID"><span>4.</span> ‘Untitled (How Does It Feel?)’ by D’Angelo</h3>,
 <h3 class="_h3_cuogz_1" data-testid="tile-title_testID"><span>5.</span> ‘Fade Into You’ by Mazzy Star</h3>,
 <h3 class="_h3_cuogz_1" data-testid="tile-title_testID"><span>6.</span> ‘Need You Tonight’ by INXS</h3>,
 <h3 class="_h3_cuogz_1" data-testid="tile-title_testID"><span>7.</span> ‘Sexy MF’ by Prince</h3>,
 <h3 class="_h3_cuogz_1" data-testid="tile-title_testID"><span>8.</span> ‘Video Phone’ by Beyoncé</h3>,
 <h3 class="_h3_cuogz_1" data-testid="tile-title_testID"><span>9.</span> ‘The Sweetest Taboo’ by Sade<

In [10]:
# The selector is expected to return 60 song headings; report the actual count
# on failure so a page or selector change is easy to diagnose.
assert len(song_top_nm) == 60, (
    f"expected 60 song headings, found {len(song_top_nm)}"
)

In [11]:
# Reduce every <h3> heading to plain text. With strip=True each text fragment is
# stripped before joining, so the rank and the opening quote end up adjacent,
# e.g. "1.‘Sexual Healing’ by Marvin Gaye".
raw_titles = [heading.get_text(strip=True) for heading in song_top_nm]
raw_titles

['1.‘Sexual Healing’ by Marvin Gaye',
 '2.‘I’m Gonna Love You Just A Little More Baby"’by Barry White',
 '3.‘Wicked Game’ by Chris Isaak',
 '4.‘Untitled (How Does It Feel?)’ by D’Angelo',
 '5.‘Fade Into You’ by Mazzy Star',
 '6.‘Need You Tonight’ by INXS',
 '7.‘Sexy MF’ by Prince',
 '8.‘Video Phone’ by Beyoncé',
 '9.‘The Sweetest Taboo’ by Sade',
 '10.‘I’m on Fire’ by Bruce Springsteen',
 '11.‘Criminal’ by Fiona Apple',
 '12.‘Do You Mind’ by the xx',
 '13.‘Lady Marmalade’ by Labelle',
 '14.‘Love to Love You Baby’ by Donna Summer',
 '15.‘Lollipop’ by Lil Wayne',
 '16.‘Climax’ by Usher',
 '17.‘I Just Wanna Make Love to You’ by Etta James',
 '18.‘Sex with Me’ by Rihanna',
 '19.‘I Want Your Sex’ by George Michael',
 '20.‘I Feel You’ by Depeche Mode',
 '21.‘WAP’ by Cardi B and Megan Thee Stallion',
 "22.‘That's the Way Love Goes’ by Janet Jackson",
 '23.‘Rock the Boat’ by Aaliyah',
 '24.‘I Can Tell’ by 504 Boyz',
 '25.‘Friendly Skies’ by Missy Elliott featuring Ginuwine',
 '26.‘Lay It Down’

In [12]:
def _split_title_artist(raw_title):
    """Split one heading such as ``1.‘Wicked Game’ by Chris Isaak`` into two parts.

    Returns a two-element list ``[numbered_quoted_title, artist]``.

    The split is done once, from the right, so a title that itself contains the
    separator text still yields exactly two parts instead of three or more.
    " by " is tried first; "by " covers headings where the space before "by" is
    missing (e.g. ``...Baby"’by Barry White``).

    Raises:
        ValueError: if the heading contains neither separator, so a malformed
            entry is reported here rather than as an IndexError later on.
    """
    for separator in (" by ", "by "):
        if separator in raw_title:
            return raw_title.rsplit(separator, 1)
    raise ValueError(f"no 'by' separator found in heading {raw_title!r}")


song_artist_raw = [_split_title_artist(title) for title in raw_titles]
song_artist_raw

[['1.‘Sexual Healing’', 'Marvin Gaye'],
 ['2.‘I’m Gonna Love You Just A Little More Baby"’', 'Barry White'],
 ['3.‘Wicked Game’', 'Chris Isaak'],
 ['4.‘Untitled (How Does It Feel?)’', 'D’Angelo'],
 ['5.‘Fade Into You’', 'Mazzy Star'],
 ['6.‘Need You Tonight’', 'INXS'],
 ['7.‘Sexy MF’', 'Prince'],
 ['8.‘Video Phone’', 'Beyoncé'],
 ['9.‘The Sweetest Taboo’', 'Sade'],
 ['10.‘I’m on Fire’', 'Bruce Springsteen'],
 ['11.‘Criminal’', 'Fiona Apple'],
 ['12.‘Do You Mind’', 'the xx'],
 ['13.‘Lady Marmalade’', 'Labelle'],
 ['14.‘Love to Love You Baby’', 'Donna Summer'],
 ['15.‘Lollipop’', 'Lil Wayne'],
 ['16.‘Climax’', 'Usher'],
 ['17.‘I Just Wanna Make Love to You’', 'Etta James'],
 ['18.‘Sex with Me’', 'Rihanna'],
 ['19.‘I Want Your Sex’', 'George Michael'],
 ['20.‘I Feel You’', 'Depeche Mode'],
 ['21.‘WAP’', 'Cardi B and Megan Thee Stallion'],
 ["22.‘That's the Way Love Goes’", 'Janet Jackson'],
 ['23.‘Rock the Boat’', 'Aaliyah'],
 ['24.‘I Can Tell’', '504 Boyz'],
 ['25.‘Friendly Skies’', 'Mis

In [13]:
def _clean_song_title(numbered_title):
    """Return the bare song title from text such as ``1.‘Sexual Healing’``.

    Everything up to and including the first opening quote (‘) is discarded,
    which removes the rank prefix. A single trailing closing quote (’) is then
    removed if one is present.

    If the text contains no opening quote at all, the result is an empty string.
    """
    # Cut at the FIRST opening quote only, so an opening quote that occurs
    # inside the title itself is kept rather than silently deleted.
    quoted_title = numbered_title.partition("‘")[2].strip()
    # Remove the closing quote only when it is really there; slicing off the
    # last character unconditionally would eat a letter of the title.
    if quoted_title.endswith("’"):
        quoted_title = quoted_title[:-1]
    return quoted_title


# Each item of song_artist_raw is indexed as [numbered quoted title, artist].
song_artist_nm = [(_clean_song_title(i[0]), i[1].strip()) for i in song_artist_raw]
song_artist_nm

[('Sexual Healing', 'Marvin Gaye'),
 ('I’m Gonna Love You Just A Little More Baby"', 'Barry White'),
 ('Wicked Game', 'Chris Isaak'),
 ('Untitled (How Does It Feel?)', 'D’Angelo'),
 ('Fade Into You', 'Mazzy Star'),
 ('Need You Tonight', 'INXS'),
 ('Sexy MF', 'Prince'),
 ('Video Phone', 'Beyoncé'),
 ('The Sweetest Taboo', 'Sade'),
 ('I’m on Fire', 'Bruce Springsteen'),
 ('Criminal', 'Fiona Apple'),
 ('Do You Mind', 'the xx'),
 ('Lady Marmalade', 'Labelle'),
 ('Love to Love You Baby', 'Donna Summer'),
 ('Lollipop', 'Lil Wayne'),
 ('Climax', 'Usher'),
 ('I Just Wanna Make Love to You', 'Etta James'),
 ('Sex with Me', 'Rihanna'),
 ('I Want Your Sex', 'George Michael'),
 ('I Feel You', 'Depeche Mode'),
 ('WAP', 'Cardi B and Megan Thee Stallion'),
 ("That's the Way Love Goes", 'Janet Jackson'),
 ('Rock the Boat', 'Aaliyah'),
 ('I Can Tell', '504 Boyz'),
 ('Friendly Skies', 'Missy Elliott featuring Ginuwine'),
 ('Lay It Down', 'Al Green'),
 ('Montero (Call Me By Your Name)', "Lil' Nas X"),
 (

In [14]:
# Stack the Billboard rows and the Time Out rows into one table. ignore_index
# renumbers the rows 0..n-1 instead of repeating each frame's own index.
song_artist_nm_df = pd.DataFrame(song_artist_nm, columns=columns)
pd.concat(
    [song_artist_df, song_artist_nm_df],
    ignore_index=True,
)

Unnamed: 0,Song,Artist
0,Paint The Town Red,Doja Cat
1,I Remember Everything,Zach Bryan Featuring Kacey Musgraves
2,Fast Car,Luke Combs
3,Cruel Summer,Taylor Swift
4,Last Night,Morgan Wallen
...,...,...
155,No Diggity,Blackstreet featuring Dr. Dre
156,La Femme d’Argent,Air
157,I’ll Make Love To You,Boyz II Men
158,Freak Me,Silk
