# Lab | Web Scraping Single Page (GNOD part 1)

In [1]:
from bs4 import BeautifulSoup

In [2]:
import requests
import pandas as pd

In [3]:
# 2. find url and store it in a variable
url = "https://www.popvortex.com/music/charts/top-100-songs.php"

In [4]:
# 3. download html with a get request
response = requests.get(url)

In [5]:
# Check if the request was successful (status code 200)
response.status_code # 200 status code means OK!

200

In [6]:
# 4.1. parse html (create the 'soup')
soup = BeautifulSoup(response.content, "html.parser")

In [7]:
# 4.2. check that the html code looks like it should
# soup

In [8]:
song_elements = soup.find_all('div',class_ ='chart-content')

In [9]:
for i, element in enumerate(song_elements):
        title = element.find('cite', class_='title').text.strip()
        artist = element.find('em', class_='artist').text.strip()
        genre = element.find('ul').find('li').text.strip()
      
        print(f"{i + 1}. {title} - {artist} - {genre}")

1. TEXAS HOLD 'EM - Beyoncé - Genre: Country
2. Lose Control - Teddy Swims - Genre: Pop
3. Beautiful Messes - Hillary Scott & The Scott Family - Genre: Country
4. Beautiful Things - Benson Boone - Genre: Pop
5. TEXAS HOLD 'EM - Beyoncé - Genre: Country
6. Flowers - Miley Cyrus - Genre: Pop
7. Lovin On Me - Jack Harlow - Genre: Hip-Hop / Rap
8. Selfish - Justin Timberlake - Genre: Pop
9. Turn the Lights Back On - Billy Joel - Genre: Pop
10. Don't Let the Old Man In - Toby Keith - Genre: Country
11. I Remember Everything (feat. Kacey Musgraves) - Zach Bryan - Genre: Country
12. Made For Me - Muni Long - Genre: R&B / Soul
13. Fast Car - Luke Combs - Genre: Country
14. 16 CARRIAGES - Beyoncé - Genre: Country
15. Where the Wild Things Are - Luke Combs - Genre: Country
16. Houdini - Dua Lipa - Genre: Pop
17. Live Like You Were Dying - Tim McGraw - Genre: Country
18. Save Me - Jelly Roll - Genre: Rap
19. Lil Boo Thang - Paul Russell - Genre: Pop
20. Training Season - Dua Lipa - New Release
21

In [10]:
titles = []
artists = []
genres = []
for i, element in enumerate(song_elements):
        title = element.find('cite', class_='title').text.strip()
        titles.append(title)
        artist = element.find('em', class_='artist').text.strip()
        artists.append(artist)
        genre = element.find('ul').find('li').text.strip()
        genres.append(genre)
        print(f"{title} - {artist} - {genre}")
        #data.append('title', 'artist', 'genre'])


TEXAS HOLD 'EM - Beyoncé - Genre: Country
Lose Control - Teddy Swims - Genre: Pop
Beautiful Messes - Hillary Scott & The Scott Family - Genre: Country
Beautiful Things - Benson Boone - Genre: Pop
TEXAS HOLD 'EM - Beyoncé - Genre: Country
Flowers - Miley Cyrus - Genre: Pop
Lovin On Me - Jack Harlow - Genre: Hip-Hop / Rap
Selfish - Justin Timberlake - Genre: Pop
Turn the Lights Back On - Billy Joel - Genre: Pop
Don't Let the Old Man In - Toby Keith - Genre: Country
I Remember Everything (feat. Kacey Musgraves) - Zach Bryan - Genre: Country
Made For Me - Muni Long - Genre: R&B / Soul
Fast Car - Luke Combs - Genre: Country
16 CARRIAGES - Beyoncé - Genre: Country
Where the Wild Things Are - Luke Combs - Genre: Country
Houdini - Dua Lipa - Genre: Pop
Live Like You Were Dying - Tim McGraw - Genre: Country
Save Me - Jelly Roll - Genre: Rap
Lil Boo Thang - Paul Russell - Genre: Pop
Training Season - Dua Lipa - New Release
Yeah! (feat. Lil Jon & Ludacris) - USHER - Genre: R&B / Soul
Fast Car - T

In [11]:
presis_df = pd.DataFrame({"title":titles,
                          "artist":artists,
                          "genre": genres})
presis_df

Unnamed: 0,title,artist,genre
0,TEXAS HOLD 'EM,Beyoncé,Genre: Country
1,Lose Control,Teddy Swims,Genre: Pop
2,Beautiful Messes,Hillary Scott & The Scott Family,Genre: Country
3,Beautiful Things,Benson Boone,Genre: Pop
4,TEXAS HOLD 'EM,Beyoncé,Genre: Country
...,...,...,...
95,Karma,Taylor Swift,Genre: Pop
96,Unstoppable,Sia,Genre: Pop
97,Lover,Taylor Swift,Genre: Pop
98,EASY,LE SSERAFIM,New Release


In [12]:
import re
from datetime import datetime

if not song_elements:
    print("No song elements found. Please check if the website structure has changed.")
    exit()

data = []
for i, element in enumerate(song_elements):
    title = element.find('cite', class_='title').text.strip()
    artist = element.find('em', class_='artist').text.strip()

    # Extracting genre
    genre = "Genre not found"
    ul_tag = element.find('ul')
    if ul_tag:
        for li_tag in ul_tag.find_all('li'):
            if 'Genre' in li_tag.text:
                genre = li_tag.text.strip().replace('Genre:', '')
                break
                
    # Extracting release date using regular expression
    release_date_text = ""
    release_date_match = re.search(r'Release Date: (\w+ \d{1,2}, \d{4})', element.get_text())
    if release_date_match:
        release_date_text = release_date_match.group(1)
        
        # Convert release date to date format
        release_date = datetime.strptime(release_date_text, "%B %d, %Y").strftime("%d/%m/%Y")

    data.append([title, artist, genre, release_date])

df = pd.DataFrame(data, columns=['title', 'artist', 'genre', 'release_date'])
display(df)

Unnamed: 0,title,artist,genre,release_date
0,TEXAS HOLD 'EM,Beyoncé,Country,11/02/2024
1,Lose Control,Teddy Swims,Pop,23/06/2023
2,Beautiful Messes,Hillary Scott & The Scott Family,Country,08/07/2016
3,Beautiful Things,Benson Boone,Pop,19/01/2024
4,TEXAS HOLD 'EM,Beyoncé,Country,09/02/2024
...,...,...,...,...
95,Karma,Taylor Swift,Pop,21/10/2022
96,Unstoppable,Sia,Pop,21/01/2016
97,Lover,Taylor Swift,Pop,16/08/2019
98,EASY,LE SSERAFIM,K-Pop,19/02/2024


# Lab | Web Scraping Single Page (GNOD part 2)¶

In [13]:
import random

In [14]:
# Recommend function for a random song if the input song is found in the DataFrame
def recommend_song(song_title):
    # Check if the song is in the DataFrame (case insensitive)
    if df['title'].str.lower().str.contains(song_title.lower()).any():
        # Get a random index that is not the index of the input song
        random_index = df[df['title'].str.lower() != song_title.lower()].sample().index[0]
        # Get the random song and artist
        random_song = df.loc[random_index, 'title']
        random_artist = df.loc[random_index, 'artist']
        return f"Thank you for the input and now we recommend \"{random_song}\" by {random_artist}"
    else:
        return "Thank you for the input but we currently have no recommendation for this song."

# Loop to recommend songs for 5 times
for i in range(5):
    input_song = input("Enter a song title: ")
    recommendation = recommend_song(input_song)
    print(recommendation)

Enter a song title: flower
Thank you for the input and now we recommend "Life With You" by Kelsey Hart
Enter a song title: flowers
Thank you for the input and now we recommend "Tennessee Whiskey" by Chris Stapleton
Enter a song title: lose controls
Thank you for the input but we currently have no recommendation for this song.
Enter a song title: lose
Thank you for the input and now we recommend "Standing Next to You (Band Version)" by Jung Kook
Enter a song title: karma
Thank you for the input and now we recommend "In Case You Didn't Know" by Brett Young


# Lab | Web Scraping Single Page (GNOD part 3)

In [15]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [16]:
secrets_file = open("spotify.txt","r")
string = secrets_file.read()

In [17]:
secrets_dict={}
for line in string.split('\n'):
    if len(line) > 0:
        #print(line.split(':'))
        secrets_dict[line.split(':')[0]]=line.split(':')[1].strip()

In [49]:
# Enter to Spotify

#Initialize SpotiPy with user credentials
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['clientid'],
                                                           client_secret=secrets_dict['clientsecret']))

In [33]:
# Spotify Playlist = "1TM3rECdFTc0R0tgiIf0oW"

In [42]:
# we will use our 'first paid music' playlist as an example:
playlist = sp.user_playlist_tracks("spotify", "1TM3rECdFTc0R0tgiIf0oW")

In [20]:
# this one is biiiig!
playlist["total"] 

4690

In [21]:
# playlist['items'] contains the tracks on the playlist
# playlist['items']

In [22]:
# we could use the url to the next page which is provided...
playlist['next']

'https://api.spotify.com/v1/playlists/1TM3rECdFTc0R0tgiIf0oW/tracks?offset=100&limit=100&additional_types=track'

In [23]:
from random import randint
from time import sleep

def get_playlist_tracks(playlist_id):
    results = sp.user_playlist_tracks("spotify",playlist_id)
    tracks = results['items']
    while results['next']!=None:
        results = sp.next(results)
        tracks = tracks + results['items']
        sleep(randint(1,3000)/1000) # respectful nap
    return tracks

In [24]:
all_tracks = get_playlist_tracks("1TM3rECdFTc0R0tgiIf0oW")
len(all_tracks)

4690

In [25]:
import pandas as pd
from pandas import json_normalize

In [26]:
tracks2 = json_normalize(all_tracks)

In [32]:
pd.set_option('display.max_columns',0)
print(tracks2.shape)
tracks2.head(20)

(4690, 40)


Unnamed: 0,added_at,is_local,primary_color,added_by.external_urls.spotify,added_by.href,added_by.id,added_by.type,added_by.uri,track.album.album_type,track.album.artists,track.album.available_markets,track.album.external_urls.spotify,track.album.href,track.album.id,track.album.images,track.album.name,track.album.release_date,track.album.release_date_precision,track.album.total_tracks,track.album.type,track.album.uri,track.artists,track.available_markets,track.disc_number,track.duration_ms,track.episode,track.explicit,track.external_ids.isrc,track.external_urls.spotify,track.href,track.id,track.is_local,track.name,track.popularity,track.preview_url,track.track,track.track_number,track.type,track.uri,video_thumbnail.url
0,2022-03-12T12:22:16Z,False,,https://open.spotify.com/user/ben.woody.cheval,https://api.spotify.com/v1/users/ben.woody.cheval,ben.woody.cheval,user,spotify:user:ben.woody.cheval,album,[{'external_urls': {'spotify': 'https://open.s...,[],https://open.spotify.com/album/2uRTsStAmo7Z2Uw...,https://api.spotify.com/v1/albums/2uRTsStAmo7Z...,2uRTsStAmo7Z2UwCIvuwMv,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",b'lieve i'm goin down...,2015-09-25,day,12.0,album,spotify:album:2uRTsStAmo7Z2UwCIvuwMv,[{'external_urls': {'spotify': 'https://open.s...,[],1,298760,False,False,USMTD1506585,https://open.spotify.com/track/3l9eg9RtisizG12...,https://api.spotify.com/v1/tracks/3l9eg9Rtisiz...,3l9eg9RtisizG12a1D6nZl,False,Pretty Pimpin,0,,True,1,track,spotify:track:3l9eg9RtisizG12a1D6nZl,
1,2022-03-12T12:22:16Z,False,,https://open.spotify.com/user/ben.woody.cheval,https://api.spotify.com/v1/users/ben.woody.cheval,ben.woody.cheval,user,spotify:user:ben.woody.cheval,album,[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",https://open.spotify.com/album/4OI9hKTm1QMRQun...,https://api.spotify.com/v1/albums/4OI9hKTm1QMR...,4OI9hKTm1QMRQunqHCfSSL,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Trouble Will Find Me,2013-05-20,day,13.0,album,spotify:album:4OI9hKTm1QMRQunqHCfSSL,[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",1,245240,False,True,GBAFL1300064,https://open.spotify.com/track/50M7nY1oQuNHecs...,https://api.spotify.com/v1/tracks/50M7nY1oQuNH...,50M7nY1oQuNHecs0ahWAtI,False,I Need My Girl,68,https://p.scdn.co/mp3-preview/d566beee419ec326...,True,10,track,spotify:track:50M7nY1oQuNHecs0ahWAtI,
2,2022-03-12T12:22:16Z,False,,https://open.spotify.com/user/ben.woody.cheval,https://api.spotify.com/v1/users/ben.woody.cheval,ben.woody.cheval,user,spotify:user:ben.woody.cheval,album,[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",https://open.spotify.com/album/1qhNLXBmt1EW6Kw...,https://api.spotify.com/v1/albums/1qhNLXBmt1EW...,1qhNLXBmt1EW6Kwt2A3u8M,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",On Fire,1989-09-01,day,13.0,album,spotify:album:1qhNLXBmt1EW6Kwt2A3u8M,[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",1,199466,False,False,USRY29600081,https://open.spotify.com/track/2TTAPkrGQQOZkaU...,https://api.spotify.com/v1/tracks/2TTAPkrGQQOZ...,2TTAPkrGQQOZkaUTUlt21Q,False,Strange,48,https://p.scdn.co/mp3-preview/8942abcf1bc1fe32...,True,4,track,spotify:track:2TTAPkrGQQOZkaUTUlt21Q,
3,2022-03-12T12:22:16Z,False,,https://open.spotify.com/user/ben.woody.cheval,https://api.spotify.com/v1/users/ben.woody.cheval,ben.woody.cheval,user,spotify:user:ben.woody.cheval,album,[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",https://open.spotify.com/album/1tl1oov2DNSZou6...,https://api.spotify.com/v1/albums/1tl1oov2DNSZ...,1tl1oov2DNSZou6LMtElC6,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",1184,2002-06-12,day,8.0,album,spotify:album:1tl1oov2DNSZou6LMtElC6,[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",1,295131,False,False,NOFVH0151010,https://open.spotify.com/track/7BvcpEdO7PUDGGS...,https://api.spotify.com/v1/tracks/7BvcpEdO7PUD...,7BvcpEdO7PUDGGSER1S9LA,False,Todeswalzer,36,https://p.scdn.co/mp3-preview/3cf49f28fa44b6bd...,True,1,track,spotify:track:7BvcpEdO7PUDGGSER1S9LA,
4,2022-03-12T12:22:16Z,False,,https://open.spotify.com/user/ben.woody.cheval,https://api.spotify.com/v1/users/ben.woody.cheval,ben.woody.cheval,user,spotify:user:ben.woody.cheval,album,[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",https://open.spotify.com/album/4yP0hdKOZPNshxU...,https://api.spotify.com/v1/albums/4yP0hdKOZPNs...,4yP0hdKOZPNshxUOjY0cZj,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",After Hours,2020-03-20,day,14.0,album,spotify:album:4yP0hdKOZPNshxUOjY0cZj,[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",1,200040,False,False,USUG11904206,https://open.spotify.com/track/0VjIjW4GlUZAMYd...,https://api.spotify.com/v1/tracks/0VjIjW4GlUZA...,0VjIjW4GlUZAMYd2vXMi3b,False,Blinding Lights,93,,True,9,track,spotify:track:0VjIjW4GlUZAMYd2vXMi3b,
5,2022-03-12T12:22:16Z,False,,https://open.spotify.com/user/ben.woody.cheval,https://api.spotify.com/v1/users/ben.woody.cheval,ben.woody.cheval,user,spotify:user:ben.woody.cheval,album,[{'external_urls': {'spotify': 'https://open.s...,[],https://open.spotify.com/album/07e1O0K6zbwLR2p...,https://api.spotify.com/v1/albums/07e1O0K6zbwL...,07e1O0K6zbwLR2plst56ap,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",From The Very Depths,2015-01-26,day,14.0,album,spotify:album:07e1O0K6zbwLR2plst56ap,[{'external_urls': {'spotify': 'https://open.s...,[],1,242253,False,False,GBUM71405210,https://open.spotify.com/track/7AEkZ1EgzDrled3...,https://api.spotify.com/v1/tracks/7AEkZ1EgzDrl...,7AEkZ1EgzDrled3sxpPHZ4,False,Long Haired Punks,0,,True,6,track,spotify:track:7AEkZ1EgzDrled3sxpPHZ4,
6,2022-03-12T12:22:16Z,False,,https://open.spotify.com/user/ben.woody.cheval,https://api.spotify.com/v1/users/ben.woody.cheval,ben.woody.cheval,user,spotify:user:ben.woody.cheval,album,[{'external_urls': {'spotify': 'https://open.s...,[],https://open.spotify.com/album/0G2tg0gxWc0YzxN...,https://api.spotify.com/v1/albums/0G2tg0gxWc0Y...,0G2tg0gxWc0YzxNX7pc6JW,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Devoid of Light,2016-04-04,day,5.0,album,spotify:album:0G2tg0gxWc0YzxNX7pc6JW,[{'external_urls': {'spotify': 'https://open.s...,[],1,356847,False,False,USA2P1634626,https://open.spotify.com/track/4pkADG8kSJzbTjQ...,https://api.spotify.com/v1/tracks/4pkADG8kSJzb...,4pkADG8kSJzbTjQxvpjbdY,False,Devoid of Light,0,,True,2,track,spotify:track:4pkADG8kSJzbTjQxvpjbdY,
7,2022-03-12T12:22:16Z,False,,https://open.spotify.com/user/ben.woody.cheval,https://api.spotify.com/v1/users/ben.woody.cheval,ben.woody.cheval,user,spotify:user:ben.woody.cheval,album,[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",https://open.spotify.com/album/2NrYPcMmQBlbBxo...,https://api.spotify.com/v1/albums/2NrYPcMmQBlb...,2NrYPcMmQBlbBxopc2XlzS,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",GLOW ON,2021-08-27,day,15.0,album,spotify:album:2NrYPcMmQBlbBxopc2XlzS,[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",1,155199,False,False,NLA322100049,https://open.spotify.com/track/5iXnD2VizcAbErp...,https://api.spotify.com/v1/tracks/5iXnD2VizcAb...,5iXnD2VizcAbErpkcuNQ6I,False,MYSTERY,61,https://p.scdn.co/mp3-preview/5597c95b04ab82d6...,True,1,track,spotify:track:5iXnD2VizcAbErpkcuNQ6I,
8,2022-03-12T12:22:16Z,False,,https://open.spotify.com/user/ben.woody.cheval,https://api.spotify.com/v1/users/ben.woody.cheval,ben.woody.cheval,user,spotify:user:ben.woody.cheval,album,[{'external_urls': {'spotify': 'https://open.s...,[],https://open.spotify.com/album/5bbb7E51zaDCuD8...,https://api.spotify.com/v1/albums/5bbb7E51zaDC...,5bbb7E51zaDCuD85uLyFkK,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Rain Dogs,1985-09-30,day,19.0,album,spotify:album:5bbb7E51zaDCuD85uLyFkK,[{'external_urls': {'spotify': 'https://open.s...,[],1,226666,False,False,USIR28500395,https://open.spotify.com/track/5TeoCsHbzWo9UgM...,https://api.spotify.com/v1/tracks/5TeoCsHbzWo9...,5TeoCsHbzWo9UgMW3rv7JL,False,Clap Hands,0,,True,2,track,spotify:track:5TeoCsHbzWo9UgMW3rv7JL,
9,2022-03-12T12:22:16Z,False,,https://open.spotify.com/user/ben.woody.cheval,https://api.spotify.com/v1/users/ben.woody.cheval,ben.woody.cheval,user,spotify:user:ben.woody.cheval,album,[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",https://open.spotify.com/album/6EsOjYE3bNeDfdw...,https://api.spotify.com/v1/albums/6EsOjYE3bNeD...,6EsOjYE3bNeDfdwOehAUaN,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Savage Gold,2014-06-10,day,10.0,album,spotify:album:6EsOjYE3bNeDfdwOehAUaN,[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",1,328942,False,False,US2641422606,https://open.spotify.com/track/71MHVUJYwhVuKZm...,https://api.spotify.com/v1/tracks/71MHVUJYwhVu...,71MHVUJYwhVuKZmt7EgJFS,False,Edge of Darkness,3,https://p.scdn.co/mp3-preview/cde403fdb382bcd0...,True,6,track,spotify:track:71MHVUJYwhVuKZmt7EgJFS,


In [28]:
artists_df2 = pd.DataFrame(columns=['href', 'id', 'name', 'type', 'uri', 'external_urls.spotify','song_id', 'song_name', 'popularity' ])
for i in tracks2.index:
    artists_for_song = json_normalize(tracks2.iloc[i]['track.artists'])
    artists_for_song['song_id']    = tracks2.iloc[i]['track.id']         # we want to keep song_id, it is the sae for all artists
    artists_for_song['song_name']  = tracks2.iloc[i]['track.name']       # we want to keep song_name, it is the sae for all artists
    artists_for_song['popularity'] = tracks2.iloc[i]['track.popularity'] # same for popularity   
    artists_df2 = pd.concat([artists_df2, artists_for_song], axis=0)

In [29]:
artists_df2.head()

Unnamed: 0,href,id,name,type,uri,external_urls.spotify,song_id,song_name,popularity
0,https://api.spotify.com/v1/artists/5gspAQIAH8n...,5gspAQIAH8nJUrMYgXjCJ2,Kurt Vile,artist,spotify:artist:5gspAQIAH8nJUrMYgXjCJ2,https://open.spotify.com/artist/5gspAQIAH8nJUr...,3l9eg9RtisizG12a1D6nZl,Pretty Pimpin,0
0,https://api.spotify.com/v1/artists/2cCUtGK9sDU...,2cCUtGK9sDU2EoElnk0GNB,The National,artist,spotify:artist:2cCUtGK9sDU2EoElnk0GNB,https://open.spotify.com/artist/2cCUtGK9sDU2Eo...,50M7nY1oQuNHecs0ahWAtI,I Need My Girl,68
0,https://api.spotify.com/v1/artists/6guTJsgPymD...,6guTJsgPymDUVfqDJyz5UG,Galaxie 500,artist,spotify:artist:6guTJsgPymDUVfqDJyz5UG,https://open.spotify.com/artist/6guTJsgPymDUVf...,2TTAPkrGQQOZkaUTUlt21Q,Strange,48
0,https://api.spotify.com/v1/artists/2ytfu1MWsf7...,2ytfu1MWsf763hCBQmaQr6,Windir,artist,spotify:artist:2ytfu1MWsf763hCBQmaQr6,https://open.spotify.com/artist/2ytfu1MWsf763h...,7BvcpEdO7PUDGGSER1S9LA,Todeswalzer,36
0,https://api.spotify.com/v1/artists/1Xyo4u8uXC1...,1Xyo4u8uXC1ZmMpatF05PJ,The Weeknd,artist,spotify:artist:1Xyo4u8uXC1ZmMpatF05PJ,https://open.spotify.com/artist/1Xyo4u8uXC1ZmM...,0VjIjW4GlUZAMYd2vXMi3b,Blinding Lights,93


In [30]:
df_final2 = artists_df2[['song_name', 'name', 'song_id', 'popularity']]
df_final2

Unnamed: 0,song_name,name,song_id,popularity
0,Pretty Pimpin,Kurt Vile,3l9eg9RtisizG12a1D6nZl,0
0,I Need My Girl,The National,50M7nY1oQuNHecs0ahWAtI,68
0,Strange,Galaxie 500,2TTAPkrGQQOZkaUTUlt21Q,48
0,Todeswalzer,Windir,7BvcpEdO7PUDGGSER1S9LA,36
0,Blinding Lights,The Weeknd,0VjIjW4GlUZAMYd2vXMi3b,93
...,...,...,...,...
1,Apologize,OneRepublic,6ucR4KfvsBFWCMVFDvyKKl,75
0,Goddamn Lonely Love - Remastered,Drive-By Truckers,241LK9hqfXJyWpQ7oClQSN,48
0,Graveless yet Dead,Convocation,6hF9etLMoADWLsoui5ejGW,19
0,Gimme! Gimme! Gimme! (A Man After Midnight),ABBA,3vkQ5DAB1qQMYO4Mr9zJN6,84


# Lab | Adding Audio Feature

In [35]:
# get the audio features for a song
sp.audio_features('3l9eg9RtisizG12a1D6nZl')

[{'danceability': 0.566,
  'energy': 0.664,
  'key': 8,
  'loudness': -9.081,
  'mode': 1,
  'speechiness': 0.0281,
  'acousticness': 0.0928,
  'instrumentalness': 0.305,
  'liveness': 0.0903,
  'valence': 0.764,
  'tempo': 91.117,
  'type': 'audio_features',
  'id': '3l9eg9RtisizG12a1D6nZl',
  'uri': 'spotify:track:3l9eg9RtisizG12a1D6nZl',
  'track_href': 'https://api.spotify.com/v1/tracks/3l9eg9RtisizG12a1D6nZl',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/3l9eg9RtisizG12a1D6nZl',
  'duration_ms': 298760,
  'time_signature': 4}]

In [44]:
df_final2.isna().sum()

song_name     0
name          0
song_id       1
popularity    0
dtype: int64

In [45]:
df_final2[df_final2['song_id'].isna()]

Unnamed: 0,song_name,name,song_id,popularity
0,Restless,40 Watt Sun,,0


In [46]:
df_final2 = df_final2.dropna()

In [47]:
df_final2.isna().sum()

song_name     0
name          0
song_id       0
popularity    0
dtype: int64

In [50]:
# Get the loop for the audio feature
chunks = [(i, i+100) for i in range(0, len(df_final2), 100)]
chunks
audio_features_list = []
for chunk in chunks:
    id_list100 = df_final2['song_id'][chunk[0]:chunk[1]]
    audio_features_list = audio_features_list + sp.audio_features(id_list100)
    sleep(randint(1,3000)/1000)
len(audio_features_list)

4953

In [51]:
audio_features_df = json_normalize(audio_features_list)

In [52]:
audio_features_df.drop_duplicates(inplace=True) # duplicates because some songs have more artists

In [53]:
df_w_audio_ft = pd.merge(left=df_final2,
                        right=audio_features_df,
                        how='inner',
                        left_on='song_id',
                        right_on='id')
df_w_audio_ft

Unnamed: 0,song_name,name,song_id,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,Pretty Pimpin,Kurt Vile,3l9eg9RtisizG12a1D6nZl,0,0.566,0.664,8,-9.081,1,0.0281,0.092800,0.305000,0.0903,0.764,91.117,audio_features,3l9eg9RtisizG12a1D6nZl,spotify:track:3l9eg9RtisizG12a1D6nZl,https://api.spotify.com/v1/tracks/3l9eg9Rtisiz...,https://api.spotify.com/v1/audio-analysis/3l9e...,298760,4
1,I Need My Girl,The National,50M7nY1oQuNHecs0ahWAtI,68,0.520,0.436,0,-11.508,0,0.0355,0.865000,0.065100,0.1230,0.114,126.053,audio_features,50M7nY1oQuNHecs0ahWAtI,spotify:track:50M7nY1oQuNHecs0ahWAtI,https://api.spotify.com/v1/tracks/50M7nY1oQuNH...,https://api.spotify.com/v1/audio-analysis/50M7...,245240,4
2,Strange,Galaxie 500,2TTAPkrGQQOZkaUTUlt21Q,48,0.312,0.844,7,-7.085,1,0.0511,0.079800,0.023100,0.0793,0.626,97.039,audio_features,2TTAPkrGQQOZkaUTUlt21Q,spotify:track:2TTAPkrGQQOZkaUTUlt21Q,https://api.spotify.com/v1/tracks/2TTAPkrGQQOZ...,https://api.spotify.com/v1/audio-analysis/2TTA...,199467,4
3,Todeswalzer,Windir,7BvcpEdO7PUDGGSER1S9LA,36,0.236,0.870,2,-4.747,0,0.0579,0.000011,0.930000,0.3050,0.186,167.845,audio_features,7BvcpEdO7PUDGGSER1S9LA,spotify:track:7BvcpEdO7PUDGGSER1S9LA,https://api.spotify.com/v1/tracks/7BvcpEdO7PUD...,https://api.spotify.com/v1/audio-analysis/7Bvc...,295131,4
4,Blinding Lights,The Weeknd,0VjIjW4GlUZAMYd2vXMi3b,93,0.514,0.730,1,-5.934,1,0.0598,0.001460,0.000095,0.0897,0.334,171.005,audio_features,0VjIjW4GlUZAMYd2vXMi3b,spotify:track:0VjIjW4GlUZAMYd2vXMi3b,https://api.spotify.com/v1/tracks/0VjIjW4GlUZA...,https://api.spotify.com/v1/audio-analysis/0VjI...,200040,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4948,Apologize,OneRepublic,6ucR4KfvsBFWCMVFDvyKKl,75,0.653,0.604,8,-6.017,1,0.0278,0.029200,0.000000,0.0970,0.101,118.016,audio_features,6ucR4KfvsBFWCMVFDvyKKl,spotify:track:6ucR4KfvsBFWCMVFDvyKKl,https://api.spotify.com/v1/tracks/6ucR4KfvsBFW...,https://api.spotify.com/v1/audio-analysis/6ucR...,184400,4
4949,Goddamn Lonely Love - Remastered,Drive-By Truckers,241LK9hqfXJyWpQ7oClQSN,48,0.427,0.541,7,-7.042,1,0.0294,0.107000,0.004810,0.2230,0.153,119.094,audio_features,241LK9hqfXJyWpQ7oClQSN,spotify:track:241LK9hqfXJyWpQ7oClQSN,https://api.spotify.com/v1/tracks/241LK9hqfXJy...,https://api.spotify.com/v1/audio-analysis/241L...,301720,4
4950,Graveless yet Dead,Convocation,6hF9etLMoADWLsoui5ejGW,19,0.262,0.689,2,-8.342,1,0.0705,0.036600,0.008330,0.1170,0.178,130.110,audio_features,6hF9etLMoADWLsoui5ejGW,spotify:track:6hF9etLMoADWLsoui5ejGW,https://api.spotify.com/v1/tracks/6hF9etLMoADW...,https://api.spotify.com/v1/audio-analysis/6hF9...,572149,4
4951,Gimme! Gimme! Gimme! (A Man After Midnight),ABBA,3vkQ5DAB1qQMYO4Mr9zJN6,84,0.749,0.491,10,-9.655,1,0.0403,0.020000,0.008990,0.1590,0.536,119.528,audio_features,3vkQ5DAB1qQMYO4Mr9zJN6,spotify:track:3vkQ5DAB1qQMYO4Mr9zJN6,https://api.spotify.com/v1/tracks/3vkQ5DAB1qQM...,https://api.spotify.com/v1/audio-analysis/3vkQ...,292613,4


In [54]:
print(df_w_audio_ft.shape)

(4953, 22)
