# Spotify Website Data Scraping Using Selenium

In [1]:
pip install selenium 

Note: you may need to restart the kernel to use updated packages.


In [2]:
from selenium import webdriver 
from selenium.webdriver.chrome.options import Options 

options = Options()
options.add_argument('--headless=new') # to use headless browser

# initialize an instance of the chrome driver (browser)
driver = webdriver.Chrome(
    options=options,
)

# visit your target site
driver.get('https://open.spotify.com/')

In [3]:
from selenium.webdriver.common.by import By

# Fetch  the links of playlist present in home page
links = driver.find_elements(By.CLASS_NAME, 'Nqa6Cw3RkDMV8QnYreTr')

In [4]:
links

[<selenium.webdriver.remote.webelement.WebElement (session="0b833ceeb0609acca0525affd0788727", element="DEAE50AB64EA7190CF5804715122055A_element_53")>,
 <selenium.webdriver.remote.webelement.WebElement (session="0b833ceeb0609acca0525affd0788727", element="DEAE50AB64EA7190CF5804715122055A_element_54")>,
 <selenium.webdriver.remote.webelement.WebElement (session="0b833ceeb0609acca0525affd0788727", element="DEAE50AB64EA7190CF5804715122055A_element_55")>,
 <selenium.webdriver.remote.webelement.WebElement (session="0b833ceeb0609acca0525affd0788727", element="DEAE50AB64EA7190CF5804715122055A_element_56")>,
 <selenium.webdriver.remote.webelement.WebElement (session="0b833ceeb0609acca0525affd0788727", element="DEAE50AB64EA7190CF5804715122055A_element_57")>,
 <selenium.webdriver.remote.webelement.WebElement (session="0b833ceeb0609acca0525affd0788727", element="DEAE50AB64EA7190CF5804715122055A_element_58")>]

In [5]:
links_list = []
for link in links:
            links_list.append(link.get_attribute("href"))
        

In [6]:
links_list

['https://open.spotify.com/playlist/37i9dQZF1DXcBWIGoYBM5M',
 'https://open.spotify.com/playlist/37i9dQZF1DXdLK5wjKyhVm',
 'https://open.spotify.com/playlist/37i9dQZF1DWV7EzJMK2FUI',
 'https://open.spotify.com/playlist/37i9dQZF1DXaImRpG7HXqp',
 'https://open.spotify.com/playlist/37i9dQZF1DWZd79rJ6a7lp',
 'https://open.spotify.com/playlist/37i9dQZF1DXdbkmlag2h7b']

In [7]:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Initialize an instance of the Chrome driver (browser)
driver1 = webdriver.Chrome(options=options)

# Visit the first playlist link
driver1.get(links_list[0])

# Wait for the songs to be present on the page
element = WebDriverWait(driver,10).until(
    EC.presence_of_element_located((By.ID, 'main'))
)


In [8]:
# Extract songs from the playlist page
songs = driver1.find_elements(By.XPATH, '//a[@class="t_yrXoUO3qGsJS4Y6iXX"]/div')

In [9]:
songs

[<selenium.webdriver.remote.webelement.WebElement (session="6409b3b03fa8c352f647964a27027d8c", element="2D4F2D574277F61E1DEBA07D6CED4024_element_25")>,
 <selenium.webdriver.remote.webelement.WebElement (session="6409b3b03fa8c352f647964a27027d8c", element="2D4F2D574277F61E1DEBA07D6CED4024_element_26")>,
 <selenium.webdriver.remote.webelement.WebElement (session="6409b3b03fa8c352f647964a27027d8c", element="2D4F2D574277F61E1DEBA07D6CED4024_element_27")>,
 <selenium.webdriver.remote.webelement.WebElement (session="6409b3b03fa8c352f647964a27027d8c", element="2D4F2D574277F61E1DEBA07D6CED4024_element_28")>,
 <selenium.webdriver.remote.webelement.WebElement (session="6409b3b03fa8c352f647964a27027d8c", element="2D4F2D574277F61E1DEBA07D6CED4024_element_29")>,
 <selenium.webdriver.remote.webelement.WebElement (session="6409b3b03fa8c352f647964a27027d8c", element="2D4F2D574277F61E1DEBA07D6CED4024_element_30")>,
 <selenium.webdriver.remote.webelement.WebElement (session="6409b3b03fa8c352f647964a2702

In [10]:
# Assuming songs is a list of WebElement objects
for song in songs:
    song_text = song.text
    print(song_text)


Cruel Summer
Lovin On Me
Paint The Town Red
greedy
Snooze
Strangers
Standing Next to You
Water
vampire
What Was I Made For? [From The Motion Picture "Barbie"]
MONACO
Calm Down (with Selena Gomez)
Agora Hills
Popular (with Playboi Carti & Madonna) - The Idol Vol. 1 (Music from the HBO Original Series)
Feather
Stick Season
Houdini
Seven (feat. Latto) (Explicit Ver.)
Lace It (with Eminem & benny blanco)
I Remember Everything (feat. Kacey Musgraves)
Lose Control
My Love Mine All Mine
Kill Bill
Surround Sound (feat. 21 Savage & Baby Tate)
FTCU


# The main scrapping code starts here.

In [11]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Enable headless mode in Selenium
options = Options()
options.add_argument('--headless=new')  # Uncomment this line to use headless browser

# Initialize an instance of the Chrome driver (browser)
driver = webdriver.Chrome(options=options)

# Visit your target site
driver.get('https://open.spotify.com/')

# Fetch all the links of playlists present on the home page
links = driver.find_elements(By.CLASS_NAME, 'Nqa6Cw3RkDMV8QnYreTr')
links_list = []  

for link in links:
    links_list.append(link.get_attribute("href"))

# Close the initial driver as it's not needed anymore
#driver.quit() # uncomment this if want

In [12]:
links_list

[]

In [13]:
def get_song(driver1):
    try:
        # Extract songs from the playlist page
        songs = driver1.find_elements(By.XPATH, '//a[@class="t_yrXoUO3qGsJS4Y6iXX"]/div')
     
    except AttributeError:
        songs = []
        

    return songs

def get_playlist(driver1):
    try:
        # Extract playlist 
        playlists = driver1.find_elements(By.XPATH, '//span[@class="rEN7ncpaUeSGL9z0NGQR"]/h1')
        
    except AttributeError:
        playlists=[]
        
    return playlists

def get_singer(driver1):
    try:
        #Extract singer
        singers = driver1.find_elements(By.XPATH, '//div[@class="Type__TypeElement-sc-goli3j-0 bGROfl"]/a[1]')
        
    except AttributeError:
        singers=[]
        
    return singers

def get_album(driver1):
    try:
        #Extract singer
        albums = driver1.find_elements(By.XPATH, '//span[@class="Type__TypeElement-sc-goli3j-0 ieTwfQ"]/a')
        
    except AttributeError:
        albums=[]
        
    return albums

def get_song_duration(driver1):
    try:
        #Extract singer
        song_durations = driver1.find_elements(By.XPATH, '//div[@class="Type__TypeElement-sc-goli3j-0 bDHxRN Btg2qHSuepFGBG6X0yEN"]')
        
    except AttributeError:
        song_durations=[]
        
    return song_durations


In [14]:
# we are storing the data in dictionary of list so that we can change it into the dataframe later
d = {"playlist":[], "song_name":[], "singer":[], "album":[],"song_duration":[]}

# Loop over the link of playlist
for link in links_list:
    # Initialize an instance of the Chrome driver (browser)
    driver1 = webdriver.Chrome(options=options)
        
    # Visit the first playlist link
    driver1.get(link)
        
    # Wait for the songs to be present on the page
    element = WebDriverWait(driver1,60).until(
        EC.presence_of_element_located((By.ID, 'main'))
    )
          
    # Function calls to extract information for songs
    for song in get_song(driver1):
        d['song_name'].append(song.text)
        length=len(get_song(driver1))

    for playlist in get_playlist(driver1):
        for i in range(length):
            d['playlist'].append(playlist.text)
    
    for singer in get_singer(driver1):
        d['singer'].append(singer.text)
    
    for album in get_album(driver1):
        d['album'].append(album.text)
        
    for song_duration in get_song_duration(driver1):
        d['song_duration'].append(song_duration.text)
        


In [15]:
for key, value in d.items():
    length_of_list = len(value)
    print(f"The length of the list for key '{key}' is: {length_of_list}")

The length of the list for key 'playlist' is: 0
The length of the list for key 'song_name' is: 0
The length of the list for key 'singer' is: 0
The length of the list for key 'album' is: 0
The length of the list for key 'song_duration' is: 0


In [16]:
d['playlist']

[]

In [17]:
import pandas as pd
# to change data stored in dictionary to dataframe
spotify_df = pd.DataFrame.from_dict(d)

In [18]:
spotify_df

Unnamed: 0,playlist,song_name,singer,album,song_duration


In [19]:
# to save the data in csv file
spotify_df.to_csv("spotify_data.csv", header=True, index=False)

# END