## **Imports**

In [1]:
import os

from selenium import webdriver
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.chrome.options import Options as ChromeOptions

import pandas as pd

## **Create headless webdriver**

In [2]:
"""
Download the correct webdriver for your system and place it in this root folder
geckodriver download (for Firefox): https://github.com/mozilla/geckodriver/releases
chromedriver download (for Chrome): https://chromedriver.chromium.org/downloads
"""
# Browser to drive headlessly: "firefox" or "chrome".
browser = "firefox"

# NOTE(review): `executable_path` and `options.headless` are the Selenium 3 style of
# configuring a driver; newer Selenium releases replace them with Service objects —
# confirm the installed Selenium version before upgrading.
if browser == "firefox":
    firefox_options = FirefoxOptions()
    firefox_options.headless = True
    driver = webdriver.Firefox(options=firefox_options, executable_path="./geckodriver.exe")
elif browser == "chrome":
    chrome_options = ChromeOptions()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options, executable_path="./chromedriver.exe")
else:
    # Fail here with a clear message instead of a NameError on `driver` in a later cell.
    raise ValueError(f"Unsupported browser {browser!r}; expected 'firefox' or 'chrome'")

## **Load the Spotify Charts page**

In [3]:
# Chart selection. Each value becomes one path segment of the spotifycharts.com URL.
param = {
    # Chart type:
    #   'regional' => Top 200
    #   'viral'    => Viral 50
    "chart": "regional",
    # Country:
    #   'global' => Global chart
    #   'br'     => Country chart (https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2#Officially_assigned_code_elements)
    "country": "global",
    # Recurrence:
    #   'daily'  => Daily chart
    #   'weekly' => Weekly chart
    "recurrence": "daily",
    # Date:
    #   'latest'     => Latest chart
    #   'yyyy-mm-dd' => Specific date, for 'daily' recurrence
    #   'yyyy-mm-dd--YYYY-MM-DD' => Specific week range (e.g. 2021-07-30--2021-08-06), for 'weekly' recurrence
    "date": "latest",
}

# Navigate the headless browser to the selected chart.
driver.get(
    f"https://spotifycharts.com/{param['chart']}/{param['country']}/{param['recurrence']}/{param['date']}/"
)

## **Scrape the Spotify Charts table data**

In [4]:
# Both chart types share the same table layout; only the 'regional' chart
# has a fifth column holding the stream count.
has_streams = param["chart"] == "regional"

# Collect one dict per table row and build the DataFrame once at the end.
# Growing a DataFrame with pd.concat inside the loop re-copies every
# previously scraped row on each iteration.
records = []

# The locator strategy is given as the plain string "xpath" with the generic
# find_element(s) methods, which are available in both Selenium 3 and Selenium 4
# (the find_element(s)_by_xpath helpers are not available in newer releases).
table = driver.find_elements("xpath", "/html/body/div/div/div/div/span/table/tbody/tr")
for row in table:
    row_data = {
        "url": row.find_element("xpath", ".//td[1]/a").get_attribute("href"),
        "img": row.find_element("xpath", ".//td[1]/a/img").get_attribute("src"),
        "position": int(row.find_element("xpath", ".//td[2]").text),
        "track": row.find_element("xpath", ".//td[4]/strong").text,
        # The first three characters of the span text are dropped before the
        # remainder is split on ", " into a list of artist names.
        "artists": row.find_element("xpath", ".//td[4]/span").text[3:].split(", "),
    }
    if has_streams:
        # Strip thousands separators so the count parses as an int.
        row_data["streams"] = int(row.find_element("xpath", ".//td[5]").text.replace(",", ""))
    records.append(row_data)

# Columns follow the key order of the row dicts; an empty table yields an empty frame.
df = pd.DataFrame(records)

## **Close the webdriver**

In [5]:
# quit() ends the whole WebDriver session and shuts down the driver process;
# close() would only close the current browser window.
driver.quit()

In [6]:
# Preview the first five scraped rows.
df.head(n=5)

Unnamed: 0,url,img,position,track,artists
0,https://open.spotify.com/track/2G3pr93yMNJ2HcL...,https://i.scdn.co/image/ab67616d00004851f2fe6b...,1,Antes de Ir,"[Taty pink, Romeu]"
1,https://open.spotify.com/track/0wvV4P3u4wnVggA...,https://i.scdn.co/image/ab67616d00004851c7a709...,2,Viela,[Marcynho Sensação]
2,https://open.spotify.com/track/24zleF4VGzdFZPD...,https://i.scdn.co/image/ab67616d00004851090f4d...,3,Role,[Marcynho Sensação]
3,https://open.spotify.com/track/3Wrjm47oTz2sjIg...,https://i.scdn.co/image/ab67616d00004851fa0ab3...,4,Beggin',[Måneskin]
4,https://open.spotify.com/track/41pFFOnh1glVNIl...,https://i.scdn.co/image/ab67616d0000485161722b...,5,Se Joga no Passinho,"[Brisa Star, Thiago Jhonathan (TJ)]"


In [7]:
# Save the scraped chart as a semicolon-separated CSV named after the chart
# parameters. Uses the `os` module, which must be imported in the imports cell
# so this cell also works after Restart & Run All.
output_dir = "./data"

# exist_ok=True makes re-running the cell safe when the directory already exists.
# It still raises FileExistsError if the path exists but is not a directory.
os.makedirs(output_dir, exist_ok=True)
df.to_csv(
    f"{output_dir}/spotifycharts_{param['chart']}_{param['country']}_{param['recurrence']}_{param['date']}.csv",
    sep=";",
    index=False,
)

FileExistsError: [WinError 183] Não é possível criar um arquivo já existente: './data'