# Initial installation of required packages

In [79]:
# !pip install requests 
# !pip install beautifulsoup4 
# !pip install arrow

# Importing Packages

In [80]:
import requests
from bs4 import BeautifulSoup   
import pandas as pd
import arrow

In [81]:
# Prefix of every weekly global chart page; weeklyStats appends
# "<start>--<end>" (both formatted as YYYY-MM-DD) to build the full URL.
base_spotify_url = "https://spotifycharts.com/regional/global/weekly/"

 ## getLists function <br>
 Extracts the top 30 songs and their details from the 200 Beautiful Soup tags. <br>
 Creates one list per detail; these lists are later added to the final arrays used to create the DataFrame.
 

In [82]:
def getLists(start_time,end_time,position_tags,tracks_tags,stream_tags,url_tags,limit=30):
    """Build parallel column lists for the top rows of one weekly chart.

    Parameters
    ----------
    start_time, end_time
        Values stored unchanged alongside every extracted row.
    position_tags, tracks_tags, stream_tags
        Sequences of objects exposing a ``.text`` attribute, one per chart row.
    url_tags
        Sequence of objects whose ``find("a")`` result exposes
        ``attrs["href"]``, one per chart row.
    limit : int, optional
        Maximum number of rows to extract (default 30).

    Returns
    -------
    list
        ``[start_times, end_times, positions, tracks, streams, urls]`` - six
        lists of equal length, one entry per extracted row.

    Raises
    ------
    ValueError
        If ``limit`` is negative.

    Notes
    -----
    When fewer than ``limit`` complete rows are available, only the rows that
    are present are returned instead of failing with an IndexError.
    """
    if limit < 0:
        raise ValueError("limit must be non-negative")

    start_time_arr = []
    end_time_arr = []
    position_arr = []
    track_arr = []
    stream_arr = []
    url_arr = []

    # zip stops at the shortest input, so a short or uneven page yields only
    # complete rows and all six output lists keep the same length.
    rows = zip(position_tags[:limit], tracks_tags[:limit],
               stream_tags[:limit], url_tags[:limit])

    for position_tag, track_tag, stream_tag, url_tag in rows:
        position_arr.append(position_tag.text)

        start_time_arr.append(start_time)
        end_time_arr.append(end_time)

        # the track cell spans several lines; flatten it onto one line
        track_arr.append(track_tag.text.replace("\n"," "))

        stream_arr.append(stream_tag.text)

        # the track link is the href of the anchor inside the url cell
        url_arr.append(url_tag.find("a").attrs["href"])

    return [start_time_arr,end_time_arr,position_arr,track_arr,stream_arr, url_arr]

## weeklyStats function <br>
Uses `requests` to fetch the Spotify chart page for one week. <br>
The response is then parsed as HTML with Beautiful Soup, and all the required tags are passed to getLists.


In [83]:

def weeklyStats(start_time,end_time,timeout=30):
    """Download one weekly chart page and return its top rows as column lists.

    Parameters
    ----------
    start_time, end_time
        Objects whose ``format("YYYY-MM-DD")`` yields the chart's start and
        end dates (the notebook passes arrow objects).
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout a stalled connection would block the notebook
        indefinitely.

    Returns
    -------
    list
        The six parallel lists produced by ``getLists``.

    Raises
    ------
    Exception
        If the page does not answer with HTTP status 200.
    """
    # format each date once; the same strings are used in the URL and in the rows
    start_label = start_time.format("YYYY-MM-DD")
    end_label = end_time.format("YYYY-MM-DD")

    final_topic = base_spotify_url + start_label + "--" + end_label
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    response = requests.get(final_topic, headers=headers, timeout=timeout)
    if response.status_code != 200:
        raise Exception('Failed to load page {}'.format(final_topic))
    doc = BeautifulSoup(response.text,"html.parser")

    def cells(css_class):
        # every <td> of the parsed page that carries the given class
        return doc.find_all('td',{"class": css_class})

    position_tags = cells("chart-table-position")
    tracks_tags = cells("chart-table-track")
    stream_tags = cells("chart-table-streams")
    url_tags = cells("chart-table-image")

    return getLists(start_label,end_label,position_tags,tracks_tags,stream_tags, url_tags)
    
    

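## This is the controller <br>
The final arrays are initialized here. <br>
A while loop calls the weeklyStats function with weekly start and end dates, working backwards from 2021-10-29 towards the start of the year (2021-01-01); the last week fetched is 2021-01-08 to 2021-01-15, because the loop stops once the start date reaches 2021-01-01.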

In [85]:
# Most recent chart week: it ends on 2021-10-29 and starts one week earlier.
end_time = arrow.get("2021-10-29","YYYY-MM-DD")
start_time = end_time.shift(weeks=-1)

# Walk backwards while the week's start date is still after this date.
final_time = arrow.get("2021-01-01","YYYY-MM-DD")

# One accumulator per column, filled week by week.
final_position_arr, final_start_arr, final_end_arr = [], [], []
final_track_arr, final_stream_arr, final_url_arr = [], [], []

while start_time > final_time:
    arr_list = weeklyStats(start_time,end_time)

    # weeklyStats returns its columns in this order:
    # start, end, position, track, streams, url
    accumulators = (final_start_arr, final_end_arr, final_position_arr,
                    final_track_arr, final_stream_arr, final_url_arr)
    for accumulator, week_values in zip(accumulators, arr_list):
        accumulator.extend(week_values)

    # step both ends of the window back by one week
    start_time, end_time = start_time.shift(weeks=-1), end_time.shift(weeks=-1)




# Spotify Tracks API <br>
This is used to get additional information about each track, such as the duration, artists, featured artists and more, which the chart alone cannot provide.

In [126]:
# Headers sent with every Spotify Web API request below. The Authorization
# value is a placeholder and has to be replaced before the requests are made.
# NOTE(review): consider reading the token from an environment variable
# instead of pasting it into the notebook.
headers = {'Authorization': 'use your bearer token here'}

# Track ids are appended to this URL as a comma-separated list.
api_url = "https://api.spotify.com/v1/tracks?market=ES&ids="

# Keep only what follows the last "/" of each chart URL.
newList = [track_url[track_url.rindex("/")+1:] for track_url in final_url_arr]

In [None]:

# Number of track ids sent per request.
# NOTE(review): the Spotify "Get Several Tracks" endpoint caps the ids per
# call - confirm the limit before raising this value.
BATCH_SIZE = 30

tracks_arr = []
for i in range(0, len(newList), BATCH_SIZE):
    api_tracks = ",".join(newList[i:i+BATCH_SIZE])

    # get responses; the timeout keeps a stalled connection from hanging the notebook
    response_data = requests.get(api_url+api_tracks, headers=headers, timeout=30)

    # Fail with the HTTP error itself (e.g. an expired or missing token)
    # rather than with a KeyError on "tracks" further down.
    response_data.raise_for_status()

    response = response_data.json()
    tracks_arr.extend(response["tracks"])

In [140]:
# One output column per track attribute, filled in the order of tracks_arr.
final_albumName_arr = []
final_releaseDate_arr = []
final_artist_arr = []
final_features_arr = []
final_trackTime_arr = []
final_explicit_arr = []

for track in tracks_arr:
    # Look everything up first so a track is either recorded in full or not at all.
    album_info = track["album"]
    album_title = album_info["name"]
    released_on = album_info["release_date"]

    # The first listed artist is the main one; any others count as features.
    main_artist = track["artists"][0]["name"]
    featured_names = [guest["name"] for guest in track["artists"][1:]] or ["None"]

    duration_ms = track["duration_ms"]
    is_explicit = track["explicit"]

    final_albumName_arr.append(album_title)
    final_releaseDate_arr.append(released_on)
    final_artist_arr.append(main_artist)
    final_features_arr.append(",".join(featured_names))
    final_trackTime_arr.append(duration_ms)
    final_explicit_arr.append(is_explicit)


# Final Data Frame Created
Finally, all the final arrays are combined into a DataFrame, which is exported to CSV and used for data analysis.

In [143]:
# Column label -> list of values. The chart columns come from the scrape and
# the album/artist columns from the Tracks API; all lists must have the same
# length for the frame to be built.
data = pd.DataFrame(
    {
        "Position": final_position_arr,
        "Track Name": final_track_arr,
        "Streams": final_stream_arr,
        "Chart Start Date": final_start_arr,
        "Chart End Date": final_end_arr,
        "Album Name": final_albumName_arr,
        "Release Date": final_releaseDate_arr,
        "Artist": final_artist_arr,
        "Features": final_features_arr,
        "Song Duration(ms)": final_trackTime_arr,
        "Explicit": final_explicit_arr,
        "Url": final_url_arr,
    }
)

data

Unnamed: 0,Position,Track Name,Streams,Chart Start Date,Chart End Date,Album Name,Release Date,Artist,Features,Song Duration(ms),Explicit,Url
0,1,Easy On Me by Adele,57081735,2021-10-22,2021-10-29,Easy On Me,2021-10-14,Adele,,224694,False,https://open.spotify.com/track/0gplL1WMoJ6iYaP...
1,2,STAY (with Justin Bieber) by The Kid LAROI,43772708,2021-10-22,2021-10-29,F*CK LOVE 3: OVER YOU,2021-07-23,The Kid LAROI,Justin Bieber,141805,True,https://open.spotify.com/track/5PjdY0CKGZdEuoN...
2,3,INDUSTRY BABY (feat. Jack Harlow) by Lil Nas X,38131363,2021-10-22,2021-10-29,MONTERO,2021-09-17,Lil Nas X,Jack Harlow,212352,True,https://open.spotify.com/track/5Z9KJZvQzH6PFmb...
3,4,Heat Waves by Glass Animals,30071945,2021-10-22,2021-10-29,Dreamland (+ Bonus Levels),2020-08-06,Glass Animals,,238805,False,https://open.spotify.com/track/02MWAaffLxlfxAU...
4,5,MONEY by LISA,27961856,2021-10-22,2021-10-29,LALISA,2021-09-10,LISA,,168227,False,https://open.spotify.com/track/7hU3IHwjX150XLo...
...,...,...,...,...,...,...,...,...,...,...,...,...
1255,26,Afterglow by Ed Sheeran,13411972,2021-01-08,2021-01-15,Afterglow,2020-12-21,Ed Sheeran,,185486,False,https://open.spotify.com/track/0E4Y1XIbs8GrAT1...
1256,27,willow by Taylor Swift,13283159,2021-01-08,2021-01-15,evermore,2020-12-10,Taylor Swift,,214706,False,https://open.spotify.com/track/3Uo7WG0vmLQ07WB...
1257,28,Save Your Tears by The Weeknd,13229592,2021-01-08,2021-01-15,After Hours,2020-03-20,The Weeknd,,215626,True,https://open.spotify.com/track/5QO79kh1waicV47...
1258,29,Hawái by Maluma,13202130,2021-01-08,2021-01-15,PAPI JUANCHO,2020-08-21,Maluma,,199112,False,https://open.spotify.com/track/1yoMvmasuxZfqHE...


In [146]:
# Write the assembled frame to spotify-charts.csv in the working directory.
# to_csv is called with its defaults, so the row index is written as the
# first, unnamed column of the file.
data.to_csv("spotify-charts.csv")