In [2]:
from dotenv import load_dotenv
import pandas as pd
import csv
import os
import base64
import requests
from requests import post, get
from scipy.stats import linregress
import json
import matplotlib.pyplot as plt
from pprint import pprint
from ids import id
from ids import secret

In [3]:
load_dotenv()
client_id = id
client_secret = secret

In [4]:
# Defining functions <3

def get_token():
    auth_string = client_id + ":" + client_secret
    auth_bytes = auth_string.encode("utf-8")
    auth_base64 = str(base64.b64encode(auth_bytes), "utf-8")

    url = "https://accounts.spotify.com/api/token"
    headers = {
        "Authorization" : "Basic " + auth_base64,
        "Content-Type" : "application/x-www-form-urlencoded"
    }

    data = {"grant_type" : "client_credentials"}

    result = requests.post(url, headers = headers, data = data)
    json_response = json.loads(result.content)
    token = json_response["access_token"]
    return token

def get_auth_header(token):
    return{"Authorization" : "Bearer " + token}

def search_for_playlist(token, playlist_name):
    url = "https://api.spotify.com/v1/search"
    headers = get_auth_header(token)
    query = f"?q={playlist_name}&type=playlist&limit=1"
    query_url = url + query
    result = requests.get(query_url, headers = headers)
    json_result_playlist = json.loads(result.content)["playlists"]["items"]
    if len(json_result_playlist) == 0:
        print("No playlist with this name found...")
        return None
    
    # pprint(json_result_playlist)
    return json_result_playlist[0]

def get_songs_in_playlist(token, artist_id):
    url = f"https://api.spotify.com/v1/playlists/{playlist_id}"
    headers = get_auth_header(token)
    result = requests.get(url, headers = headers)
    json_result = json.loads(result.content)["tracks"]["items"]
    # pprint(json_result)
    return json_result

def get_AF_in_songs(token, id_list1):    
    url = f"https://api.spotify.com/v1/audio-features?ids={id_list1}"
    headers = get_auth_header(token)
    response = requests.get(url, headers = headers)
    response_json = response.json()
    # pprint(response_json)
    return response_json

# Playlists Data Frames

In [5]:
# For 2019 playlist
token = get_token()
result = search_for_playlist(token, "Top Canciones 2019")
playlist_id = result["id"]
songs = get_songs_in_playlist(token, playlist_id)
song_list = []
for idx, song in enumerate(songs):
    song_data = {"Song":song["track"]["name"],
                 "Artist":song["track"]["artists"][0]["name"],
                 "Song Lenght (ms)":song["track"]["duration_ms"],
                 "Popularity":song["track"]["popularity"],
                 "Release date":song["track"]["album"]["release_date"],
                 "Song Id":song["track"]["id"]}
    song_list.append(song_data)
spoti_19 = pd.DataFrame(song_list)
# spoti_19.head()

In [6]:
# For 2020 playlist
token = get_token()
result = search_for_playlist(token, "Top Canciones de 2020")
playlist_id = result["id"]
songs = get_songs_in_playlist(token, playlist_id)
song_list = []
for idx, song in enumerate(songs):
    song_data = {"Song":song["track"]["name"],
                 "Artist":song["track"]["artists"][0]["name"],
                 "Song Lenght (ms)":song["track"]["duration_ms"],
                 "Popularity":song["track"]["popularity"],
                 "Release date":song["track"]["album"]["release_date"],
                 "Song Id":song["track"]["id"]}
    song_list.append(song_data)
spoti_20 = pd.DataFrame(song_list)
# spoti_20.head()

In [7]:
# For 2021 playlist
token = get_token()
result = search_for_playlist(token, "Top Canciones México 2021")
playlist_id = result["id"]
songs = get_songs_in_playlist(token, playlist_id)
song_list = []
for idx, song in enumerate(songs):
    song_data = {"Song":song["track"]["name"],
                 "Artist":song["track"]["artists"][0]["name"],
                 "Song Lenght (ms)":song["track"]["duration_ms"],
                 "Popularity":song["track"]["popularity"],
                 "Release date":song["track"]["album"]["release_date"],
                 "Song Id":song["track"]["id"]}
    song_list.append(song_data)
spoti_21 = pd.DataFrame(song_list)
# spoti_21.head()


In [8]:
# For 2022 playlist
token = get_token()
result = search_for_playlist(token, "Top Canciones México 2022")
playlist_id = result["id"]
songs = get_songs_in_playlist(token, playlist_id)
song_list = []
for idx, song in enumerate(songs):
    song_data = {"Song":song["track"]["name"],
                 "Artist":song["track"]["artists"][0]["name"],
                 "Song Lenght (ms)":song["track"]["duration_ms"],
                 "Popularity":song["track"]["popularity"],
                 "Release date":song["track"]["album"]["release_date"],
                 "Song Id":song["track"]["id"]}
    song_list.append(song_data)
spoti_22 = pd.DataFrame(song_list)
# spoti_22.head()

In [9]:
# Merge the playlists data frames
spoti=spoti_19.merge(spoti_20, how='outer')
spoti=spoti.merge(spoti_21, how='outer')
spoti=spoti.merge(spoti_22, how='outer')
spoti.drop_duplicates
spoti

Unnamed: 0,Song,Artist,Song Lenght (ms),Popularity,Release date,Song Id
0,Calma - Remix,Pedro Capó,238200,77,2018-10-05,5iwz1NiezX7WWjnCgY5TH4
1,Con Calma,Daddy Yankee,193226,83,2019-01-24,5w9c2J52mkdntKOmRLeM2m
2,Te Vi,Piso 21,231848,80,2018-12-14,059bcIhyc2SBwm6sw2AZzd
3,Otro Trago,Sech,225933,75,2019-04-19,1Ej96GIBCTvgH7tNX1r3qr
4,Callaita,Bad Bunny,250533,80,2019-05-31,2TH65lNHgvLxCKXM3apjxI
...,...,...,...,...,...,...
230,Un Ratito,Bad Bunny,176936,82,2022-05-06,5CzixCxDkRXX9mScCmah8O
231,Si Fuera Fácil,Grupo Marca Registrada,170496,77,2022-05-27,7Jph6HJbfURt4Fea19IED3
232,Dos Mil 16,Bad Bunny,208615,81,2022-05-06,4d4ZXH4dr5bYfgErHiZCX2
233,Siempre Pendientes,Peso Pluma,186828,85,2022-08-15,3gfSH9aYZbyGjdmbeoWliw


# Song Features

In [10]:
# Tomar la columna Song Id del DataFrame con los Top Tracks y convertir los valores en una lista de strings separadas por comas, para pasarlo así al request de los Audio Features

id_list = spoti["Song Id"]
id_list1 = id_list[0:100]
id_list1 = ",".join(id_list1)
id_list2= id_list[100:200]
id_list2 = ",".join(id_list2)
id_list3= id_list[200:]
id_list3 = ",".join(id_list3)
# print(id_list1)


In [11]:
# DataFrame con los Audio Features del 0 al 100

token = get_token()
audio_features_data = get_AF_in_songs(token, id_list1)["audio_features"]
# pprint(audio_features_data)
audio_features_list = []


for idx, af in enumerate(audio_features_data):
    audio_features = {"Acousticness":af["acousticness"],
                 "Danceability":af["danceability"],
                 "Energy":af["energy"],
                 "Instrumentalness":af["energy"],
                 "Liveness":af["energy"],
                 "Speechiness":af["speechiness"],
                 "Tempo":af["tempo"],
                 "Key" : af["key"],
                 "Loudness" : af["loudness"],
                 "Mode" : af["mode"],
                 "Time Signature" : af["time_signature"],
                 "Valence":af["valence"],
                 "Song Id":af["id"]}
    audio_features_list.append(audio_features)
audio_features_df1 = pd.DataFrame(audio_features_list)
audio_features_df1


Unnamed: 0,Acousticness,Danceability,Energy,Instrumentalness,Liveness,Speechiness,Tempo,Key,Loudness,Mode,Time Signature,Valence,Song Id
0,0.3230,0.826,0.773,0.773,0.773,0.0524,126.899,11,-4.218,0,4,0.761,5iwz1NiezX7WWjnCgY5TH4
1,0.1100,0.737,0.860,0.860,0.860,0.0593,93.989,8,-2.652,0,4,0.656,5w9c2J52mkdntKOmRLeM2m
2,0.0446,0.877,0.777,0.777,0.777,0.1170,102.020,1,-4.246,1,4,0.706,059bcIhyc2SBwm6sw2AZzd
3,0.1360,0.746,0.700,0.700,0.700,0.3410,176.044,0,-4.669,1,4,0.619,1Ej96GIBCTvgH7tNX1r3qr
4,0.6000,0.610,0.624,0.624,0.624,0.3090,176.169,2,-4.773,1,4,0.244,2TH65lNHgvLxCKXM3apjxI
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.0427,0.842,0.734,0.734,0.734,0.0588,137.958,1,-5.065,0,4,0.952,22vgEDb5hykfaTwLuskFGD
96,0.1330,0.806,0.778,0.778,0.778,0.0616,95.993,1,-4.577,0,4,0.840,71iAechwVKGu26pahzFL0k
97,0.1900,0.770,0.750,0.750,0.750,0.0785,99.997,0,-2.715,1,4,0.609,08aYFNUTIOMGq93e2VSArQ
98,0.1880,0.857,0.772,0.772,0.772,0.1950,94.030,10,-4.737,0,4,0.589,28O1p0bQXH2F6Qn8jBZZR4


In [12]:
# DataFrame con los Audio Features del 101 al 200

token = get_token()
audio_features_data = get_AF_in_songs(token, id_list2)["audio_features"]
# pprint(audio_features_data)
audio_features_list = []


for idx, af in enumerate(audio_features_data):
    audio_features = {"Acousticness":af["acousticness"],
                 "Danceability":af["danceability"],
                 "Energy":af["energy"],
                 "Instrumentalness":af["energy"],
                 "Liveness":af["energy"],
                 "Speechiness":af["speechiness"],
                 "Tempo":af["tempo"],
                 "Key" : af["key"],
                 "Loudness" : af["loudness"],
                 "Mode" : af["mode"],
                 "Time Signature" : af["time_signature"],
                 "Valence":af["valence"],
                 "Song Id":af["id"]}
    audio_features_list.append(audio_features)
audio_features_df2 = pd.DataFrame(audio_features_list)
audio_features_df2


Unnamed: 0,Acousticness,Danceability,Energy,Instrumentalness,Liveness,Speechiness,Tempo,Key,Loudness,Mode,Time Signature,Valence,Song Id
0,0.29500,0.803,0.715,0.715,0.715,0.2980,101.085,2,-3.280,1,4,0.574,7k4t7uLgtOxPwTpFmtJNTY
1,0.00146,0.514,0.730,0.730,0.730,0.0598,171.005,1,-5.934,1,4,0.334,0VjIjW4GlUZAMYd2vXMi3b
2,0.01030,0.607,0.829,0.829,0.829,0.3790,96.172,2,-4.074,1,4,0.685,2DEZmgHKAvm41k4J3R2E9Y
3,0.18500,0.783,0.727,0.727,0.727,0.3890,180.067,6,-3.454,0,4,0.558,1yoMvmasuxZfqHEipJhRbp
4,0.11000,0.883,0.668,0.668,0.668,0.1130,87.972,9,-2.957,1,4,0.676,7sQKy5vlPQllr0k9IjYJv3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.09930,0.650,0.715,0.715,0.715,0.2530,106.672,5,-5.198,0,4,0.187,1IHWl5LamUGEuP4ozKQSXZ
96,0.08000,0.647,0.686,0.686,0.686,0.0413,79.928,3,-5.745,0,4,0.268,3k3NWokhRRkEPhCzPmV8TW
97,0.14100,0.801,0.475,0.475,0.475,0.0516,98.047,7,-8.797,0,4,0.234,5Eax0qFko2dh7Rl2lYs3bx
98,0.01250,0.621,0.782,0.782,0.782,0.0440,128.033,2,-5.548,1,4,0.550,2tTmW7RDtMQtBk7m2rYeSw


In [13]:
# DataFrame con los Audio Features del 201 al 235

token = get_token()
audio_features_data = get_AF_in_songs(token, id_list3)["audio_features"]
# pprint(audio_features_data)
audio_features_list = []


for idx, af in enumerate(audio_features_data):
    audio_features = {"Acousticness":af["acousticness"],
                 "Danceability":af["danceability"],
                 "Energy":af["energy"],
                 "Instrumentalness":af["energy"],
                 "Liveness":af["energy"],
                 "Speechiness":af["speechiness"],
                 "Tempo":af["tempo"],
                 "Key" : af["key"],
                 "Loudness" : af["loudness"],
                 "Mode" : af["mode"],
                 "Time Signature" : af["time_signature"],
                 "Valence":af["valence"],
                 "Song Id":af["id"]}
    audio_features_list.append(audio_features)
audio_features_df3 = pd.DataFrame(audio_features_list)
audio_features_df3

Unnamed: 0,Acousticness,Danceability,Energy,Instrumentalness,Liveness,Speechiness,Tempo,Key,Loudness,Mode,Time Signature,Valence,Song Id
0,0.294,0.804,0.674,0.674,0.674,0.0333,99.968,5,-5.453,0,4,0.292,6Xom58OOXk2SoU711L2IXO
1,0.0151,0.829,0.799,0.799,0.799,0.0897,97.013,9,-5.389,1,4,0.471,4tYFy8ALRjIZvnvSLw5lxN
2,0.395,0.551,0.436,0.436,0.436,0.0428,188.651,8,-8.029,1,3,0.862,74WROFXsox6wkeUrEK2DS6
3,0.0889,0.639,0.703,0.703,0.703,0.0794,169.888,4,-6.33,0,4,0.138,4gzsuuZypVbxs0Af1LSZyB
4,0.356,0.869,0.694,0.694,0.694,0.0783,90.02,1,-3.35,0,4,0.511,6mmPpaltUZK7xjNlBPQQ0p
5,0.342,0.52,0.731,0.731,0.731,0.0557,173.93,6,-5.338,0,4,0.662,4Dvkj6JhhA12EX05fT7y2e
6,0.0706,0.876,0.498,0.498,0.498,0.0478,122.016,10,-7.511,1,4,0.428,31i56LZnwE6uSu3exoHjtB
7,0.437,0.588,0.452,0.452,0.452,0.0268,139.953,7,-4.75,1,4,0.734,0UXlu64mDLvfzR8IXMz06J
8,0.55,0.702,0.586,0.586,0.586,0.118,112.852,8,-5.317,1,4,0.965,5kpxVMDvYGRmNqawPqDTYR
9,0.0924,0.87,0.548,0.548,0.548,0.077,96.018,10,-5.253,0,4,0.832,1O2pcBJGej0pmH2Y9XZMs6


# Merge and export

In [14]:
total_af = audio_features_df1.merge(audio_features_df2, how='outer')
total_af = total_af.merge(audio_features_df3, how='outer')
total_af

Unnamed: 0,Acousticness,Danceability,Energy,Instrumentalness,Liveness,Speechiness,Tempo,Key,Loudness,Mode,Time Signature,Valence,Song Id
0,0.3230,0.826,0.773,0.773,0.773,0.0524,126.899,11,-4.218,0,4,0.761,5iwz1NiezX7WWjnCgY5TH4
1,0.1100,0.737,0.860,0.860,0.860,0.0593,93.989,8,-2.652,0,4,0.656,5w9c2J52mkdntKOmRLeM2m
2,0.0446,0.877,0.777,0.777,0.777,0.1170,102.020,1,-4.246,1,4,0.706,059bcIhyc2SBwm6sw2AZzd
3,0.1360,0.746,0.700,0.700,0.700,0.3410,176.044,0,-4.669,1,4,0.619,1Ej96GIBCTvgH7tNX1r3qr
4,0.6000,0.610,0.624,0.624,0.624,0.3090,176.169,2,-4.773,1,4,0.244,2TH65lNHgvLxCKXM3apjxI
...,...,...,...,...,...,...,...,...,...,...,...,...,...
230,0.3050,0.787,0.546,0.546,0.546,0.0532,93.050,0,-7.094,0,4,0.222,5CzixCxDkRXX9mScCmah8O
231,0.4460,0.794,0.677,0.677,0.677,0.0713,101.467,10,-9.969,1,3,0.611,7Jph6HJbfURt4Fea19IED3
232,0.1230,0.817,0.670,0.670,0.670,0.0506,130.058,1,-6.534,1,3,0.495,4d4ZXH4dr5bYfgErHiZCX2
233,0.3310,0.774,0.748,0.748,0.748,0.0352,135.929,0,-7.086,1,3,0.714,3gfSH9aYZbyGjdmbeoWliw


In [15]:
final_spoti_df = spoti.merge(total_af, on="Song Id", how="outer")
final_spoti_df

Unnamed: 0,Song,Artist,Song Lenght (ms),Popularity,Release date,Song Id,Acousticness,Danceability,Energy,Instrumentalness,Liveness,Speechiness,Tempo,Key,Loudness,Mode,Time Signature,Valence
0,Calma - Remix,Pedro Capó,238200,77,2018-10-05,5iwz1NiezX7WWjnCgY5TH4,0.3230,0.826,0.773,0.773,0.773,0.0524,126.899,11,-4.218,0,4,0.761
1,Con Calma,Daddy Yankee,193226,83,2019-01-24,5w9c2J52mkdntKOmRLeM2m,0.1100,0.737,0.860,0.860,0.860,0.0593,93.989,8,-2.652,0,4,0.656
2,Te Vi,Piso 21,231848,80,2018-12-14,059bcIhyc2SBwm6sw2AZzd,0.0446,0.877,0.777,0.777,0.777,0.1170,102.020,1,-4.246,1,4,0.706
3,Otro Trago,Sech,225933,75,2019-04-19,1Ej96GIBCTvgH7tNX1r3qr,0.1360,0.746,0.700,0.700,0.700,0.3410,176.044,0,-4.669,1,4,0.619
4,Callaita,Bad Bunny,250533,80,2019-05-31,2TH65lNHgvLxCKXM3apjxI,0.6000,0.610,0.624,0.624,0.624,0.3090,176.169,2,-4.773,1,4,0.244
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
230,Un Ratito,Bad Bunny,176936,82,2022-05-06,5CzixCxDkRXX9mScCmah8O,0.3050,0.787,0.546,0.546,0.546,0.0532,93.050,0,-7.094,0,4,0.222
231,Si Fuera Fácil,Grupo Marca Registrada,170496,77,2022-05-27,7Jph6HJbfURt4Fea19IED3,0.4460,0.794,0.677,0.677,0.677,0.0713,101.467,10,-9.969,1,3,0.611
232,Dos Mil 16,Bad Bunny,208615,81,2022-05-06,4d4ZXH4dr5bYfgErHiZCX2,0.1230,0.817,0.670,0.670,0.670,0.0506,130.058,1,-6.534,1,3,0.495
233,Siempre Pendientes,Peso Pluma,186828,85,2022-08-15,3gfSH9aYZbyGjdmbeoWliw,0.3310,0.774,0.748,0.748,0.748,0.0352,135.929,0,-7.086,1,3,0.714


In [17]:
# final_spoti_df to a csv file

final_spoti_df.to_csv('outputs/spotify_and_features.csv', index=False)