In [16]:
import os
from dotenv import load_dotenv
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

from utils.etl import get_playlist_info, get_track_info, get_tracks_from_playlists

In [17]:
# Genre seed names in alphabetical order, one initial letter per row.
# str.split() turns the whitespace-separated block into a list of strings.
genres_seeds = """
    acoustic afrobeat alt-rock alternative ambient anime
    black-metal bluegrass blues bossanova brazil breakbeat british
    cantopop chicago-house children chill classical club comedy country
    dance dancehall death-metal deep-house detroit-techno disco disney drum-and-bass dub dubstep
    edm electro electronic emo
    folk forro french funk
    garage german gospel goth grindcore groove grunge guitar
    happy hard-rock hardcore hardstyle heavy-metal hip-hop holidays honky-tonk house
    idm indian indie indie-pop industrial iranian
    j-dance j-idol j-pop j-rock jazz
    k-pop kids
    latin latino
    malay mandopop metal metal-misc metalcore minimal-techno movies mpb
    new-age new-release
    opera
    pagode party philippines-opm piano pop pop-film post-dubstep power-pop progressive-house
    psych-rock punk punk-rock
    r-n-b rainy-day reggae reggaeton road-trip rock rock-n-roll rockabilly romance
    sad salsa samba sertanejo show-tunes singer-songwriter ska sleep
    songwriter soul soundtracks spanish study summer swedish synth-pop
    tango techno trance trip-hop turkish
    work-out world-music
""".split()

In [18]:
# Read the Spotify API credentials from environment variables.
# load_dotenv() runs first, so both lookups happen after it has been called.
load_dotenv()
client_id = os.environ.get('CLIENT_ID')
client_secret = os.environ.get('CLIENT_SECRET')

In [19]:
# Build the Spotify client: wrap the credentials in a SpotifyClientCredentials
# manager and hand that manager to spotipy.Spotify.
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [20]:
# Search playlists by genre
def search_playlists_by_genre(sp, genre, limit=50, offset=0):
    """Search for playlists matching a genre keyword.

    Args:
        sp: Client object exposing ``search(q=..., type=..., limit=..., offset=...)``.
        genre: Text used as the search query.
        limit: Maximum number of playlists requested from the search call.
        offset: Index of the first search result to return.

    Returns:
        A tuple ``(response, records)`` where ``response`` is the raw search
        result and ``records`` is the list of ``get_playlist_info()`` outputs,
        one per playlist item in ``response['playlists']['items']``.
    """
    response = sp.search(q=genre, type='playlist', limit=limit, offset=offset)
    records = [
        get_playlist_info(item)
        for item in response['playlists']['items']
        # NOTE(review): null items are skipped so they never reach
        # get_playlist_info; confirm the search endpoint can return them.
        if item is not None
    ]
    return response, records


playlists_data, playlists = search_playlists_by_genre(sp, genre='alt-rock')
df_playlists = pd.DataFrame(playlists)
print(df_playlists.shape)
df_playlists.head()

(50, 4)


Unnamed: 0,id,name,tracks_href,main_image
0,5W9dGuMEjLuSMmCgaYzIol,Alt rock,https://api.spotify.com/v1/playlists/5W9dGuMEj...,https://mosaic.scdn.co/640/ab67616d0000b273038...
1,37i9dQZF1EIefLxrHQP8p4,Alternative Rock Mix,https://api.spotify.com/v1/playlists/37i9dQZF1...,https://seed-mix-image.spotifycdn.com/v6/img/d...
2,37i9dQZF1DX9GRpeH4CL0S,Essential Alternative,https://api.spotify.com/v1/playlists/37i9dQZF1...,https://i.scdn.co/image/ab67706f00000003437bdc...
3,6UkcmQf7tYlGKX18IL8cyd,alt rock,https://api.spotify.com/v1/playlists/6UkcmQf7t...,https://mosaic.scdn.co/640/ab67616d0000b2730a8...
4,37i9dQZF1DXa6YOhGMjjgx,New Alt-Rock Mixtape,https://api.spotify.com/v1/playlists/37i9dQZF1...,https://i.scdn.co/image/ab67706f00000003e00b16...


In [21]:
# Count how many playlist ids are null (True) versus present (False)
pd.isna(df_playlists["id"]).value_counts()

id
False    50
Name: count, dtype: int64

In [22]:
# Get the tracks of the playlists in df_playlists and load them into a DataFrame
tracks_playlists = get_tracks_from_playlists(sp=sp, playlists=df_playlists)
df_tracks = pd.DataFrame(data=tracks_playlists)
print(df_tracks.shape)
df_tracks.head()

(4313, 5)


Unnamed: 0,id,name,track_href,album_name,album_id
0,6L89mwZXSOwYl76YXfX13s,Basket Case,https://api.spotify.com/v1/tracks/6L89mwZXSOwY...,Dookie,4uG8q3GPuWHQlRbswMIRS6
1,48UPSzbZjgc449aqz8bxox,Californication,https://api.spotify.com/v1/tracks/48UPSzbZjgc4...,Californication (Deluxe Edition),2Y9IRtehByVkegoD7TcLfi
2,5UWwZ5lm5PKu6eKsHAGxOk,Everlong,https://api.spotify.com/v1/tracks/5UWwZ5lm5PKu...,The Colour And The Shape,30ly6F6Xl0TKmyBCU50Khv
3,42et6fnHCw1HIPSrdPprMl,Semi-Charmed Life,https://api.spotify.com/v1/tracks/42et6fnHCw1H...,Third Eye Blind,2gToC0XAblE9h3UZD6aAaQ
4,04w73SmPfQkkmEpKbcrHOL,Heaven Knows,https://api.spotify.com/v1/tracks/04w73SmPfQkk...,Going To Hell (Deluxe Edition),2PYeB2ZxvfvPsEpd951xP4


In [23]:
# Count how many track ids are null (True) versus present (False)
pd.isna(df_tracks["id"]).value_counts()

id
False    4311
True        2
Name: count, dtype: int64

In [24]:
# Drop every row containing a null in any column, then re-check the ids
df_tracks = df_tracks.dropna()
pd.isna(df_tracks["id"]).value_counts()

id
False    4311
Name: count, dtype: int64

In [25]:
# Keep only the first row seen for each track id
df_tracks = df_tracks.drop_duplicates(subset="id")
df_tracks.shape

(3414, 5)

In [26]:
# Get audio features from tracks
BATCH_SIZE = 100  # number of track ids sent per sp.audio_features request

audio_features = []

# Step through the frame in BATCH_SIZE chunks. Stepping the range (instead of
# running len // 100 + 1 iterations) avoids a trailing request with an empty
# id list when the track count is an exact multiple of the batch size or zero.
for start in range(0, len(df_tracks), BATCH_SIZE):
    print('🟦', end='')
    df_tracks_pack = df_tracks.iloc[start:start + BATCH_SIZE]
    audio_features_pack = sp.audio_features(tracks=list(df_tracks_pack["id"]))
    # Keep only real feature dicts: a None entry (or a None response) would
    # otherwise end up in the list handed to pd.DataFrame below.
    audio_features += [
        features
        for features in (audio_features_pack or [])
        if features is not None
    ]

df_audio_features = pd.DataFrame(audio_features)
print()
print(df_audio_features.shape)
df_audio_features.head()

🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦🟦
(3414, 18)


Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.442,0.943,3,-3.205,1,0.0602,0.00293,9e-06,0.091,0.781,85.064,audio_features,6L89mwZXSOwYl76YXfX13s,spotify:track:6L89mwZXSOwYl76YXfX13s,https://api.spotify.com/v1/tracks/6L89mwZXSOwY...,https://api.spotify.com/v1/audio-analysis/6L89...,181533,4
1,0.592,0.767,9,-2.788,0,0.027,0.0021,0.00165,0.127,0.328,96.483,audio_features,48UPSzbZjgc449aqz8bxox,spotify:track:48UPSzbZjgc449aqz8bxox,https://api.spotify.com/v1/tracks/48UPSzbZjgc4...,https://api.spotify.com/v1/audio-analysis/48UP...,329733,4
2,0.413,0.881,11,-5.541,0,0.0367,6e-05,0.000308,0.0805,0.364,158.066,audio_features,5UWwZ5lm5PKu6eKsHAGxOk,spotify:track:5UWwZ5lm5PKu6eKsHAGxOk,https://api.spotify.com/v1/tracks/5UWwZ5lm5PKu...,https://api.spotify.com/v1/audio-analysis/5UWw...,250547,4
3,0.64,0.864,7,-6.576,1,0.0314,0.00832,0.0,0.123,0.701,102.026,audio_features,42et6fnHCw1HIPSrdPprMl,spotify:track:42et6fnHCw1HIPSrdPprMl,https://api.spotify.com/v1/tracks/42et6fnHCw1H...,https://api.spotify.com/v1/audio-analysis/42et...,268360,4
4,0.543,0.815,2,-3.712,1,0.0439,0.078,0.0,0.21,0.579,83.992,audio_features,04w73SmPfQkkmEpKbcrHOL,spotify:track:04w73SmPfQkkmEpKbcrHOL,https://api.spotify.com/v1/tracks/04w73SmPfQkk...,https://api.spotify.com/v1/audio-analysis/04w7...,224533,4


In [27]:
# Inner-join the track metadata with the audio features on the shared "id" column
df_tracks_audio_features = df_tracks.merge(df_audio_features, on="id", how="inner")
print(df_tracks_audio_features.shape)
df_tracks_audio_features.head()

(3414, 22)


Unnamed: 0,id,name,track_href_x,album_name,album_id,danceability,energy,key,loudness,mode,...,instrumentalness,liveness,valence,tempo,type,uri,track_href_y,analysis_url,duration_ms,time_signature
0,6L89mwZXSOwYl76YXfX13s,Basket Case,https://api.spotify.com/v1/tracks/6L89mwZXSOwY...,Dookie,4uG8q3GPuWHQlRbswMIRS6,0.442,0.943,3,-3.205,1,...,9e-06,0.091,0.781,85.064,audio_features,spotify:track:6L89mwZXSOwYl76YXfX13s,https://api.spotify.com/v1/tracks/6L89mwZXSOwY...,https://api.spotify.com/v1/audio-analysis/6L89...,181533,4
1,48UPSzbZjgc449aqz8bxox,Californication,https://api.spotify.com/v1/tracks/48UPSzbZjgc4...,Californication (Deluxe Edition),2Y9IRtehByVkegoD7TcLfi,0.592,0.767,9,-2.788,0,...,0.00165,0.127,0.328,96.483,audio_features,spotify:track:48UPSzbZjgc449aqz8bxox,https://api.spotify.com/v1/tracks/48UPSzbZjgc4...,https://api.spotify.com/v1/audio-analysis/48UP...,329733,4
2,5UWwZ5lm5PKu6eKsHAGxOk,Everlong,https://api.spotify.com/v1/tracks/5UWwZ5lm5PKu...,The Colour And The Shape,30ly6F6Xl0TKmyBCU50Khv,0.413,0.881,11,-5.541,0,...,0.000308,0.0805,0.364,158.066,audio_features,spotify:track:5UWwZ5lm5PKu6eKsHAGxOk,https://api.spotify.com/v1/tracks/5UWwZ5lm5PKu...,https://api.spotify.com/v1/audio-analysis/5UWw...,250547,4
3,42et6fnHCw1HIPSrdPprMl,Semi-Charmed Life,https://api.spotify.com/v1/tracks/42et6fnHCw1H...,Third Eye Blind,2gToC0XAblE9h3UZD6aAaQ,0.64,0.864,7,-6.576,1,...,0.0,0.123,0.701,102.026,audio_features,spotify:track:42et6fnHCw1HIPSrdPprMl,https://api.spotify.com/v1/tracks/42et6fnHCw1H...,https://api.spotify.com/v1/audio-analysis/42et...,268360,4
4,04w73SmPfQkkmEpKbcrHOL,Heaven Knows,https://api.spotify.com/v1/tracks/04w73SmPfQkk...,Going To Hell (Deluxe Edition),2PYeB2ZxvfvPsEpd951xP4,0.543,0.815,2,-3.712,1,...,0.0,0.21,0.579,83.992,audio_features,spotify:track:04w73SmPfQkkmEpKbcrHOL,https://api.spotify.com/v1/tracks/04w73SmPfQkk...,https://api.spotify.com/v1/audio-analysis/04w7...,224533,4
