In [60]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from dotenv import load_dotenv # Loads .env file.
import os
import json
import time
import pandas as pd

In [61]:
# Load the auth.env file, then read the Spotify client id and secret from
# the environment. Either value is None when the variable is not set.
load_dotenv('auth.env')
SPOTIFY_CLIENT_ID = os.environ.get('SPOTIFY_CLIENT_ID')
SPOTIFY_CLIENT_SECRET = os.environ.get('SPOTIFY_CLIENT_SECRET')

In [62]:
# Build a SpotifyClientCredentials manager from the id/secret read above and
# hand it to the spotipy client (`sp`) that the later cells use for API calls.
client_credentials_manager = SpotifyClientCredentials(client_id=SPOTIFY_CLIENT_ID, client_secret=SPOTIFY_CLIENT_SECRET)
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

In [63]:
# Playlists whose tracks are labelled "happy".
happy_playlist_ids = ['37i9dQZF1DX9XIFQuFvzM4', '37i9dQZF1DX889U0CL85jj', '37i9dQZF1DX8Dc28snyWrn', '37i9dQZF1DWYBO1MoTDhZI', '37i9dQZF1DX4fpCWaHOned', '37i9dQZF1DWSf2RDTDayIx', '37i9dQZF1DXa19sXUAHiO1', '37i9dQZF1DX7KNKjOK0o75', '37i9dQZF1DX2sUQwD7tbmL', '37i9dQZF1DWYzpSJHStHHx', '37i9dQZF1DX1BzILRveYHb', '37i9dQZF1DX6fhMYWIyuww']

# Parallel lists: index i of each list describes the same track.
artists = []
first_artists = []
track_names = []
track_ids = []

# NOTE: only one page (limit=100, offset=0) is requested per playlist, so a
# playlist with more than 100 items contributes only its first 100.
for playlist_id in happy_playlist_ids:
    resp = sp.playlist_tracks(
        playlist_id,
        fields='items.track.artists.name, items.track.name, items.track.id',
        limit=100,
        offset=0,
        market=None,
        # Must be a real tuple: ('track') without the trailing comma is just
        # the string 'track', which is iterated character by character.
        additional_types=('track',),
    )
    for item in resp['items']:
        track = item['track']
        # Skip entries with no track payload or no id; an id of None cannot
        # be used for the audio-features lookup in the next cell.
        if not track or not track.get('id'):
            continue

        artists_list = track.get('artists') or []
        # Each artist entry is a dict (only 'name' is requested via `fields`).
        first_artist = artists_list[0] if artists_list else None
        track_id = track['id']  # avoid shadowing the builtin `id`

        artists.append(artists_list)
        first_artists.append(first_artist)
        track_names.append(track['name'])
        track_ids.append(track_id)

track_ids_len = len(track_ids)
mood_list = ["happy"] * track_ids_len
audio_features_all = []

In [64]:
# Fetch audio features for every id in `track_ids`, `batch_size` ids per request.
batch_size = 100
num_batches = (len(track_ids) + batch_size - 1) // batch_size

# Start from an empty list so re-running this cell does not append duplicates.
audio_features_all = []

# Fetch audio features in batches with rate limiting
for i in range(num_batches):
    start_idx = i * batch_size
    end_idx = start_idx + batch_size  # slicing clamps at the end of the list
    track_ids_batch = track_ids[start_idx:end_idx]

    try:
        audio_features_batch = sp.audio_features(tracks=track_ids_batch)
    except spotipy.SpotifyException as e:
        if e.http_status != 429:
            raise  # Not a rate limit error: propagate with the original traceback
        # `headers` may be None on the exception; default to 10 seconds if the
        # Retry-After header is not present.
        retry_after = int((e.headers or {}).get('Retry-After', 10))
        print(f"Encountered rate limit. Waiting for {retry_after} seconds before retrying...")
        time.sleep(retry_after)
        audio_features_batch = sp.audio_features(tracks=track_ids_batch)  # Retry the request once

    # Keep only real feature dicts; a None entry (track without features)
    # cannot be turned into a DataFrame row later.
    audio_features_all += [
        features for features in (audio_features_batch or []) if features is not None
    ]

# Report how many requested tracks produced no features, then the total kept.
missing_count = len(track_ids) - len(audio_features_all)
if missing_count:
    print("Tracks without audio features (skipped):", missing_count)
print("Total audio features fetched:", len(audio_features_all))

Total audio features fetched: 987


In [68]:
# Build the "happy" DataFrame: one row per track, one column per audio feature.
# Drop None placeholders first so neither the column lookup nor the DataFrame
# constructor trips over a non-dict entry.
happy_features = [features for features in audio_features_all if features is not None]
if not happy_features:
    raise ValueError("No audio features available to build happy_df")

# Column order follows the keys of the first feature dict.
happy_df = pd.DataFrame(data=happy_features, columns=list(happy_features[0].keys()))
print(happy_df.head())
size_of_df = happy_df.shape
print("Number of rows:", size_of_df[0])
print("Number of columns:", size_of_df[1])

   danceability  energy  key  loudness  mode  speechiness  acousticness  \
0         0.527   0.415    4   -11.451     0       0.1220         0.457   
1         0.768   0.367    2   -11.226     1       0.0315         0.683   
2         0.650   0.306    9    -9.443     1       0.0393         0.570   
3         0.592   0.355    9   -14.051     1       0.0352         0.478   
4         0.524   0.519    6   -11.903     1       0.0430         0.195   

   instrumentalness  liveness  valence    tempo            type  \
0          0.000017    0.1170    0.515   78.169  audio_features   
1          0.000018    0.0810    0.532  103.621  audio_features   
2          0.000007    0.0707    0.605  118.068  audio_features   
3          0.000000    0.0585    0.499  133.032  audio_features   
4          0.000000    0.5230    0.847  110.121  audio_features   

                       id                                   uri  \
0  1k1Bqnv2R0uJXQN4u6LKYt  spotify:track:1k1Bqnv2R0uJXQN4u6LKYt   
1  3zBhihYUH

In [69]:
# Write the happy-playlist audio features to CSV, omitting the index column.
happy_df.to_csv(
    path_or_buf='happy_playlist_audio_features.csv',
    index=False,
)

In [70]:
# Playlists whose tracks are labelled "sad".
sad_playlist_ids = ['37i9dQZF1DWSqBruwoIXkA', '37i9dQZF1DWW2hj3ZtMbuO', '37i9dQZF1DX7gIoKXt0gmx', '37i9dQZF1DWZrBs4FjpxlE', '37i9dQZF1DX59NCqCqJtoH', '37i9dQZF1DWVV27DiNWxkR', '37i9dQZF1DWVrtsSlLKzro', '37i9dQZF1DWZUAeYvs88zc', '37i9dQZF1DWU4lunzhQdRx', '37i9dQZF1DWV1bxlagjEmb', '37i9dQZF1DX9AnYEthXLyU', '37i9dQZF1DX15JKV0q7shD']

# Parallel lists: index i of each list describes the same track.
artists = []
first_artists = []
track_names = []
track_ids = []

# NOTE: only one page (limit=100, offset=0) is requested per playlist, so a
# playlist with more than 100 items contributes only its first 100.
for playlist_id in sad_playlist_ids:
    resp = sp.playlist_tracks(
        playlist_id,
        fields='items.track.artists.name, items.track.name, items.track.id',
        limit=100,
        offset=0,
        market=None,
        # Must be a real tuple: ('track') without the trailing comma is just
        # the string 'track', which is iterated character by character.
        additional_types=('track',),
    )
    for item in resp['items']:
        track = item['track']
        # Skip entries with no track payload or no id; an id of None cannot
        # be used for the audio-features lookup in the next cell.
        if not track or not track.get('id'):
            continue

        artists_list = track.get('artists') or []
        # Each artist entry is a dict (only 'name' is requested via `fields`).
        first_artist = artists_list[0] if artists_list else None
        track_id = track['id']  # avoid shadowing the builtin `id`

        artists.append(artists_list)
        first_artists.append(first_artist)
        track_names.append(track['name'])
        track_ids.append(track_id)

track_ids_len = len(track_ids)
mood_list = ["sad"] * track_ids_len
audio_features_all = []

In [71]:
# Fetch audio features for every id in `track_ids`, `batch_size` ids per request.
batch_size = 100
num_batches = (len(track_ids) + batch_size - 1) // batch_size

# Start from an empty list so re-running this cell does not append duplicates.
audio_features_all = []

# Fetch audio features in batches with rate limiting
for i in range(num_batches):
    start_idx = i * batch_size
    end_idx = start_idx + batch_size  # slicing clamps at the end of the list
    track_ids_batch = track_ids[start_idx:end_idx]

    try:
        audio_features_batch = sp.audio_features(tracks=track_ids_batch)
    except spotipy.SpotifyException as e:
        if e.http_status != 429:
            raise  # Not a rate limit error: propagate with the original traceback
        # `headers` may be None on the exception; default to 10 seconds if the
        # Retry-After header is not present.
        retry_after = int((e.headers or {}).get('Retry-After', 10))
        print(f"Encountered rate limit. Waiting for {retry_after} seconds before retrying...")
        time.sleep(retry_after)
        audio_features_batch = sp.audio_features(tracks=track_ids_batch)  # Retry the request once

    # Keep only real feature dicts; a None entry (track without features)
    # cannot be turned into a DataFrame row later.
    audio_features_all += [
        features for features in (audio_features_batch or []) if features is not None
    ]

# Report how many requested tracks produced no features, then the total kept.
missing_count = len(track_ids) - len(audio_features_all)
if missing_count:
    print("Tracks without audio features (skipped):", missing_count)
print("Total audio features fetched:", len(audio_features_all))
# Show a short preview only; printing the whole list floods the cell output.
print(audio_features_all[:3])

Total audio features fetched: 980
[{'danceability': 0.522, 'energy': 0.551, 'key': 0, 'loudness': -6.396, 'mode': 1, 'speechiness': 0.049, 'acousticness': 0.33, 'instrumentalness': 0, 'liveness': 0.129, 'valence': 0.242, 'tempo': 137.988, 'type': 'audio_features', 'id': '1UhDG1nEctrXaCJR85p0iF', 'uri': 'spotify:track:1UhDG1nEctrXaCJR85p0iF', 'track_href': 'https://api.spotify.com/v1/tracks/1UhDG1nEctrXaCJR85p0iF', 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/1UhDG1nEctrXaCJR85p0iF', 'duration_ms': 175600, 'time_signature': 4}, {'danceability': 0.362, 'energy': 0.54, 'key': 10, 'loudness': -6.031, 'mode': 1, 'speechiness': 0.0528, 'acousticness': 0.338, 'instrumentalness': 0, 'liveness': 0.139, 'valence': 0.3, 'tempo': 78.007, 'type': 'audio_features', 'id': '315aBOUD3xtj7sUMXtRgMV', 'uri': 'spotify:track:315aBOUD3xtj7sUMXtRgMV', 'track_href': 'https://api.spotify.com/v1/tracks/315aBOUD3xtj7sUMXtRgMV', 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/315aBOUD3xtj

In [72]:
# Build the "sad" DataFrame: one row per track, one column per audio feature.
# Drop None placeholders first so neither the column lookup nor the DataFrame
# constructor trips over a non-dict entry.
sad_features = [features for features in audio_features_all if features is not None]
if not sad_features:
    raise ValueError("No audio features available to build sad_df")

# Column order follows the keys of the first feature dict.
sad_df = pd.DataFrame(data=sad_features, columns=list(sad_features[0].keys()))
print(sad_df.head())
size_of_df = sad_df.shape
print("Number of rows:", size_of_df[0])
print("Number of columns:", size_of_df[1])

   danceability  energy  key  loudness  mode  speechiness  acousticness  \
0         0.522   0.551    0    -6.396     1       0.0490         0.330   
1         0.362   0.540   10    -6.031     1       0.0528         0.338   
2         0.492   0.379    0    -8.859     1       0.0257         0.785   
3         0.426   0.451    3    -8.044     1       0.1620         0.634   
4         0.508   0.419    7    -9.786     1       0.0429         0.912   

   instrumentalness  liveness  valence    tempo            type  \
0          0.000000     0.129    0.242  137.988  audio_features   
1          0.000000     0.139    0.300   78.007  audio_features   
2          0.000930     0.108    0.273   83.993  audio_features   
3          0.000000     0.103    0.484  182.921  audio_features   
4          0.000022     0.114    0.257  139.109  audio_features   

                       id                                   uri  \
0  1UhDG1nEctrXaCJR85p0iF  spotify:track:1UhDG1nEctrXaCJR85p0iF   
1  315aBOUD3

In [73]:
# Write the sad-playlist audio features to CSV, omitting the index column.
sad_df.to_csv(
    path_or_buf='sad_playlist_audio_features.csv',
    index=False,
)