# Lab | API wrappers - Create your collection of songs & audio features

#### To move forward with the project, you need to create a collection of songs with their audio features - as large as possible!

#### And, later, when the user inputs a song, we will find the cluster to which the song belongs and recommend a song from the same cluster. The more songs you have, the more accurate and diverse recommendations you'll be able to give. Although... you might want to make sure the collected songs are "curated" in a certain way. Try to find playlists of songs that are diverse, but also that meet certain standards.

#### An idea for collecting as many songs as possible is to start with all the songs of a big, diverse playlist and then go to every artist present in the playlist and grab every song of every album of that artist. The amount of songs you'll be collecting per playlist will grow exponentially!

In [1]:
import json
import pandas as pd

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id="cf245f23e4a6465d816f3d4b131c8dcd",
                                                           client_secret="935649211d1b4698bdda1fe65e7dd205"))

In [3]:
results = sp.user_playlist_tracks("spotify", "38wkMV0C3k4ldgRZ1mVW7H")
tracks = results['items']


In [4]:
for oset in range(100, results['total'],100):
    results = sp.user_playlist_tracks("spotify", "38wkMV0C3k4ldgRZ1mVW7H", offset=oset)
    tracks += results['items']
len(tracks)

288

In [5]:
def get_playlist_tracks(username, playlist_id):
    
    results = sp.user_playlist_tracks(username, playlist_id)
    tracks = results['items']
    
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    
    return tracks

tracks = get_playlist_tracks("spotify", "38wkMV0C3k4ldgRZ1mVW7H")

In [6]:
# Getting all the artists

def get_artists_from_playlist(playlist_id):
    
    tracks_from_playlist = get_playlist_tracks("spotify", playlist_id)
    
    artists = []
    
    for track in tracks_from_playlist:
        artists_info = track['track']['artists']
        
        for artist_info in artists_info:
            artists.append(artist_info['name'])
    
    return list(set(artists))

In [7]:
# Getting all the artists IDs

def get_artists_ids_from_playlist(playlist_id):
    
    tracks_from_playlist = get_playlist_tracks("spotify", playlist_id)
    
    artists_ids = []
    
    for track in tracks_from_playlist:
        artists_info = track['track']['artists']
        
        for artist_info in artists_info:
            artists_ids.append(artist_info['id'])
            
    return list(set(artists_ids))

In [8]:
artists = get_artists_from_playlist("38wkMV0C3k4ldgRZ1mVW7H")

In [9]:
artists_ids = get_artists_ids_from_playlist("38wkMV0C3k4ldgRZ1mVW7H")

In [19]:
playlist_features_list = ["artist","album","track_name",  "track_id","danceability","energy","key","loudness","mode", "speechiness","instrumentalness","liveness","valence","tempo", "duration_ms","time_signature"]
playlist_df = pd.DataFrame(columns = playlist_features_list)

for track in tracks:
            # Create empty dict
        playlist_features = {}
            # Get metadata
        playlist_features["artist"] = track["track"]["album"]["artists"][0]["name"]
        playlist_features["album"] = track["track"]["album"]["name"]
        playlist_features["track_name"] = track["track"]["name"]
        playlist_features["track_id"] = track["track"]["id"]
        
        # Get audio features
        audio_features = sp.audio_features(playlist_features["track_id"])[0]
        for feature in playlist_features_list[4:]:
            playlist_features[feature] = audio_features[feature]
            
            # Concat the dfs
        track_df = pd.DataFrame(playlist_features, index = [0])
        playlist_df = pd.concat([playlist_df, track_df], ignore_index = True)

In [70]:
playlist_df.head()

Unnamed: 0,artist,album,track_name,track_id,danceability,energy,key,loudness,mode,speechiness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,Yoke Lore,Spotify Singles,Truly Madly Deeply - Recorded at Spotify Studi...,0hLObGB9xRjuRVasHehmLI,0.675,0.368,0,-14.072,0,0.0375,0.00158,0.136,0.176,106.004,190393,4
1,Yoke Lore,Goodpain,Beige,0FfZudphGgQTGeEV6AhmOs,0.434,0.677,10,-7.535,1,0.119,0.0684,0.119,0.218,167.927,204209,4
2,Hollow Coves,The Woods (Acoustic),The Woods - Acoustic,6uaIMhUZXMuT2Zz0vxrFiR,0.568,0.558,9,-8.553,1,0.0278,0.000331,0.116,0.287,104.991,239960,4
3,Broken Back,"Dear Misfortune, Mother of Joy - EP",Young Souls,7p2RlyF403KmPmZm2TxKl3,0.702,0.606,8,-10.285,0,0.0443,0.0,0.145,0.463,99.95,219373,4
4,José González,Stay Alive (From The Secret Life Of Walter Mitty),Stay Alive,0ZNYGrmcehorhh9JOeg5Iv,0.714,0.306,6,-12.111,1,0.0388,0.168,0.0924,0.314,129.984,266907,4


In [22]:
artists_ids

['6VCoG3MG7ZKRxDjaYOvtrF',
 '0jRqFvRKCDryHOgrgFqsKG',
 '0L8ExT028jH3ddEcZwqJJ5',
 '4BxCuXFJrSWGi1KHcVqaU4',
 '5YA1c6yVkPnflTLMfOgjzc',
 '4gzpq5DPGxSnKTe4SA8HAU',
 '6r1Xmz7YUD4z0VRUoGm8XN',
 '4HV7yKF3SRpY6I0gxu7hm9',
 '3WrFJ7ztbogyGnTHbHJFl2',
 '2WjvvwAX0mdWwq3aFuUdtc',
 '5sWHDYs0csV6RS48xBl0tH',
 '0LcJLqbBmaGUft1e9Mm8HV',
 '0SwO7SWeDHJijQ3XNS7xEE',
 '5KORW1YGiERJqfuYjXMCyz',
 '7jdFEYD2LTYjfwxOdlVjmc',
 '0epOFNiUfyON9EYx7Tpr6V',
 '1kDGbuxWknIKx4FlgWxiSp',
 '5f7VJjfbwm532GiveGC0ZK',
 '6u11Qbko2N2hP4lTBYjX86',
 '10exVja0key0uqUkk6LJRT',
 '0bzfPKdbXL5ezYW2z3UGQj',
 '79hrYiudVcFyyxyJW0ipTy',
 '0ZR3khgseh3xy3s8iBYWrI',
 '6n9XmMc3mX18mrTHYOCPIq',
 '6VxCmtR7S3yz4vnzsJqhSV',
 '536BYVgOnRky0xjsPT96zl',
 '0MlOPi3zIDMVrfA9R04Fe3',
 '1cwOthlzLBwN8Imbq7P71H',
 '59wfkuBoNyhDMQGCljbUbA',
 '0RqtSIYZmd4fiBKVFqyIqD',
 '3yDIp0kaq9EFKe07X1X2rz',
 '2xaAOVImG2O6lURwqperlD',
 '4qNZw759AdHSMugyDMrIlF',
 '26T3LtbuGT1Fu9m0eRq5X3',
 '5nCi3BB41mBaMH9gfr6Su0',
 '2IS9rTtWwEfUEY4u3mecIT',
 '49zZ2lRNpfwWfUnASUxCYW',
 

In [31]:
results = sp.artist_top_tracks('6VCoG3MG7ZKRxDjaYOvtrF')
results

{'tracks': [{'album': {'album_type': 'album',
    'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/6VCoG3MG7ZKRxDjaYOvtrF'},
      'href': 'https://api.spotify.com/v1/artists/6VCoG3MG7ZKRxDjaYOvtrF',
      'id': '6VCoG3MG7ZKRxDjaYOvtrF',
      'name': 'Love of Lesbian',
      'type': 'artist',
      'uri': 'spotify:artist:6VCoG3MG7ZKRxDjaYOvtrF'}],
    'external_urls': {'spotify': 'https://open.spotify.com/album/1vvfLVVcU1o8sPMq25CxpF'},
    'href': 'https://api.spotify.com/v1/albums/1vvfLVVcU1o8sPMq25CxpF',
    'id': '1vvfLVVcU1o8sPMq25CxpF',
    'images': [{'height': 640,
      'url': 'https://i.scdn.co/image/ab67616d0000b273304dbb3e13db9dae376c7a51',
      'width': 640},
     {'height': 300,
      'url': 'https://i.scdn.co/image/ab67616d00001e02304dbb3e13db9dae376c7a51',
      'width': 300},
     {'height': 64,
      'url': 'https://i.scdn.co/image/ab67616d00004851304dbb3e13db9dae376c7a51',
      'width': 64}],
    'name': '1999 (o como generar incendios de

In [62]:
def artist_top_tracks(artist_id):
    top_tracks = []
    results = sp.artist_top_tracks(artist_id)
    tracks = results['tracks']
    for t in tracks:
        top_tracks.append(t['id'])
    
    return top_tracks

In [63]:
top_tracks = artist_top_tracks("6VCoG3MG7ZKRxDjaYOvtrF")

In [60]:
results_tt = sp.artist_top_tracks('6VCoG3MG7ZKRxDjaYOvtrF')
tracks_tt = results_tt['tracks']

for t in tracks_tt:
            # Create empty dict
        top_tracks_features = {}
            # Get metadata
        top_tracks_features["artist"] = t["artists"]
        top_tracks_features["album"] = t["album"]['name']
        top_tracks_features["track_name"] = t["name"]
        top_tracks_features["track_id"] = t['id']
        
        # Get audio features
        audio_features = sp.audio_features(playlist_features["track_id"])[0]
        for feature in playlist_features_list[4:]:
            playlist_features[feature] = audio_features[feature]
            
            # Concat the dfs
        top_tracks_df = pd.DataFrame(top_tracks_features, index = [0])
        #playlist_df = pd.concat([playlist_df, track_df], ignore_index = True)

ValueError: Shape of passed values is (2, 4), indices imply (1, 4)

In [None]:
top_tracks_df