# Lab | API wrappers - Create your collection of songs & audio features

''' 
Instructions

To move forward with the project, you need to create a collection of songs with their audio features - as large as possible!

These are the songs that we will cluster. And, later, when the user inputs a song, we will find the cluster to which the song belongs and recommend a song from the same cluster. The more songs you have, the more accurate and diverse recommendations you'll be able to give. Although... you might want to make sure the collected songs are "curated" in a certain way. Try to find playlists of songs that are diverse, but also that meet certain standards.

The process of sending hundreds or thousands of requests can take some time - it's normal if you have to wait a few minutes (or, if you're ambitious, even hours) to get all the data you need.

An idea for collecting as many songs as possible is to start with all the songs of a big, diverse playlist and then go to every artist present in the playlist and grab every song of every album of that artist. The amount of songs you'll be collecting per playlist will grow exponentially!
'''

## EXECUTING THE CONNECTION TO THE SPOTIFY API

In [166]:
!pip install spotipy



In [167]:
#pip install git+https://github.com/plamere/spotipy.git --upgrade

In [168]:
#pip show spotipy

In [169]:
import spotipy
import pandas as pd
from spotipy.oauth2 import SpotifyOAuth
from spotipy.oauth2 import SpotifyClientCredentials
from random import randint
from time import sleep

In [170]:
secrets_file = open("/Users/patrickhutsch/IH-Labs/secrets.txt","r") #"r" is for reading

In [171]:
string = secrets_file.read()

In [172]:
string.split('\n')

['cid 8dc9fc43c4744ee6ae25ea6d08192e89',
 'csecret 5ab44f1575544a72b831880e62af238c']

In [173]:
secrets_dict={}
for line in string.split('\n'):
    if len(line) > 0:
        #print(line.split(' '))
        secrets_dict[line.split(' ')[0]]=line.split(' ')[1]

In [174]:
secrets_dict

{'cid': '8dc9fc43c4744ee6ae25ea6d08192e89',
 'csecret': '5ab44f1575544a72b831880e62af238c'}

In [175]:
#Initialize SpotiPy with user credentials
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['cid'],
                                                           client_secret=secrets_dict['csecret']))
spotify_wrapper = spotipy.Spotify(auth_manager = sp)

In [176]:
playlist = sp.user_playlist_tracks("spotify", "5K1ZnbYaYBIwof9UytdaMO")

In [177]:
playlist["total"]

590

In [178]:
playlist.keys()

dict_keys(['href', 'items', 'limit', 'next', 'offset', 'previous', 'total'])

In [179]:
len(playlist["items"])

100

In [180]:
playlist['next']

'https://api.spotify.com/v1/playlists/5K1ZnbYaYBIwof9UytdaMO/tracks?offset=100&limit=100&additional_types=track'

In [181]:
#sp.next(playlist)

In [182]:
def get_playlist_tracks(playlist_id):
    results = sp.user_playlist_tracks("spotify",playlist_id)
    tracks = results['items']
    while results['next']!=None:
        results = sp.next(results)
        tracks = tracks + results['items']
        sleep(randint(1,3000)/1000) # respectful nap
    return tracks

In [183]:
#this now holds all 589 songs and their artists
#need to make it into a dataframe, once we've broken it down! 

all_tracks = get_playlist_tracks("5K1ZnbYaYBIwof9UytdaMO")
len(all_tracks)

590

In [184]:
all_tracks[0]['track'].keys()

dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'episode', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'name', 'popularity', 'preview_url', 'track', 'track_number', 'type', 'uri'])

In [185]:
all_tracks[0]['track']['uri']

'spotify:track:0JJP0IS4w0fJx01EcrfkDe'

In [186]:
all_tracks[0]['track']['artists'][0]['name']

'All Time Low'

In [187]:
all_tracks[0]['track']['name']

'Dear Maria, Count Me In'

## Back to the Dataframe

In [188]:
title=[]
artist=[]
song_id=[]

for i in all_tracks:
    title.append(i['track']['name'])
    artist.append(i['track']['artists'][0]['name'])
    song_id.append(i['track']['id'])

In [189]:
playlist_df=pd.DataFrame({'title': title, 'artist': artist, 'song_id': song_id})

In [190]:
playlist_df

Unnamed: 0,title,artist,song_id
0,"Dear Maria, Count Me In",All Time Low,0JJP0IS4w0fJx01EcrfkDe
1,I Miss Having Sex But At Least I Don't Wanna D...,Waterparks,2Rmw7J0krEU75ffhkaK93D
2,Jamie All Over,Mayday Parade,05qCCJQJiOwvPQBb7akf1R
3,Monsters (feat. blackbear),All Time Low,0tyR7Bu9P086aWBFZ4QJoo
4,The Downfall Of Us All,A Day To Remember,1KHKeIouP04dDtl0EetgED
...,...,...,...
585,There’s Fear In Letting Go,I Prevail,2OYtcqflvzQwh3cMPmTHs4
586,MAYDAY (feat. Ryo from Crystal Lake),coldrain,2wJJNgD6GrKoQ435VxTRv6
587,Inside My Head,Until I Wake,4VXI71UKhFtyqy9lcKXa0E
588,Rebel Love Song,Black Veil Brides,3ItCQd3vvharrdNrBzZuIY


In [191]:
playlist_df.artist.value_counts()

A Day To Remember    36
I Prevail            16
Escape the Fate      16
Asking Alexandria    13
Neck Deep            13
                     ..
Nine Lashes           1
Miss May I            1
Boys Like Girls       1
Chiodos               1
Colorblind            1
Name: artist, Length: 203, dtype: int64

In [192]:
# playlist_df.isna().sum()
sum(playlist_df.duplicated())

4

In [193]:
pp_playlist=[]
for i in range(len(playlist_df)):
    pp_playlist.extend(sp.audio_features(tracks=playlist_df['song_id'][i]))

ReadTimeout: HTTPSConnectionPool(host='api.spotify.com', port=443): Read timed out. (read timeout=5)

In [194]:
song_features=pd.DataFrame.from_dict(pp_playlist)

In [195]:
song_features

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.459,0.895,2,-3.126,1,0.0805,0.007250,0.000000,0.2060,0.572,181.040,audio_features,0JJP0IS4w0fJx01EcrfkDe,spotify:track:0JJP0IS4w0fJx01EcrfkDe,https://api.spotify.com/v1/tracks/0JJP0IS4w0fJ...,https://api.spotify.com/v1/audio-analysis/0JJP...,182827,4
1,0.582,0.810,5,-4.104,0,0.1060,0.076300,0.000000,0.3510,0.874,73.954,audio_features,2Rmw7J0krEU75ffhkaK93D,spotify:track:2Rmw7J0krEU75ffhkaK93D,https://api.spotify.com/v1/tracks/2Rmw7J0krEU7...,https://api.spotify.com/v1/audio-analysis/2Rmw...,133352,4
2,0.426,0.868,1,-3.702,1,0.0470,0.000065,0.000000,0.0723,0.402,149.042,audio_features,05qCCJQJiOwvPQBb7akf1R,spotify:track:05qCCJQJiOwvPQBb7akf1R,https://api.spotify.com/v1/tracks/05qCCJQJiOwv...,https://api.spotify.com/v1/audio-analysis/05qC...,216000,4
3,0.369,0.845,0,-3.449,0,0.0611,0.049300,0.000000,0.4160,0.782,78.532,audio_features,0tyR7Bu9P086aWBFZ4QJoo,spotify:track:0tyR7Bu9P086aWBFZ4QJoo,https://api.spotify.com/v1/tracks/0tyR7Bu9P086...,https://api.spotify.com/v1/audio-analysis/0tyR...,174068,4
4,0.588,0.881,0,-4.620,1,0.0666,0.000252,0.000000,0.3260,0.709,129.947,audio_features,1KHKeIouP04dDtl0EetgED,spotify:track:1KHKeIouP04dDtl0EetgED,https://api.spotify.com/v1/tracks/1KHKeIouP04d...,https://api.spotify.com/v1/audio-analysis/1KHK...,209093,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
286,0.267,0.896,8,-3.843,0,0.0578,0.000228,0.000002,0.3200,0.391,185.031,audio_features,5EmhaiRUDW0xUUsccMJ61X,spotify:track:5EmhaiRUDW0xUUsccMJ61X,https://api.spotify.com/v1/tracks/5EmhaiRUDW0x...,https://api.spotify.com/v1/audio-analysis/5Emh...,183653,3
287,0.468,0.917,0,-5.084,0,0.0692,0.000201,0.000000,0.1170,0.523,170.235,audio_features,6vE2Kc7knCkQdL2NbJ7FvB,spotify:track:6vE2Kc7knCkQdL2NbJ7FvB,https://api.spotify.com/v1/tracks/6vE2Kc7knCkQ...,https://api.spotify.com/v1/audio-analysis/6vE2...,141177,4
288,0.361,0.871,8,-4.313,1,0.0393,0.011900,0.000000,0.3180,0.575,176.026,audio_features,6VuztO5yaKP4VjqSkZxpan,spotify:track:6VuztO5yaKP4VjqSkZxpan,https://api.spotify.com/v1/tracks/6VuztO5yaKP4...,https://api.spotify.com/v1/audio-analysis/6Vuz...,172960,4
289,0.472,0.925,6,-3.545,0,0.1500,0.000394,0.000528,0.0826,0.338,159.989,audio_features,23UoI3jlFiWdo5jadUYo69,spotify:track:23UoI3jlFiWdo5jadUYo69,https://api.spotify.com/v1/tracks/23UoI3jlFiWd...,https://api.spotify.com/v1/audio-analysis/23Uo...,267867,4


In [196]:
playlist_features = playlist_df.merge(song_features, left_on='song_id', right_on='id')

In [197]:
playlist_features

Unnamed: 0,title,artist,song_id,danceability,energy,key,loudness,mode,speechiness,acousticness,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,"Dear Maria, Count Me In",All Time Low,0JJP0IS4w0fJx01EcrfkDe,0.459,0.895,2,-3.126,1,0.0805,0.007250,...,0.2060,0.572,181.040,audio_features,0JJP0IS4w0fJx01EcrfkDe,spotify:track:0JJP0IS4w0fJx01EcrfkDe,https://api.spotify.com/v1/tracks/0JJP0IS4w0fJ...,https://api.spotify.com/v1/audio-analysis/0JJP...,182827,4
1,I Miss Having Sex But At Least I Don't Wanna D...,Waterparks,2Rmw7J0krEU75ffhkaK93D,0.582,0.810,5,-4.104,0,0.1060,0.076300,...,0.3510,0.874,73.954,audio_features,2Rmw7J0krEU75ffhkaK93D,spotify:track:2Rmw7J0krEU75ffhkaK93D,https://api.spotify.com/v1/tracks/2Rmw7J0krEU7...,https://api.spotify.com/v1/audio-analysis/2Rmw...,133352,4
2,Jamie All Over,Mayday Parade,05qCCJQJiOwvPQBb7akf1R,0.426,0.868,1,-3.702,1,0.0470,0.000065,...,0.0723,0.402,149.042,audio_features,05qCCJQJiOwvPQBb7akf1R,spotify:track:05qCCJQJiOwvPQBb7akf1R,https://api.spotify.com/v1/tracks/05qCCJQJiOwv...,https://api.spotify.com/v1/audio-analysis/05qC...,216000,4
3,Monsters (feat. blackbear),All Time Low,0tyR7Bu9P086aWBFZ4QJoo,0.369,0.845,0,-3.449,0,0.0611,0.049300,...,0.4160,0.782,78.532,audio_features,0tyR7Bu9P086aWBFZ4QJoo,spotify:track:0tyR7Bu9P086aWBFZ4QJoo,https://api.spotify.com/v1/tracks/0tyR7Bu9P086...,https://api.spotify.com/v1/audio-analysis/0tyR...,174068,4
4,The Downfall Of Us All,A Day To Remember,1KHKeIouP04dDtl0EetgED,0.588,0.881,0,-4.620,1,0.0666,0.000252,...,0.3260,0.709,129.947,audio_features,1KHKeIouP04dDtl0EetgED,spotify:track:1KHKeIouP04dDtl0EetgED,https://api.spotify.com/v1/tracks/1KHKeIouP04d...,https://api.spotify.com/v1/audio-analysis/1KHK...,209093,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
288,Stay,No Resolve,5EmhaiRUDW0xUUsccMJ61X,0.267,0.896,8,-3.843,0,0.0578,0.000228,...,0.3200,0.391,185.031,audio_features,5EmhaiRUDW0xUUsccMJ61X,spotify:track:5EmhaiRUDW0xUUsccMJ61X,https://api.spotify.com/v1/tracks/5EmhaiRUDW0x...,https://api.spotify.com/v1/audio-analysis/5Emh...,183653,3
289,STAY,Fame on Fire,6vE2Kc7knCkQdL2NbJ7FvB,0.468,0.917,0,-5.084,0,0.0692,0.000201,...,0.1170,0.523,170.235,audio_features,6vE2Kc7knCkQdL2NbJ7FvB,spotify:track:6vE2Kc7knCkQdL2NbJ7FvB,https://api.spotify.com/v1/tracks/6vE2Kc7knCkQ...,https://api.spotify.com/v1/audio-analysis/6vE2...,141177,4
290,On My Own,Ashes Remain,6VuztO5yaKP4VjqSkZxpan,0.361,0.871,8,-4.313,1,0.0393,0.011900,...,0.3180,0.575,176.026,audio_features,6VuztO5yaKP4VjqSkZxpan,spotify:track:6VuztO5yaKP4VjqSkZxpan,https://api.spotify.com/v1/tracks/6VuztO5yaKP4...,https://api.spotify.com/v1/audio-analysis/6Vuz...,172960,4
291,Bulls In The Bronx,Pierce The Veil,23UoI3jlFiWdo5jadUYo69,0.472,0.925,6,-3.545,0,0.1500,0.000394,...,0.0826,0.338,159.989,audio_features,23UoI3jlFiWdo5jadUYo69,spotify:track:23UoI3jlFiWdo5jadUYo69,https://api.spotify.com/v1/tracks/23UoI3jlFiWd...,https://api.spotify.com/v1/audio-analysis/23Uo...,267867,4


In [198]:
playlist_features=playlist_features.drop(['id'],axis=1)

In [199]:
pl1 = get_playlist_tracks("5K1ZnbYaYBIwof9UytdaMO")
len(all_tracks)

590

In [228]:
pl2 = get_playlist_tracks("0yqmXcvBWRiYZ3Yd2GYRAz")

In [229]:
pl3 = get_playlist_tracks("36KyxKPGjsAeZTWSV6xTEi")

In [230]:
pl4 = get_playlist_tracks("37i9dQZF1DX0Nb1cyWS6Tq")

In [231]:
pl5 = get_playlist_tracks("37i9dQZF1DXbB7yFaZiAQX")

In [232]:
pl6 = get_playlist_tracks("2gNTpZxVxW4KLyceVYFgms")

In [233]:
pl7 = get_playlist_tracks("37i9dQZF1DWTSKFpOdYF1r")

In [234]:
pl8 = get_playlist_tracks("37i9dQZF1EIgeaSx7BfoU1")

In [235]:
pl9 = get_playlist_tracks("37i9dQZF1DWYwMzXER4RFF")

In [236]:
pl10 = get_playlist_tracks("37i9dQZF1DWSDoVybeQisg")

In [237]:
pl11 = get_playlist_tracks("3e8CT1OD4Atp0ZQxVMaWSS")

In [238]:
pl12 = get_playlist_tracks("37i9dQZF1DXdTCdwCKzXwo")

In [239]:
pl13 = get_playlist_tracks("37i9dQZF1DX26DKvjp0s9M")

In [240]:
pl14 = get_playlist_tracks("37i9dQZF1DWUoqEG4WY6ce")

In [241]:
pl15 = get_playlist_tracks("6IWLXV1jdcKKEH6YSc2yPv")

In [242]:
pl16 = get_playlist_tracks("5cs51izx3ZXvmupfUOlPpf")

In [243]:
pl17 = get_playlist_tracks("0ZRwdOOqY6JzKIhkDKf7Vp")

In [244]:
pl18 = get_playlist_tracks("6xelnz6oVa1sM4H81Mklj3")

In [245]:
pl19 = get_playlist_tracks("6zpz6PsWOvy1ENtdI5fOTh")

In [246]:
pl20 = get_playlist_tracks("6IziTIPI9s0u1GXGKycXHg")

In [247]:
pl21 = get_playlist_tracks("43ocaXfwmx3w3p9ClvW7zG")

In [248]:
pl22 = get_playlist_tracks("6IziTIPI9s0u1GXGKycXHg")

In [249]:
pl23 = get_playlist_tracks("6zpz6PsWOvy1ENtdI5fOTh")

In [250]:
pl24 = get_playlist_tracks("6zpz6PsWOvy1ENtdI5fOTh")

In [251]:
postpunk = pl2 + pl3 + pl4 + pl5 + pl6 + pl7 + pl8 + pl9 + pl10 + pl11 + pl12 + pl13 + pl14 + pl15 + pl16 + pl17 + pl18 + pl19 + pl20 + pl21 + pl22 + pl23 + pl24
len(postpunk)

7899

In [252]:
display(postpunk)

[{'added_at': '2022-08-17T23:27:18Z',
  'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/qbuq7urj4dn62s8tfmti2ype1'},
   'href': 'https://api.spotify.com/v1/users/qbuq7urj4dn62s8tfmti2ype1',
   'id': 'qbuq7urj4dn62s8tfmti2ype1',
   'type': 'user',
   'uri': 'spotify:user:qbuq7urj4dn62s8tfmti2ype1'},
  'is_local': False,
  'primary_color': None,
  'track': {'album': {'album_type': 'album',
    'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/3QUZOEl7WwFC0Hfl9z4Mhk'},
      'href': 'https://api.spotify.com/v1/artists/3QUZOEl7WwFC0Hfl9z4Mhk',
      'id': '3QUZOEl7WwFC0Hfl9z4Mhk',
      'name': 'John Zacherle',
      'type': 'artist',
      'uri': 'spotify:artist:3QUZOEl7WwFC0Hfl9z4Mhk'}],
    'available_markets': ['AD',
     'AE',
     'AG',
     'AL',
     'AM',
     'AO',
     'AR',
     'AT',
     'AU',
     'AZ',
     'BA',
     'BB',
     'BD',
     'BE',
     'BF',
     'BG',
     'BH',
     'BI',
     'BJ',
     'BN',
     'BO',
   