In [None]:
# https://spotipy.readthedocs.io/en/2.19.0/

# install spotipy
# As google colab starts each session like a new computer, we must install this each time
# If you are working on your local machine, you only need to install once
!pip install spotipy --upgrade

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting spotipy
  Downloading spotipy-2.20.0-py3-none-any.whl (27 kB)
Collecting requests>=2.25.0
  Downloading requests-2.28.1-py3-none-any.whl (62 kB)
[K     |████████████████████████████████| 62 kB 951 kB/s 
[?25hCollecting redis>=3.5.3
  Downloading redis-4.3.4-py3-none-any.whl (246 kB)
[K     |████████████████████████████████| 246 kB 10.7 MB/s 
[?25hCollecting urllib3>=1.26.0
  Downloading urllib3-1.26.12-py2.py3-none-any.whl (140 kB)
[K     |████████████████████████████████| 140 kB 45.5 MB/s 
Collecting deprecated>=1.2.3
  Downloading Deprecated-1.2.13-py2.py3-none-any.whl (9.6 kB)
Installing collected packages: urllib3, deprecated, requests, redis, spotipy
  Attempting uninstall: urllib3
    Found existing installation: urllib3 1.24.3
    Uninstalling urllib3-1.24.3:
      Successfully uninstalled urllib3-1.24.3
  Attempting uninstall: requests
    Found existing installati

In [None]:
import pandas as pd

## Requests

The requests library is the de facto standard for making HTTP requests in Python. It abstracts the complexities of making requests behind a beautiful, simple API so that you can focus on interacting with services and consuming data in your application.

https://docs.python-requests.org/en/latest/

When we make a request, we are normally returned a number - this number represents whether we received the information we wanted or not. If you get a number that you don't understand, these cats will help you: https://http.cat/

More often than not you'll receive:

200: Success!

401: Unauthorized client error status: lack of valid authentication credentials

403: The server understood the request but refuses to authorize it

In [None]:
import requests

# Send a GET request to three different services and print the HTTP status code
# of each response.
# requests has no default timeout, so one is passed explicitly: the cell gives
# up after 10 seconds instead of hanging forever when a server does not answer.
google = requests.get("https://developers.google.com", timeout=10)
print("Google:", google.status_code)

NBA = requests.get("https://api.sportsdata.io/api/nba/fantasy/json/CurrentSeason", timeout=10)
print("NBA:", NBA.status_code)

rotten_tomato = requests.get("http://api.rottentomatoes.com/api/public/v1.0/lists/movies/box_office.json", timeout=10)
print("Rotten Tomatoes:", rotten_tomato.status_code)

Google: 200
NBA: 401
Rotten Tomatoes: 403


## JSON

### Intro - making a request and viewing the JSON

https://docs.python.org/3/library/json.html

Since its inception, JSON has quickly become the de facto standard for information exchange. JSON supports primitive types, like strings and numbers, as well as nested lists and objects. It looks like nested python dictionaries:

`{"firstname": "Harry",
"lastname": "Noah",
"city": "Berlin",
"dogs": [{"name": "rover", "breed": "labrador"}, {"name": "pip", "breed": "spaniel"}],
"cars": "none"}`

In [None]:
import json

# Make the request
# timeout: give up after 10 seconds instead of waiting forever for an answer
response = requests.get("https://jsonplaceholder.typicode.com/todos", timeout=10)

In [None]:
# Check the HTTP status code: a Response object on the last line of a cell
# is displayed as <Response [status_code]>
response

<Response [200]>

In [None]:
# Example of what JSON looks like

# Parse the body of the API response as JSON and display the resulting Python object
response.json()

### GitHub API - Accessing the data in the JSON

Now that we know
- what an API is
- how to request  information from one (requests)
- how the information will be delivered to us (json)

Let's look at how we can use this information. We will first look at how we can access particular values within the JSON. Then we will look at a couple of methods to make a DataFrame from the JSON.

Github has many APIs. Here we'll look at a couple

In [None]:
# Very basic API (a string returned)

# GitHub's Zen API produces a new inspirational phrase every 30 seconds
# Run this cell again in 30 seconds to see a different output

# timeout: give up after 10 seconds instead of waiting forever for an answer
resp = requests.get("https://api.github.com/zen", timeout=10)
resp.text

'Keep it logically awesome.'

In [None]:
# More complex API (a large json returned)

# Github's Event API shows the events that power the various activity streams on the site
# In other words, what's happening on Github, who's updating what?

# timeout: give up after 10 seconds instead of waiting forever for an answer
response = requests.get('https://api.github.com/events', timeout=10)
# stop here with a clear HTTPError if the request was refused; otherwise the
# error body would be parsed and the cells below would fail in a confusing way
response.raise_for_status()
github_response = response.json()
github_response

In [None]:
# How many events are we looking at?
# github_response is the parsed JSON of the events request, so len() counts its top-level elements
len(github_response)

30

In [None]:
# What are the keys of the 1st event (index 0)?
github_response[0].keys()

dict_keys(['id', 'type', 'actor', 'repo', 'payload', 'public', 'created_at'])

In [None]:
# The value stored under 'repo' is itself a dictionary (a sub-dictionary)
# What are the keys of that 'repo' dictionary?
github_response[0]['repo'].keys()

dict_keys(['id', 'name', 'url'])

In [None]:
# What's the value for the key 'name' inside 'repo'?
# Keys are chained one after the other to walk down the nested dictionaries
github_response[0]['repo']['name']

'xowallets/Q9KcYbjUKo'

#### Transforming a JSON into a DataFrame

##### Option 1: pd.DataFrame()

In [None]:
# Turn the list of events into a pandas DataFrame: one row per event and one
# column per top-level key (nested dictionaries stay as dictionaries in the cells)
pd.DataFrame(github_response)

Unnamed: 0,id,type,actor,repo,payload,public,created_at,org
0,24390537021,CreateEvent,"{'id': 114884843, 'login': 'xowallets', 'displ...","{'id': 545543190, 'name': 'xowallets/Q9KcYbjUK...","{'ref': None, 'ref_type': 'repository', 'maste...",True,2022-10-04T14:55:12Z,
1,24390537089,PullRequestEvent,"{'id': 5615666, 'login': 'jko314', 'display_lo...","{'id': 374737509, 'name': 'Terracotta-OSS/dso'...","{'action': 'closed', 'number': 89, 'pull_reque...",True,2022-10-04T14:55:12Z,"{'id': 4680389, 'login': 'Terracotta-OSS', 'gr..."
2,24390537096,PullRequestEvent,"{'id': 84767360, 'login': 'dhthinh7', 'display...","{'id': 489880536, 'name': 'dhthinh7/React', 'u...","{'action': 'closed', 'number': 3, 'pull_reques...",True,2022-10-04T14:55:12Z,
3,24390536974,PushEvent,"{'id': 13166525, 'login': 'fredericobormann', ...","{'id': 210604976, 'name': 'lawandorga/lawandor...","{'push_id': 11222157390, 'size': 1, 'distinct_...",True,2022-10-04T14:55:12Z,"{'id': 81813394, 'login': 'lawandorga', 'grava..."
4,24390537071,PushEvent,"{'id': 113664282, 'login': 'Lamech74', 'displa...","{'id': 545511580, 'name': 'Lamech74/alx-zero_d...","{'push_id': 11222157436, 'size': 1, 'distinct_...",True,2022-10-04T14:55:12Z,
5,24390536997,PushEvent,"{'id': 79056677, 'login': 'n0eyes', 'display_l...","{'id': 516495228, 'name': 'n0eyes/Packman-Clie...","{'push_id': 11222157387, 'size': 6, 'distinct_...",True,2022-10-04T14:55:12Z,
6,24390537053,PushEvent,"{'id': 105224534, 'login': 'Emmyrald', 'displa...","{'id': 545468766, 'name': 'Emmyrald/alx-zero_d...","{'push_id': 11222157469, 'size': 2, 'distinct_...",True,2022-10-04T14:55:12Z,
7,24390536960,CreateEvent,"{'id': 114422033, 'login': 'PGSLOTTH888', 'dis...","{'id': 545543069, 'name': 'PGSLOTTH888/lEr2jKj...","{'ref': 'master', 'ref_type': 'branch', 'maste...",True,2022-10-04T14:55:12Z,
8,24390537011,PullRequestEvent,"{'id': 22329677, 'login': 'christocarr', 'disp...","{'id': 137716163, 'name': 'christocarr/contact...","{'action': 'closed', 'number': 38, 'pull_reque...",True,2022-10-04T14:55:12Z,
9,24390537015,CreateEvent,"{'id': 112966868, 'login': 'fedorks', 'display...","{'id': 545543193, 'name': 'fedorks/HomeWork6_0...","{'ref': None, 'ref_type': 'repository', 'maste...",True,2022-10-04T14:55:12Z,


##### Option 2: pd.json_normalize()
You may notice above that many columns still contain dictionaries as values. We can correct this using json_normalize()

In [None]:
# https://pandas.pydata.org/docs/reference/api/pandas.json_normalize.html

# json_normalize spreads the nested dictionaries over their own columns,
# named with dots, e.g. actor.login
pd.json_normalize(github_response)

Unnamed: 0,id,type,public,created_at,actor.id,actor.login,actor.display_login,actor.gravatar_id,actor.url,actor.avatar_url,...,payload.review.user.site_admin,payload.review.body,payload.review.commit_id,payload.review.submitted_at,payload.review.state,payload.review.html_url,payload.review.pull_request_url,payload.review.author_association,payload.review._links.html.href,payload.review._links.pull_request.href
0,24390537021,CreateEvent,True,2022-10-04T14:55:12Z,114884843,xowallets,xowallets,,https://api.github.com/users/xowallets,https://avatars.githubusercontent.com/u/114884...,...,,,,,,,,,,
1,24390537089,PullRequestEvent,True,2022-10-04T14:55:12Z,5615666,jko314,jko314,,https://api.github.com/users/jko314,https://avatars.githubusercontent.com/u/5615666?,...,,,,,,,,,,
2,24390537096,PullRequestEvent,True,2022-10-04T14:55:12Z,84767360,dhthinh7,dhthinh7,,https://api.github.com/users/dhthinh7,https://avatars.githubusercontent.com/u/84767360?,...,,,,,,,,,,
3,24390536974,PushEvent,True,2022-10-04T14:55:12Z,13166525,fredericobormann,fredericobormann,,https://api.github.com/users/fredericobormann,https://avatars.githubusercontent.com/u/13166525?,...,,,,,,,,,,
4,24390537071,PushEvent,True,2022-10-04T14:55:12Z,113664282,Lamech74,Lamech74,,https://api.github.com/users/Lamech74,https://avatars.githubusercontent.com/u/113664...,...,,,,,,,,,,
5,24390536997,PushEvent,True,2022-10-04T14:55:12Z,79056677,n0eyes,n0eyes,,https://api.github.com/users/n0eyes,https://avatars.githubusercontent.com/u/79056677?,...,,,,,,,,,,
6,24390537053,PushEvent,True,2022-10-04T14:55:12Z,105224534,Emmyrald,Emmyrald,,https://api.github.com/users/Emmyrald,https://avatars.githubusercontent.com/u/105224...,...,,,,,,,,,,
7,24390536960,CreateEvent,True,2022-10-04T14:55:12Z,114422033,PGSLOTTH888,PGSLOTTH888,,https://api.github.com/users/PGSLOTTH888,https://avatars.githubusercontent.com/u/114422...,...,,,,,,,,,,
8,24390537011,PullRequestEvent,True,2022-10-04T14:55:12Z,22329677,christocarr,christocarr,,https://api.github.com/users/christocarr,https://avatars.githubusercontent.com/u/22329677?,...,,,,,,,,,,
9,24390537015,CreateEvent,True,2022-10-04T14:55:12Z,112966868,fedorks,fedorks,,https://api.github.com/users/fedorks,https://avatars.githubusercontent.com/u/112966...,...,,,,,,,,,,


#### Selecting only certain values by iterating with a for loop

If we only want to select certain parts of the JSON
- Option 1: make a DataFrame and drop the rest
- Option 2: Use a for loop to extract only the required information

In [None]:
# login - first value: 1st event -> 'actor' dictionary -> 'login'
github_response[0]['actor']['login']

'xowallets'

In [None]:
# repo - first value: 1st event -> 'repo' dictionary -> 'name'
github_response[0]['repo']['name']

'xowallets/Q9KcYbjUKo'

In [None]:
# event_type - first value: 1st event -> 'type'
github_response[0]['type']

'CreateEvent'

In [None]:
# https://www.w3schools.com/python/python_for_loops.asp

# three empty lists that the loop fills in parallel, so position n in each
# list belongs to the same event
login, repo, event_type = [], [], []

# walk over the events themselves rather than over their positions
for event in github_response:
    login.append(event['actor']['login'])    # the actor's login
    repo.append(event['repo']['name'])       # the repo's name
    event_type.append(event['type'])         # the event's type

In [None]:
# Let's have a look at the login list (one entry per event, filled by the loop above)
login

['xowallets',
 'jko314',
 'dhthinh7',
 'fredericobormann',
 'Lamech74',
 'n0eyes',
 'Emmyrald',
 'PGSLOTTH888',
 'christocarr',
 'fedorks',
 'uftd222',
 'github-actions[bot]',
 'emeagenciadigital',
 'henriqueramosqs',
 'MixedMachine',
 'quentinneyraud',
 'Princelet',
 'fixin4488',
 'Ashu-006',
 'volmen3',
 'Benio10ebon',
 'zhihanii',
 'Angie-Santos',
 'srabin1',
 'geirolz',
 'Nelnaji',
 'MrRhuezzler',
 'aimemalaika',
 'wyb1',
 'IsabelArellano']

In [None]:
# Let's have a look at the repo list (one entry per event, filled by the loop above)
repo

['xowallets/Q9KcYbjUKo',
 'Terracotta-OSS/dso',
 'dhthinh7/React',
 'lawandorga/lawandorga-backend-service',
 'Lamech74/alx-zero_day',
 'n0eyes/Packman-Client',
 'Emmyrald/alx-zero_day',
 'PGSLOTTH888/lEr2jKjoSA',
 'christocarr/contacts',
 'fedorks/HomeWork6_031022',
 'uftd222/HQu72hpS6v',
 'brochjad/pub_hofs',
 'emeagenciadigital/static-eme',
 'henriqueramosqs/Fire-Emblem',
 'MixedMachine/SimpleAuthBackend',
 'quentinneyraud/nuxt-templates',
 'Princelet/Car_Practice',
 'fixin4488/XhTH1v4kxm',
 'Ashu-006/Happy-Birthday-Card',
 'volmen3/SPHinXsys-source',
 'Benio10ebon/Fnf-vs-Os-Vigilantes-Psych-engine-port',
 'dcloudio/uni-app',
 'amappola7/social-network',
 'srabin1/starter-web',
 'geirolz/fp-microservice',
 'Nelnaji/Himara',
 'MrRhuezzler/Programs',
 'henrycode460/myblog',
 'gardener/gardener',
 'IsabelArellano/Aletheia']

In [None]:
# Let's have a look at the event_type list (one entry per event, filled by the loop above)
event_type

['CreateEvent',
 'PullRequestEvent',
 'PullRequestEvent',
 'PushEvent',
 'PushEvent',
 'PushEvent',
 'PushEvent',
 'CreateEvent',
 'PullRequestEvent',
 'CreateEvent',
 'CreateEvent',
 'PushEvent',
 'PushEvent',
 'PushEvent',
 'PushEvent',
 'PushEvent',
 'PushEvent',
 'CreateEvent',
 'CreateEvent',
 'PushEvent',
 'PushEvent',
 'WatchEvent',
 'PushEvent',
 'PushEvent',
 'PullRequestEvent',
 'PushEvent',
 'PushEvent',
 'PullRequestReviewEvent',
 'PullRequestReviewEvent',
 'CreateEvent']

### International Space Station API - just another cool API

Send a simple `get` request to know where the ISS is right now.

Docs here: http://open-notify.org/Open-Notify-API/ISS-Location-Now/

In [None]:
# Address of the Open Notify endpoint that reports where the ISS is right now
url = "http://api.open-notify.org/iss-now.json"

In [None]:
# Send the GET request; timeout: give up after 10 seconds instead of waiting forever
response = requests.get(url, timeout=10)

In [None]:
# Parse the response body as JSON and display the resulting dictionary
response.json()

{'message': 'success',
 'timestamp': 1664895612,
 'iss_position': {'latitude': '-8.5030', 'longitude': '102.8478'}}

## Spotipy - now that you know all about APIs, let's use that knowledge on something fun

Spotify has an API that allows users to gather information about songs and even interact with other users and playlists. To make their usage in Python easier, someone created `spotipy`, a library with some convenient functions to send requests and collect data.

Create / log into a Spotify account (https://developer.spotify.com/dashboard/login) and follow these steps (only the "Register your App" section): https://developer.spotify.com/documentation/general/guides/authorization/app-settings/

#### Authentication

With most APIs we need to authenticate ourselves. This is often done with a username and password. You will likely use a different username and password for most APIs, so make sure you're using a password manager, or keeping everything written down somewhere safe.

If ever you'd like more information about spotipy [here are the docs](https://spotipy.readthedocs.io/en/2.16.1/)

In [None]:
# import libraries
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [None]:
# Initialize spotipy with the credentials of your Spotify app.
# Never paste the client id / secret into the notebook: anyone who can read the
# file can use them (rotate any secret that was ever shared this way).
# They are read from the environment instead, and asked for with getpass
# (which does not echo the input) when the variables are not set,
# e.g. on a fresh Colab session.
import os
from getpass import getpass

for _name in ("SPOTIPY_CLIENT_ID", "SPOTIPY_CLIENT_SECRET"):
    if not os.environ.get(_name):
        # remember the answer for the rest of the session so it is asked only once
        os.environ[_name] = getpass(f"{_name}: ")

sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(
    client_id=os.environ["SPOTIPY_CLIENT_ID"],
    client_secret=os.environ["SPOTIPY_CLIENT_SECRET"]))


In [None]:
# (Re)create the spotipy client.
# The client id / secret must not be written in the notebook (rotate any secret
# that was ever shared this way): they are taken from the environment, and asked
# for with getpass (which does not echo the input) when the variables are not set.
import os
from getpass import getpass

for _name in ("SPOTIPY_CLIENT_ID", "SPOTIPY_CLIENT_SECRET"):
    if not os.environ.get(_name):
        # remember the answer for the rest of the session so it is asked only once
        os.environ[_name] = getpass(f"{_name}: ")

sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(
    client_id=os.environ["SPOTIPY_CLIENT_ID"],
    client_secret=os.environ["SPOTIPY_CLIENT_SECRET"]))

#### Searching songs with 'queries' with `sp.search`

This method allows you to find songs using Spotify's search engine. That's convenient when you don't have the exact "id" of a song.

In [None]:
# Search Spotify for 'Lady Gaga', restricted to the first 10 results (limit = 10);
# the response is kept in `results` for the cells below

results = sp.search(q="Lady Gaga", limit = 10)

Explore the object returned by the request. As it's a dictionary (with nested dictionaries inside), using `.keys` is a great way to see what's in there:

In [None]:
# top-level keys of the search response
results.keys()

dict_keys(['tracks'])

In [None]:
# Only one key at the top level: everything else must be nested below it,
# so let's delve deeper and list the keys of the 'tracks' dictionary
results["tracks"].keys()

dict_keys(['href', 'items', 'limit', 'next', 'offset', 'previous', 'total'])

This is the url of your request:

In [None]:
# 'href' is the address of the search request itself: an API endpoint
# (api.spotify.com/v1/search?...), not a link to a track

# We can explore further by adding keys one after the other

results["tracks"]["href"]

'https://api.spotify.com/v1/search?query=Lady+Gaga&type=track&offset=0&limit=10'

This is the name of the first song returned by the API:

In [None]:
# 'items' is the list of tracks found; [0] picks the first one, 'name' its title
results["tracks"]["items"][0]["name"]

'Bad Romance'

As one song can have many artists, the artists are returned as a list: note the square brackets.

In [None]:
# the 'artists' entry of the first track: a list of artist dictionaries
results["tracks"]["items"][0]["artists"]

[{'external_urls': {'spotify': 'https://open.spotify.com/artist/1HY2Jd0NmPuamShAr6KMms'},
  'href': 'https://api.spotify.com/v1/artists/1HY2Jd0NmPuamShAr6KMms',
  'id': '1HY2Jd0NmPuamShAr6KMms',
  'name': 'Lady Gaga',
  'type': 'artist',
  'uri': 'spotify:artist:1HY2Jd0NmPuamShAr6KMms'}]

There are some other interesting features contained in the search results:

In [None]:
# https://developer.spotify.com/documentation/web-api/reference/#/operations/get-several-tracks
# The popularity of the track. The value will be between 0 and 100, with 100 being the most popular.
# Here: the popularity of the first track in the search results

results["tracks"]["items"][0]["popularity"]

83

This is how Spotify identifies individual songs: with a Uniform Resource Identifier  or `uri`. (the `id` and the `url` are also ways to identify each song uniquely).

In [None]:
# the Spotify URI of the first track in the search results
results["tracks"]["items"][0]["uri"]

'spotify:track:0SiywuOBRcynK0uKGWdCnn'

Here we look for 10 songs by the Red Hot Chili Peppers and store the `uri` of the songs and their names.

In [None]:
# ask Spotify for 10 tracks matching the query and keep the whole response
red_hot = sp.search(q="Red hot chili peppers", limit=10)

# "items" holds the songs: build one list with their "uri"
# and one with their "name", both in the order Spotify returned them
list_of_uri = [item["uri"] for item in red_hot["tracks"]["items"]]
list_of_song_names = [item["name"] for item in red_hot["tracks"]["items"]]

# show both lists, separated by blank lines
print(list_of_uri, end="\n\n\n")
print(list_of_song_names)

['spotify:track:3d9DChrdc6BOeFsbrZ3Is0', 'spotify:track:3xJu5hrOU9OvFQSGLQiwQS', 'spotify:track:64BbK9SFKH2jk86U3dGj2P', 'spotify:track:3ZOEytgrvLwQaqXreDs2Jx', 'spotify:track:4dzbGvxqQ1DsF6m6RUlPwg', 'spotify:track:48UPSzbZjgc449aqz8bxox', 'spotify:track:10Nmj3JCNoMeBQ87uw5j8k', 'spotify:track:1G391cbiT3v3Cywg8T7DM1', 'spotify:track:0Kojfmpnf0A2yC1zyv39Zx', 'spotify:track:2aibwv5hGXSgw7Yru8IYTO']


['Under the Bridge', 'Anthony Kiedis', 'Otherside', "Can't Stop", 'Pedigree', 'Californication', 'Dani California', 'Scar Tissue', 'Buried Alive', 'Snow (Hey Oh)']


#### Searching multiple artists

Here we first create a list of artists we want to gather songs from. Then we iterate through them and append the results into a big list called `results`.

In [None]:
# the artists to search for; each name is sent to Spotify as a search query below
artists = ["Red hot chili peppers", "SCARR", "Whitney Houston"]

In [None]:
# one search response (limited to 10 hits) per artist, in the same order as `artists`
results = [sp.search(q=artist, limit=10) for artist in artists]

In [None]:
# Let's look at the second element in the results list (index 1):
# the search response for the 2nd entry of `artists`
results[1]

{'tracks': {'href': 'https://api.spotify.com/v1/search?query=SCARR&type=track&offset=0&limit=10',
  'items': [{'album': {'album_type': 'album',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/4fxd5Ee7UefO4CUXgwJ7IP'},
       'href': 'https://api.spotify.com/v1/artists/4fxd5Ee7UefO4CUXgwJ7IP',
       'id': '4fxd5Ee7UefO4CUXgwJ7IP',
       'name': 'Giveon',
       'type': 'artist',
       'uri': 'spotify:artist:4fxd5Ee7UefO4CUXgwJ7IP'}],
     'available_markets': ['AD',
      'AE',
      'AG',
      'AL',
      'AM',
      'AO',
      'AR',
      'AT',
      'AU',
      'AZ',
      'BA',
      'BB',
      'BD',
      'BE',
      'BF',
      'BG',
      'BH',
      'BI',
      'BJ',
      'BN',
      'BO',
      'BR',
      'BS',
      'BT',
      'BW',
      'BY',
      'BZ',
      'CA',
      'CD',
      'CG',
      'CH',
      'CI',
      'CL',
      'CM',
      'CO',
      'CR',
      'CV',
      'CW',
      'CY',
      'CZ',
      'DE',
      'DJ',
   

We can iterate through the `results` list and get just the names of all the songs:

In [None]:
# flatten the responses: for every search response, take the name of every track it holds
song_names = [
    item["name"]
    for result in results
    for item in result["tracks"]["items"]
]

In [None]:
# all the song names collected above, artist after artist
song_names

['Under the Bridge',
 'Anthony Kiedis',
 'Otherside',
 "Can't Stop",
 'Pedigree',
 'Californication',
 'Dani California',
 'Scar Tissue',
 'Buried Alive',
 'Snow (Hey Oh)',
 'Scarred',
 'Emotionally Scarred',
 'Scary Garry',
 'Scarred From Love',
 'SCARR',
 'Scarred',
 'MJ (feat. Quezz Ruthless)',
 'SoIcyBoyz 2 (feat. Pooh Shiesty, Foogiano & Tay Keith)',
 "Scarred Baby's",
 'Fantasy (feat. Offset)',
 'I Wanna Dance with Somebody (Who Loves Me)',
 'I Will Always Love You',
 'Higher Love',
 'How Will I Know',
 'How Will I Know',
 'I Have Nothing',
 'Greatest Love of All',
 'Saving All My Love for You',
 'You Give Good Love',
 'Where Do Broken Hearts Go']

### Playlists

Using spotipy we can both build and read Spotify playlists. Today we will only show you how to read information from a playlist. However, if you wish to build one, we strongly encourage you to read the [documentation](https://spotipy.readthedocs.io/en/2.19.0/) and explore further.

In [None]:
# Read the tracks of a playlist.
# playlist_items is the current spotipy method for this (user_playlist_tracks is
# deprecated); additional_types=("track",) keeps the request limited to tracks,
# as before.
my_playlist = sp.playlist_items("spotify:playlist:0ce6Rmxf7QXroqa1wzjWY8", additional_types=("track",))

Extract song IDs from a playlist

In [None]:
# display the raw response: a dictionary whose 'items' list holds the playlist entries
my_playlist

{'href': 'https://api.spotify.com/v1/playlists/0ce6Rmxf7QXroqa1wzjWY8/tracks?offset=0&limit=100&additional_types=track',
 'items': [{'added_at': '2021-09-22T07:34:05Z',
   'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/gperdigo'},
    'href': 'https://api.spotify.com/v1/users/gperdigo',
    'id': 'gperdigo',
    'type': 'user',
    'uri': 'spotify:user:gperdigo'},
   'is_local': False,
   'primary_color': None,
   'track': {'album': {'album_type': 'single',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/579T5fNgfbDetyamUTAetw'},
       'href': 'https://api.spotify.com/v1/artists/579T5fNgfbDetyamUTAetw',
       'id': '579T5fNgfbDetyamUTAetw',
       'name': 'S+C+A+R+R',
       'type': 'artist',
       'uri': 'spotify:artist:579T5fNgfbDetyamUTAetw'}],
     'available_markets': ['AD',
      'AE',
      'AG',
      'AL',
      'AM',
      'AO',
      'AR',
      'AT',
      'AU',
      'AZ',
      'BA',
      'BB',
      'BD',
    

In [None]:
# URI of the first playlist entry: 'items' -> first entry -> 'track' -> 'uri'
my_playlist["items"][0]["track"]["uri"]

'spotify:track:5Tnx4R7Gwj1LZsfssfzchh'

### Audio features

You can find an explanation of the audio features [here](https://developer.spotify.com/documentation/web-api/reference/tracks/get-audio-features/).

In [None]:
# Request the audio features of one track, identified by its Spotify URI;
# the answer comes back as a list holding one dictionary of features
sp.audio_features("spotify:track:6Sy9BUbgFse0n0LPA5lwy5")

[{'danceability': 0.528,
  'energy': 0.965,
  'key': 11,
  'loudness': -7.984,
  'mode': 0,
  'speechiness': 0.0465,
  'acousticness': 0.141,
  'instrumentalness': 0.985,
  'liveness': 0.0797,
  'valence': 0.587,
  'tempo': 136.065,
  'type': 'audio_features',
  'id': '6Sy9BUbgFse0n0LPA5lwy5',
  'uri': 'spotify:track:6Sy9BUbgFse0n0LPA5lwy5',
  'track_href': 'https://api.spotify.com/v1/tracks/6Sy9BUbgFse0n0LPA5lwy5',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/6Sy9BUbgFse0n0LPA5lwy5',
  'duration_ms': 225493,
  'time_signature': 4}]

### Creating a function that takes a song name and returns its audio features 

In [None]:
def song_features(human_song_title):
    """Search Spotify for a song title and collect info about each hit.

    Parameters
    ----------
    human_song_title : str
        Text sent to Spotify's search; only the first 10 results are used.

    Returns
    -------
    list of dict
        One dictionary per search result with the keys 'Artist' (name of the
        first listed artist), 'Title', 'Album' and 'Audio Description' (the
        value returned by ``sp.audio_features`` for that track).
    """
    # a fresh list on every call: the function used to append to a shared
    # global list, so the results of earlier calls piled up in later ones
    songs = []
    # search for the song title you enter into the function, limited to the first 10 results
    results = sp.search(q=human_song_title, limit = 10)['tracks']['items']
    # loop over the results, so we only select the parts of the json we need
    for track in results:
        songs.append({
            'Artist': track['artists'][0]['name'],
            'Title': track['name'],
            'Album': track['album']['name'],
            'Audio Description': sp.audio_features(track['id']),
        })
    return songs

# call the function with a song to test, and keep the result for the cells below
list_of_songs = song_features("Under the Bridge")
list_of_songs

[{'Artist': 'Red Hot Chili Peppers',
  'Title': 'Under the Bridge',
  'Album': 'Blood Sugar Sex Magik (Deluxe Edition)',
  'Audio Description': [{'danceability': 0.559,
    'energy': 0.345,
    'key': 4,
    'loudness': -13.496,
    'mode': 1,
    'speechiness': 0.0459,
    'acousticness': 0.0576,
    'instrumentalness': 0.000105,
    'liveness': 0.141,
    'valence': 0.458,
    'tempo': 84.581,
    'type': 'audio_features',
    'id': '3d9DChrdc6BOeFsbrZ3Is0',
    'uri': 'spotify:track:3d9DChrdc6BOeFsbrZ3Is0',
    'track_href': 'https://api.spotify.com/v1/tracks/3d9DChrdc6BOeFsbrZ3Is0',
    'analysis_url': 'https://api.spotify.com/v1/audio-analysis/3d9DChrdc6BOeFsbrZ3Is0',
    'duration_ms': 264307,
    'time_signature': 4}]},
 {'Artist': 'owlh',
  'Title': 'Under the Bridge',
  'Album': 'cover to cover',
  'Audio Description': [{'danceability': 0.781,
    'energy': 0.355,
    'key': 9,
    'loudness': -8.158,
    'mode': 1,
    'speechiness': 0.0376,
    'acousticness': 0.471,
    'in

In [None]:
# make a dataframe from the list of songs created by the function above:
# one row per song, one column per dictionary key
df = pd.DataFrame(list_of_songs)

df

Unnamed: 0,Artist,Title,Album,Audio Description
0,Red Hot Chili Peppers,Under the Bridge,Blood Sugar Sex Magik (Deluxe Edition),"[{'danceability': 0.559, 'energy': 0.345, 'key..."
1,owlh,Under the Bridge,cover to cover,"[{'danceability': 0.781, 'energy': 0.355, 'key..."
2,Adele,Water Under the Bridge,25,"[{'danceability': 0.59, 'energy': 0.833, 'key'..."
3,Red Hot Chili Peppers,Under the Bridge,Greatest Hits,"[{'danceability': 0.554, 'energy': 0.49, 'key'..."
4,Rockabye Baby!,Under the Bridge,Lullaby Renditions of Red Hot Chili Peppers,"[{'danceability': 0.704, 'energy': 0.06, 'key'..."
5,Jack Harlow,Churchill Downs (feat. Drake),Come Home The Kids Miss You,"[{'danceability': 0.71, 'energy': 0.522, 'key'..."
6,All Saints,Under the Bridge,All Saints,"[{'danceability': 0.717, 'energy': 0.564, 'key..."
7,Red Hot Chili Peppers,Under the Bridge,Blood Sugar Sex Magik,"[{'danceability': 0.559, 'energy': 0.345, 'key..."
8,Red Hot Chili Peppers,Under The Bridge,What Hits!?,"[{'danceability': 0.475, 'energy': 0.401, 'key..."
9,Sam Hunt,Water Under The Bridge,Water Under The Bridge,"[{'danceability': 0.635, 'energy': 0.866, 'key..."


As you can see, this DataFrame looks a bit off as the audio descriptions aren't expanded - all of the data is clumped together in one cell. Let's correct this, so we can see each audio feature as an individual column.

In [None]:
# quick function we can use to select only the 1st item
# this can also be done simply with [0], but we wanted to show you how you can incorporate a custom function into your work

def first_value(x):
    """Return the item stored at index 0 of ``x``."""
    head = x[0]
    return head

# Each 'Audio Description' cell holds a list: first_value takes its 1st item (a dictionary),
# and json_normalize turns those dictionaries into a DataFrame with one column per key
df_audio_features = pd.json_normalize(df['Audio Description'].map(first_value))

df_audio_features

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.559,0.345,4,-13.496,1,0.0459,0.0576,0.000105,0.141,0.458,84.581,audio_features,3d9DChrdc6BOeFsbrZ3Is0,spotify:track:3d9DChrdc6BOeFsbrZ3Is0,https://api.spotify.com/v1/tracks/3d9DChrdc6BO...,https://api.spotify.com/v1/audio-analysis/3d9D...,264307,4
1,0.781,0.355,9,-8.158,1,0.0376,0.471,0.878,0.125,0.363,131.978,audio_features,1AyRpRvLwUwkXwGGuNZHmN,spotify:track:1AyRpRvLwUwkXwGGuNZHmN,https://api.spotify.com/v1/tracks/1AyRpRvLwUwk...,https://api.spotify.com/v1/audio-analysis/1AyR...,244196,4
2,0.59,0.833,5,-6.503,0,0.0615,0.0142,5e-06,0.105,0.538,94.963,audio_features,4jL6WWKFDqCOPo2hC3VhSS,spotify:track:4jL6WWKFDqCOPo2hC3VhSS,https://api.spotify.com/v1/tracks/4jL6WWKFDqCO...,https://api.spotify.com/v1/audio-analysis/4jL6...,240439,4
3,0.554,0.49,4,-8.046,1,0.0457,0.0168,0.000534,0.136,0.513,84.275,audio_features,23NPGXlSaIqWzvxIRhM2oG,spotify:track:23NPGXlSaIqWzvxIRhM2oG,https://api.spotify.com/v1/tracks/23NPGXlSaIqW...,https://api.spotify.com/v1/audio-analysis/23NP...,265507,4
4,0.704,0.06,4,-21.087,1,0.0907,0.946,0.912,0.109,0.791,158.085,audio_features,45VCZtguiCNu5zJ3YkmcWC,spotify:track:45VCZtguiCNu5zJ3YkmcWC,https://api.spotify.com/v1/tracks/45VCZtguiCNu...,https://api.spotify.com/v1/audio-analysis/45VC...,259693,4
5,0.71,0.522,11,-9.102,1,0.36,0.615,0.0,0.11,0.37,96.448,audio_features,3EMp20j5E42MxfFbsEsIvD,spotify:track:3EMp20j5E42MxfFbsEsIvD,https://api.spotify.com/v1/tracks/3EMp20j5E42M...,https://api.spotify.com/v1/audio-analysis/3EMp...,309327,4
6,0.717,0.564,6,-7.752,0,0.0551,0.0452,0.000464,0.0897,0.728,85.996,audio_features,78M2x5ojbdTmUOpWrbijG3,spotify:track:78M2x5ojbdTmUOpWrbijG3,https://api.spotify.com/v1/tracks/78M2x5ojbdTm...,https://api.spotify.com/v1/audio-analysis/78M2...,301067,4
7,0.559,0.345,4,-13.496,1,0.0459,0.0576,0.000105,0.141,0.458,84.581,audio_features,5PclxRY6shIQzSKxxbdZso,spotify:track:5PclxRY6shIQzSKxxbdZso,https://api.spotify.com/v1/tracks/5PclxRY6shIQ...,https://api.spotify.com/v1/audio-analysis/5Pcl...,264307,4
8,0.475,0.401,9,-11.672,1,0.0515,0.0285,5.8e-05,0.115,0.475,84.742,audio_features,7C3fJ1q01IQ137IQTRwmVZ,spotify:track:7C3fJ1q01IQ137IQTRwmVZ,https://api.spotify.com/v1/tracks/7C3fJ1q01IQ1...,https://api.spotify.com/v1/audio-analysis/7C3f...,267560,4
9,0.635,0.866,5,-2.695,1,0.0473,0.0697,0.0,0.0955,0.853,133.889,audio_features,4viFAHmivkYQKkwLvwKOgg,spotify:track:4viFAHmivkYQKkwLvwKOgg,https://api.spotify.com/v1/tracks/4viFAHmivkYQ...,https://api.spotify.com/v1/audio-analysis/4viF...,169760,4


In [None]:
# put the expanded audio features next to the original columns (rows are matched
# on the index), then leave out the old column where all the features are clumped together
new_df = (
    pd.merge(df, df_audio_features, left_index=True, right_index=True)
    .drop(columns='Audio Description')
)

new_df

Unnamed: 0,Artist,Title,Album,danceability,energy,key,loudness,mode,speechiness,acousticness,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,Red Hot Chili Peppers,Under the Bridge,Blood Sugar Sex Magik (Deluxe Edition),0.559,0.345,4,-13.496,1,0.0459,0.0576,...,0.141,0.458,84.581,audio_features,3d9DChrdc6BOeFsbrZ3Is0,spotify:track:3d9DChrdc6BOeFsbrZ3Is0,https://api.spotify.com/v1/tracks/3d9DChrdc6BO...,https://api.spotify.com/v1/audio-analysis/3d9D...,264307,4
1,owlh,Under the Bridge,cover to cover,0.781,0.355,9,-8.158,1,0.0376,0.471,...,0.125,0.363,131.978,audio_features,1AyRpRvLwUwkXwGGuNZHmN,spotify:track:1AyRpRvLwUwkXwGGuNZHmN,https://api.spotify.com/v1/tracks/1AyRpRvLwUwk...,https://api.spotify.com/v1/audio-analysis/1AyR...,244196,4
2,Adele,Water Under the Bridge,25,0.59,0.833,5,-6.503,0,0.0615,0.0142,...,0.105,0.538,94.963,audio_features,4jL6WWKFDqCOPo2hC3VhSS,spotify:track:4jL6WWKFDqCOPo2hC3VhSS,https://api.spotify.com/v1/tracks/4jL6WWKFDqCO...,https://api.spotify.com/v1/audio-analysis/4jL6...,240439,4
3,Red Hot Chili Peppers,Under the Bridge,Greatest Hits,0.554,0.49,4,-8.046,1,0.0457,0.0168,...,0.136,0.513,84.275,audio_features,23NPGXlSaIqWzvxIRhM2oG,spotify:track:23NPGXlSaIqWzvxIRhM2oG,https://api.spotify.com/v1/tracks/23NPGXlSaIqW...,https://api.spotify.com/v1/audio-analysis/23NP...,265507,4
4,Rockabye Baby!,Under the Bridge,Lullaby Renditions of Red Hot Chili Peppers,0.704,0.06,4,-21.087,1,0.0907,0.946,...,0.109,0.791,158.085,audio_features,45VCZtguiCNu5zJ3YkmcWC,spotify:track:45VCZtguiCNu5zJ3YkmcWC,https://api.spotify.com/v1/tracks/45VCZtguiCNu...,https://api.spotify.com/v1/audio-analysis/45VC...,259693,4
5,Jack Harlow,Churchill Downs (feat. Drake),Come Home The Kids Miss You,0.71,0.522,11,-9.102,1,0.36,0.615,...,0.11,0.37,96.448,audio_features,3EMp20j5E42MxfFbsEsIvD,spotify:track:3EMp20j5E42MxfFbsEsIvD,https://api.spotify.com/v1/tracks/3EMp20j5E42M...,https://api.spotify.com/v1/audio-analysis/3EMp...,309327,4
6,All Saints,Under the Bridge,All Saints,0.717,0.564,6,-7.752,0,0.0551,0.0452,...,0.0897,0.728,85.996,audio_features,78M2x5ojbdTmUOpWrbijG3,spotify:track:78M2x5ojbdTmUOpWrbijG3,https://api.spotify.com/v1/tracks/78M2x5ojbdTm...,https://api.spotify.com/v1/audio-analysis/78M2...,301067,4
7,Red Hot Chili Peppers,Under the Bridge,Blood Sugar Sex Magik,0.559,0.345,4,-13.496,1,0.0459,0.0576,...,0.141,0.458,84.581,audio_features,5PclxRY6shIQzSKxxbdZso,spotify:track:5PclxRY6shIQzSKxxbdZso,https://api.spotify.com/v1/tracks/5PclxRY6shIQ...,https://api.spotify.com/v1/audio-analysis/5Pcl...,264307,4
8,Red Hot Chili Peppers,Under The Bridge,What Hits!?,0.475,0.401,9,-11.672,1,0.0515,0.0285,...,0.115,0.475,84.742,audio_features,7C3fJ1q01IQ137IQTRwmVZ,spotify:track:7C3fJ1q01IQ137IQTRwmVZ,https://api.spotify.com/v1/tracks/7C3fJ1q01IQ1...,https://api.spotify.com/v1/audio-analysis/7C3f...,267560,4
9,Sam Hunt,Water Under The Bridge,Water Under The Bridge,0.635,0.866,5,-2.695,1,0.0473,0.0697,...,0.0955,0.853,133.889,audio_features,4viFAHmivkYQKkwLvwKOgg,spotify:track:4viFAHmivkYQKkwLvwKOgg,https://api.spotify.com/v1/tracks/4viFAHmivkYQ...,https://api.spotify.com/v1/audio-analysis/4viF...,169760,4


If you like a challenge and want to test what you've just learnt, follow the steps below and see if you can make a DataFrame similar to the one above, with expanded audio features. This time though, do it for a playlist of your choosing on Spotify. Then see if you can expand it to include the songs from multiple playlists.

Try not to look at the solution, but we've included it below in case you get stuck

### Collect a big dataframe of songs with their audio features

- Start by looking for a playlist on spotify (it does not have to be your playlist), and copy its url.

- Extract the audio features for each song on your playlist.

- Now collect the link of many playlists and do the same for all of them.

- Structure the information as a dataframe where each row is a song and the columns are audio features.

In [None]:
def song_features(human_song_title):
    """Search Spotify for a song title and collect info about each hit.

    Parameters
    ----------
    human_song_title : str
        Text sent to Spotify's search; only the first 10 results are used.

    Returns
    -------
    list of dict
        One dictionary per search result with the keys 'Artist' (name of the
        first listed artist), 'Title', 'Album' and 'Audio Description' (the
        value returned by ``sp.audio_features`` for that track).
    """
    # a fresh list on every call: the function used to append to a shared
    # global list, so the results of earlier calls piled up in later ones
    songs = []
    # search for the song title you enter into the function, limited to the first 10 results
    results = sp.search(q=human_song_title, limit = 10)['tracks']['items']
    # loop over the results, so we only select the parts of the json we need
    for track in results:
        songs.append({
            'Artist': track['artists'][0]['name'],
            'Title': track['name'],
            'Album': track['album']['name'],
            'Audio Description': sp.audio_features(track['id']),
        })
    return songs

# call the function with a song to test, and keep the result in list_of_songs
list_of_songs = song_features("Under the Bridge")
list_of_songs

#### Solution

In [None]:
# Spotify URIs of the playlists whose tracks we want to collect
list_of_playlists = ["spotify:playlist:2zjepkjZxLpeIBlvPCWIHl",
                    "spotify:playlist:0ce6Rmxf7QXroqa1wzjWY8"]

track_list = []
for playlist_uri in list_of_playlists:
    # playlist_items is the replacement for the deprecated user_playlist_tracks;
    # additional_types=("track",) keeps the result limited to tracks, as before
    page = sp.playlist_items(playlist_uri, additional_types=("track",))
    # the API returns one page at a time (100 items by default), so keep
    # following the 'next' link until sp.next() returns None
    while page:
        for item in page['items']:
            track = item['track']
            # skip entries without a usable track id (e.g. removed or local
            # tracks), for which no audio features can be requested
            if track is None or track.get('id') is None:
                continue
            track_list.append({
                # name of the first listed artist
                "Artist": track['artists'][0]['name'],
                "Title": track['name'],
                "Album": track['album']['name'],
                # one extra API request per track
                "Audio Description": sp.audio_features(track['id']),
            })
        page = sp.next(page)

print(track_list)

[{'Artist': 'James Blake', 'Title': 'Words That We Both Know', 'Album': '200 Press EP', 'Audio Description': [{'danceability': 0.5, 'energy': 0.439, 'key': 7, 'loudness': -9.207, 'mode': 0, 'speechiness': 0.107, 'acousticness': 0.927, 'instrumentalness': 0.000445, 'liveness': 0.281, 'valence': 0.42, 'tempo': 62.058, 'type': 'audio_features', 'id': '4BZtAKMwi4aM18Zr2mTIgt', 'uri': 'spotify:track:4BZtAKMwi4aM18Zr2mTIgt', 'track_href': 'https://api.spotify.com/v1/tracks/4BZtAKMwi4aM18Zr2mTIgt', 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/4BZtAKMwi4aM18Zr2mTIgt', 'duration_ms': 63025, 'time_signature': 5}]}, {'Artist': 'Gene Wilder', 'Title': 'Pure Imagination - From "Willy Wonka & The Chocolate Factory" Soundtrack', 'Album': 'Willy Wonka & The Chocolate Factory', 'Audio Description': [{'danceability': 0.251, 'energy': 0.115, 'key': 10, 'loudness': -20.223, 'mode': 0, 'speechiness': 0.038, 'acousticness': 0.695, 'instrumentalness': 2.11e-05, 'liveness': 0.617, 'valence': 0.1

In [None]:
# One row per collected track; the dictionary keys become the column names
playlist_df = pd.DataFrame(data=track_list)

In [None]:
def first_value(x):
    """Return the item stored at index (or key) 0 of ``x``.

    Whatever exception ``x[0]`` raises (for example ``IndexError`` on an
    empty list) is passed on to the caller unchanged.
    """
    head = x[0]
    return head

In [None]:
# Take element 0 of every 'Audio Description' value and flatten the
# resulting dictionaries into one column per audio feature.
# NOTE(review): the saved run of this cell ended in a KeyError - presumably
# it was executed against stale kernel state; re-run top to bottom to confirm.
df_a_f = pd.json_normalize(
    data=playlist_df['Audio Description'].apply(first_value)
)

KeyError: ignored

In [None]:
# Flatten the playlist's audio features into one column per feature.
# This reads playlist_df (built in this solution) rather than `df`, the
# song-search frame left over from the earlier exercise.
df_audio_features = pd.json_normalize(playlist_df['Audio Description'].apply(first_value))

df_audio_features

In [None]:
# Put the flattened audio features next to artist/title/album by matching
# the rows of both frames on their index
new_playlist_df = playlist_df.merge(
    df_a_f,
    left_index=True,
    right_index=True,
)

NameError: ignored

In [None]:
# Remove the nested 'Audio Description' column from the merged frame.
# Rebinding the name (instead of inplace=True) together with
# errors='ignore' lets this cell be re-run without raising a KeyError
# once the column is already gone.
new_playlist_df = new_playlist_df.drop(columns='Audio Description', errors='ignore')

In [None]:
# Show the merged frame; a bare last expression is rendered by the notebook
new_playlist_df