In [2]:
import spotipy

from spotipy.oauth2 import SpotifyClientCredentials

# Make sure you have created a spotify_credentials.py file in the same directory as this notebook.
# Its contents should be:
# CLIENT_ID = 'YOUR_CLIENT_ID'
# CLIENT_SECRET_ID = 'YOUR_CLIENT_SECRET_ID'
from spotify_credentials import CLIENT_ID, CLIENT_SECRET_ID

In [None]:
# playlist url
# https://open.spotify.com/playlist/37i9dQZF1DX7iB3RCnBnN4?si=9d199f5212064db7

In [66]:
# Authenticate with the credentials imported from spotify_credentials.py
# and create the Spotipy client object used by the API calls in this notebook.
auth_manager = SpotifyClientCredentials(
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET_ID,
)
sp = spotipy.Spotify(auth_manager=auth_manager)

In [4]:
# How to get a track id? Take it from the song's sharing link:
#   https://open.spotify.com/track/6IPwKM3fUUzlElbvKw2sKl?si=8a9888fa74cf4f4a
# The id is the path segment right after /track/ (the ?si=... query is not part of it):
#   https://open.spotify.com/track/{TRACK_ID}?si=8a9888fa74cf4f4a

# In a similar way, you can access data about artists, albums, playlists, etc.

TRACK_ID = '6IPwKM3fUUzlElbvKw2sKl'

# fetch the track's data; the result is a dict whose keys are listed in the next cell
track_info = sp.track(TRACK_ID)

In [9]:
# Pretty-print the names of all the fields available for the track.
# Iterating over a dict yields its keys, so list(track_info) is the key list.
from pprint import pprint
pprint(list(track_info))

['album',
 'artists',
 'available_markets',
 'disc_number',
 'duration_ms',
 'explicit',
 'external_ids',
 'external_urls',
 'href',
 'id',
 'is_local',
 'name',
 'popularity',
 'preview_url',
 'track_number',
 'type',
 'uri']


In [10]:
# We'll collect the release year, id, name, and artist names of the track.
# Start with the 'artists' entry: a list holding one dict per artist.
track_info['artists']

[{'external_urls': {'spotify': 'https://open.spotify.com/artist/3uwAm6vQy7kWPS2bciKWx9'},
  'href': 'https://api.spotify.com/v1/artists/3uwAm6vQy7kWPS2bciKWx9',
  'id': '3uwAm6vQy7kWPS2bciKWx9',
  'name': 'girl in red',
  'type': 'artist',
  'uri': 'spotify:artist:3uwAm6vQy7kWPS2bciKWx9'}]

In [None]:
# Collect just the 'name' field of every artist entry attached to the track.
artist_names = [entry['name'] for entry in track_info['artists']]

In [15]:
# The release date follows the YYYY-MM-DD convention, so the release year
# is everything in front of the first '-'.
release_year = track_info['album']['release_date'].partition('-')[0]
release_year

'2018'

In [18]:
# get the Spotify URL of the track (stored under external_urls -> spotify)
spotify_url = track_info['external_urls']['spotify']
# bare last expression, so the notebook displays the value
spotify_url

'https://open.spotify.com/track/6IPwKM3fUUzlElbvKw2sKl'

In [32]:
# combine all the info into one dict that we will want to save to the db
def get_track_info(track_info):
    """Condense a Spotify track object into the fields we want to save.

    Parameters
    ----------
    track_info : dict
        A track object, e.g. the result of ``sp.track(track_id)`` or the
        ``'track'`` entry of a playlist item. It must provide the
        ``'artists'``, ``'id'`` and ``'name'`` keys.

    Returns
    -------
    dict
        ``artists`` (list of artist names), ``id``, ``name``,
        ``release_year`` (the part of the album release date before the
        first ``-``, as a string) and ``spotify_url``. ``release_year`` and
        ``spotify_url`` are ``None`` when the track carries no release date
        or no Spotify URL.
    """
    artist_names = [artist['name'] for artist in track_info['artists']]

    # The release date is formatted YYYY-MM-DD; tolerate a missing album or
    # date instead of failing on `.split`.
    release_date = (track_info.get('album') or {}).get('release_date')
    release_year = release_date.split('-')[0] if release_date is not None else None

    # Not every track object is guaranteed to expose a Spotify URL.
    spotify_url = (track_info.get('external_urls') or {}).get('spotify')

    return {
        'artists': artist_names,
        'id': track_info['id'],
        'name': track_info['name'],
        'release_year': release_year,
        'spotify_url': spotify_url,
    }
    

In [23]:
# let's see how it works!
# get_track_info expects the track object (a dict), not the bare track id,
# so fetch the track first and pass the result in
get_track_info(sp.track(TRACK_ID))

{'artists': ['girl in red'],
 'id': '6IPwKM3fUUzlElbvKw2sKl',
 'name': 'we fell in love in october',
 'release_year': '2018',
 'spotify_url': 'https://open.spotify.com/track/6IPwKM3fUUzlElbvKw2sKl'}

In [24]:
# playlist url - the playlist id is the path segment right after /playlist/
# https://open.spotify.com/playlist/37i9dQZF1DX7iB3RCnBnN4?si=9d199f5212064db7
PLAYLIST_ID = '37i9dQZF1DX7iB3RCnBnN4'
# fetch the playlist's data; its tracks are found under the 'tracks' key
# NOTE(review): the tracks href printed further down carries offset=0&limit=100,
# so presumably only the first page of tracks is embedded here - confirm for longer playlists
playlist_info = sp.playlist(PLAYLIST_ID)

In [26]:
# `pprint` was imported as a function (`from pprint import pprint`), so it is
# called directly; `pprint.pprint(...)` would need `import pprint` instead.
pprint(list(playlist_info.keys()))

['collaborative',
 'description',
 'external_urls',
 'followers',
 'href',
 'id',
 'images',
 'name',
 'owner',
 'primary_color',
 'public',
 'snapshot_id',
 'tracks',
 'type',
 'uri']


In [27]:
# `pprint` is the function imported with `from pprint import pprint`, so call it directly.
pprint(playlist_info['tracks'])

{'href': 'https://api.spotify.com/v1/playlists/37i9dQZF1DX7iB3RCnBnN4/tracks?offset=0&limit=100&additional_types=track',
 'items': [{'added_at': '2024-04-10T18:35:44Z',
            'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/'},
                         'href': 'https://api.spotify.com/v1/users/',
                         'id': '',
                         'type': 'user',
                         'uri': 'spotify:user:'},
            'is_local': False,
            'primary_color': None,
            'track': {'album': {'album_type': 'album',
                                'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/7n2wHs1TKAczGzO7Dd2rGr'},
                                             'href': 'https://api.spotify.com/v1/artists/7n2wHs1TKAczGzO7Dd2rGr',
                                             'id': '7n2wHs1TKAczGzO7Dd2rGr',
                                             'name': 'Shawn Mendes',
                                

In [29]:
# take the track object of the first item in the playlist
# NOTE: this rebinds `track_info`, which until now held the result of sp.track(TRACK_ID)
track_info = playlist_info['tracks']['items'][0]['track']

list

In [41]:
# Now, let's get a list of the info for all the tracks in the playlist

# create a variable with an empty list
playlist_tracks_info = []

# get the tracks info from the playlist data
playlist_items = playlist_info['tracks']['items']

# make sure it's a list so that we can iterate over it
# (the value of an expression in the middle of a cell is not displayed,
# so the check is written as an assertion that fails loudly instead)
assert isinstance(playlist_items, list), type(playlist_items)

# for every track info in the playlist
for playlist_item in playlist_items:
    # get the track info using the function we wrote a few cells above
    item_info = get_track_info(playlist_item['track'])
    # add the formatted info to the list
    playlist_tracks_info.append(item_info)

In [39]:
# let's see the results! (one dict per track, as built by get_track_info)
playlist_tracks_info

[{'artists': ['Shawn Mendes'],
  'id': '2bT1PH7Cw3J9p3t7nlXCdh',
  'name': "If I Can't Have You",
  'release_year': '2019',
  'spotify_url': 'https://open.spotify.com/track/2bT1PH7Cw3J9p3t7nlXCdh'}]

In [42]:
# This is how you can do the same thing using a list comprehension: a Python construct that builds
# a new list from an existing iterable while iterating over it. It reads like a "for" loop turned
# inside out - the expression producing each element comes first, the "for ... in ..." part after it.
# You'd often split it into lines for readability.

# More info on list comprehension:
# https://www.w3schools.com/python/python_lists_comprehension.asp
playlist_tracks_info = [
    get_track_info(playlist_item['track'])
    for playlist_item in playlist_items
]

In [45]:
# Now that we have a list of organised data, let's make it into a Pandas DataFrame
import pandas as pd

# each dict in the list becomes one row; the dict keys become the column names
playlist_df = pd.DataFrame(playlist_tracks_info)

# This displays the first 5 rows - the "head" method allows you to preview the data easily without scrolling
playlist_df.head()

Unnamed: 0,artists,id,name,release_year,spotify_url
0,[Shawn Mendes],2bT1PH7Cw3J9p3t7nlXCdh,If I Can't Have You,2019,https://open.spotify.com/track/2bT1PH7Cw3J9p3t...
1,[Taylor Swift],1BxfuPKGuaTgP7aM0Bbdwr,Cruel Summer,2019,https://open.spotify.com/track/1BxfuPKGuaTgP7a...
2,[girl in red],6IPwKM3fUUzlElbvKw2sKl,we fell in love in october,2018,https://open.spotify.com/track/6IPwKM3fUUzlElb...
3,[Arctic Monkeys],5XeFesFbtLpXzIVDNQP22n,I Wanna Be Yours,2013,https://open.spotify.com/track/5XeFesFbtLpXzIV...
4,[Doja Cat],4dHRp03oxqJQfjGU8ECo7v,Paint The Town Red,2023,https://open.spotify.com/track/4dHRp03oxqJQfjG...


In [46]:
# Here's how you can save it to a csv file. The path is relative, so the file should appear
# in the same directory as the notebook.
# NOTE(review): with pandas' default settings the row index is presumably written as an extra,
# unnamed first column - pass index=False if that is not wanted (confirm against the pandas docs)
playlist_df.to_csv('my_playlist.csv')

In [48]:
# And if you want to save it to an Excel spreadsheet:
# playlist_df.to_excel('my_playlist.xlsx')

In [49]:
# Now let's try saving some data to the database. Please make sure you have mongo running.
# You should be able to see the db server running if you run "mongosh" from the console
import pymongo

# this should be the default address but you may want to cross-check it with what Mongo Shell is displaying
mongo_client = pymongo.MongoClient("mongodb://localhost:27017/")

In [50]:
# access the database called "musicDB"
music_db = mongo_client['musicDB']

In [53]:
# let's get info about the first track in the playlist (row 0 of the DataFrame).
# This will return a pd.Series object
song_info = playlist_df.iloc[0]

In [60]:
# We can bring it back to the dict format so that it can be inserted into the database below
song_dict = song_info.to_dict()

In [61]:
# display the dict to check its contents before saving it
song_dict

{'artists': ['Shawn Mendes'],
 'id': '2bT1PH7Cw3J9p3t7nlXCdh',
 'name': "If I Can't Have You",
 'release_year': '2019',
 'spotify_url': 'https://open.spotify.com/track/2bT1PH7Cw3J9p3t7nlXCdh'}

In [62]:
# save the song as one document in the "SongInfo" collection of the database
# NOTE(review): re-running this cell presumably stores the same song again (or fails if pymongo
# has added an '_id' to song_dict in place) - confirm against the pymongo docs
music_db['SongInfo'].insert_one(song_dict)

InsertOneResult(ObjectId('66337565de92b49e696ced2a'), acknowledged=True)

In [63]:
# read a single document back from the "SongInfo" collection to check that the insert worked
music_db['SongInfo'].find_one()

{'_id': ObjectId('66337565de92b49e696ced2a'),
 'artists': ['Shawn Mendes'],
 'id': '2bT1PH7Cw3J9p3t7nlXCdh',
 'name': "If I Can't Have You",
 'release_year': '2019',
 'spotify_url': 'https://open.spotify.com/track/2bT1PH7Cw3J9p3t7nlXCdh'}

In [67]:
# fetch the audio features (danceability, energy, tempo, ...) of the track;
# the result is a list holding one dict of features
sp.audio_features(TRACK_ID)

[{'danceability': 0.566,
  'energy': 0.366,
  'key': 7,
  'loudness': -12.808,
  'mode': 1,
  'speechiness': 0.028,
  'acousticness': 0.113,
  'instrumentalness': 0.181,
  'liveness': 0.155,
  'valence': 0.237,
  'tempo': 129.96,
  'type': 'audio_features',
  'id': '6IPwKM3fUUzlElbvKw2sKl',
  'uri': 'spotify:track:6IPwKM3fUUzlElbvKw2sKl',
  'track_href': 'https://api.spotify.com/v1/tracks/6IPwKM3fUUzlElbvKw2sKl',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/6IPwKM3fUUzlElbvKw2sKl',
  'duration_ms': 184154,
  'time_signature': 4}]