In [2]:
# nb_black: formats code cells with black as they are run
# (presumably the source of the Javascript display object after each cell - TODO confirm)
%load_ext nb_black

# autoreload mode 2: reload imported modules before each cell execution, so edits
# to project modules are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

<IPython.core.display.Javascript object>

In [3]:
def set_paths() -> None:
    """
    Move the working directory one level up, once per kernel session.

    The first call stores the parent of the current directory in the
    module-level ``cwd`` variable and changes into it. Later calls find ``cwd``
    already defined and leave the working directory alone. Every call prints
    the directory that is current when it finishes.
    """
    import os
    import sys

    # noinspection PyGlobalUndefined
    global cwd

    # the existence of the ``cwd`` global is the marker that we already moved up
    already_moved = "cwd" in globals()
    if not already_moved:
        cwd = os.path.join(os.getcwd(), os.pardir)
        os.chdir(cwd)

    print(f"working dir is '{os.getcwd()}'")

# Move the working directory up to the project root so that project-relative
# paths (e.g. credentials.yml, data/) resolve from there and, presumably, the
# project's packages become importable.
set_paths()

working dir is '/Users/ariane/GitHub/spotify-dev'


<IPython.core.display.Javascript object>

In [4]:
import os
import numpy as np
import pandas as pd
import spotipy
import yaml
from spotipy.oauth2 import SpotifyClientCredentials


<IPython.core.display.Javascript object>

## Download data from Spotify API

In [5]:
# Read the credentials from credentials.yml (kept outside the notebook).
# safe_load only builds plain Python objects, which is all a credentials file needs.
with open("credentials.yml", encoding="utf-8") as f:
    credentials = yaml.safe_load(f) or {}

# Fail early, and by name, when an entry is missing or empty
missing_keys = [key for key in ("client_id", "client_secret") if not credentials.get(key)]
if missing_keys:
    raise KeyError(f"credentials.yml is missing: {', '.join(missing_keys)}")

# Set up the Spotify API
client_id = credentials["client_id"]  # YOUR_CLIENT_ID
client_secret = credentials["client_secret"]  # YOUR_CLIENT_SECRET
client_credentials_manager = SpotifyClientCredentials(
    client_id=client_id, client_secret=client_secret
)

# Export the credentials as environment variables: the OAuth manager created
# later is given no credentials explicitly and picks them up from here
os.environ["SPOTIPY_CLIENT_ID"] = client_id
os.environ["SPOTIPY_CLIENT_SECRET"] = client_secret
os.environ["SPOTIPY_REDIRECT_URI"] = "http://localhost:8090/callback"

<IPython.core.display.Javascript object>

<div class="alert alert-block alert-info">
<b><font color='orange'>Authentication:</font></b> Authenticate through OAuth
</div>

In [6]:
from spotipy.oauth2 import SpotifyOAuth

# The only permission requested: read access to the user's private playlists
scope = "playlist-read-private"

# API client used by every cell below; authenticates through the OAuth flow
sp = spotipy.Spotify(
    auth_manager=SpotifyOAuth(scope=scope),
)

<IPython.core.display.Javascript object>

## Define useful functions

In [7]:
# The functions below take a Spotify username and a playlist id.
# To find a playlist id: on the playlist page, click "..." -> "Share" -> "Copy Spotify URI"
def getTrackIDs(user: str, playlist_id: str) -> list:
    """
    Collect the id, position and contributor of every track in a playlist.

    Args:
        user (str): spotify username
        playlist_id (str): spotify playlist id; a trailing "?si=..." share
            query string is ignored
    Returns:
        track_infos (list): one ``[track_id, playlist_idx, added_by_id]`` list
            per usable playlist entry, in playlist order. ``playlist_idx`` is
            the entry's position in the playlist, so skipped entries leave gaps.
    """
    # ids copied from a share link carry a "?si=..." query string that is not
    # part of the id itself
    playlist_id = playlist_id.split("?", 1)[0]

    track_infos = []
    playlist = sp.user_playlist(user, playlist_id)
    page = playlist["tracks"]
    idx = 0
    while page:
        for item in page["items"]:
            track = item.get("track")
            # entries without a track or without an id cannot be looked up
            # later, so they are skipped instead of crashing
            if track and track.get("id"):
                added_by = (item.get("added_by") or {}).get("id")
                track_infos.append([track["id"], idx, added_by])
            idx += 1
        # the track listing is paged: keep following "next" until it runs out
        page = sp.next(page) if page.get("next") else None
    return track_infos

<IPython.core.display.Javascript object>

In [8]:
# Per-track data comes from these two Spotify Web API endpoints:
# https://developer.spotify.com/documentation/web-api/reference/#/operations/get-track
# https://developer.spotify.com/documentation/web-api/reference/#/operations/get-audio-features

def getTrackFeatures(id: str) -> list:
    """
    Get the metadata and audio features for a single track
    Args:
        id: the spotify id of the track
    Returns:
        a list of 19 values, in this order: name, album, artist, release_date,
        length, popularity, acousticness, danceability, energy,
        instrumentalness, liveness, loudness, speechiness, tempo, valence,
        time_signature, key, mode, uri.
        When no audio features are available for the track, the twelve audio
        feature values are None and uri falls back to the track metadata.
    """
    meta = sp.track(id)
    # audio_features returns a list; its single entry is None when there are
    # no audio features for the track
    features = (sp.audio_features(id) or [None])[0] or {}

    # meta
    album = meta["album"]
    track = [
        meta["name"],
        album["name"],
        # NOTE(review): this is the album's first artist, not the track's own
        album["artists"][0]["name"],
        album["release_date"],
        meta["duration_ms"],
        meta["popularity"],
    ]

    # features, in the column order expected by callers
    feature_keys = (
        "acousticness",
        "danceability",
        "energy",
        "instrumentalness",
        "liveness",
        "loudness",
        "speechiness",
        "tempo",
        "valence",
        "time_signature",
        "key",
        "mode",
    )
    track.extend(features.get(feature_key) for feature_key in feature_keys)
    track.append(features.get("uri", meta.get("uri")))
    return track


<IPython.core.display.Javascript object>

## Play with the API


### Define a few useful variables

In [9]:
# Spotify username that owns / follows the playlist analysed below
username = "arianedalens"
# Bare playlist id: the "?si=..." query string that comes with a copied share
# link is not part of the id, so it is left out
playlist_id = "37i9dQZF1EJubkRHzOYlbh"

<IPython.core.display.Javascript object>

<div class="alert alert-block alert-success">
<b><font color='green'>Explore:</font></b> User data
</div>

In [10]:
# Look up a user by id and print the raw response.
# NOTE(review): the printed profile contains personal data (display name, profile
# picture URL) - consider clearing this output before sharing the notebook.
user = sp.user("laeti0306")
print(user)

{'display_name': 'Laetitia Giovannoni', 'external_urls': {'spotify': 'https://open.spotify.com/user/laeti0306'}, 'followers': {'href': None, 'total': 1}, 'href': 'https://api.spotify.com/v1/users/laeti0306', 'id': 'laeti0306', 'images': [{'height': None, 'url': 'https://platform-lookaside.fbsbx.com/platform/profilepic/?asid=755165281183281&height=300&width=300&ext=1671113263&hash=AeRquiKD5pfETdIwzH0', 'width': None}], 'type': 'user', 'uri': 'spotify:user:laeti0306'}


<IPython.core.display.Javascript object>

<div class="alert alert-block alert-success">
<b><font color='green'>Explore:</font></b> Playlists from a user
</div>

In [33]:
# List the first 10 playlists of the authenticated user, numbered from 0
results = sp.current_user_playlists(limit=10, offset=0)
for i, item in enumerate(results["items"]):
    print(f"{i:d} {item['name']!s}")

0 Coline's picks
1 Radar des sorties
2 Discover Weekly
3 Your Time Capsule
4 On Repeat
5 Ariane
6 TBD
7 La vrai playlist à deux ! 
8 Laetitia + Ariane
9 Daily Mix 1


<IPython.core.display.Javascript object>

<div class="alert alert-block alert-success">
<b><font color='green'>EDA:</font></b> On a collaborative playlist
</div>

In [12]:
# One entry per playlist track: [track_id, playlist_idx, added_by]
track_infos = getTrackIDs(username, playlist_id)

# One row per track: its metadata / audio features followed by its playlist
# position and the user who added it
features_list = []
for track_info in track_infos:
    features = getTrackFeatures(track_info[0])
    features_list.append(list(features) + list(track_info[1:]))

# Create a dataframe from the list of features
df = pd.DataFrame(
    features_list,
    columns=[
        "name",
        "album",
        "artist",
        "release_date",
        "length",
        "popularity",
        "acousticness",
        "danceability",
        "energy",
        "instrumentalness",
        "liveness",
        "loudness",
        "speechiness",
        "tempo",
        "valence",
        "time_signature",
        "key",
        "mode",
        "uri",
        "playlist_idx",
        "added_by",
    ],
)



<IPython.core.display.Javascript object>

In [14]:
# to_csv does not create missing folders, so make sure data/raw exists first
os.makedirs("data/raw", exist_ok=True)
df.to_csv("data/raw/spotify_data.csv", index=False)

<IPython.core.display.Javascript object>

In [35]:
# Per-contributor average of every numeric column. numeric_only=True keeps the
# text columns (name, album, artist, release_date, uri) out of the mean, which
# would otherwise raise a TypeError on recent pandas versions.
df_stat = df.groupby("added_by").mean(numeric_only=True)

<IPython.core.display.Javascript object>

In [16]:
import urllib.request
from PIL import Image

# Show the display name and profile picture URL of everyone who added tracks
for user_id in df["added_by"].unique():
    # one API call per user; the response is reused for both fields
    profile = sp.user(user_id)
    print(profile["display_name"])
    # a profile may have no picture, in which case the image list is empty
    images = profile.get("images") or []
    print(images[0]["url"] if images else "(no profile image)")
    # Optional: download and preview the picture locally
    # urllib.request.urlretrieve(images[0]["url"], f"img/users/{user_id}.jpg")
    # img = Image.open(f"img/users/{user_id}.jpg")
    # img.show()

Laetitia Giovannoni
https://platform-lookaside.fbsbx.com/platform/profilepic/?asid=755165281183281&height=300&width=300&ext=1671113263&hash=AeRquiKD5pfETdIwzH0
Ariane Dlns
https://i.scdn.co/image/ab6775700000ee8542e42794651cc973c190b2fb


<IPython.core.display.Javascript object>