## Import required packages

In [5]:
import numpy as np 
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import json

In [66]:
!ls

README.md  data		      requirements.txt	scripts
bot_env    jupyter_notebooks  resources


## Authenticate with Spotify API

In [67]:
# Credentials are specified in the "activate" script from the venv and can be obtained in the Spotify Developer Platform.
# Called without arguments, SpotifyClientCredentials picks up the client ID and secret from the
# SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET environment variables, so no secret is stored in this notebook.
auth_manager = SpotifyClientCredentials()
# `sp` is the module-level API client that every later cell and helper function relies on.
sp = spotipy.Spotify(auth_manager=auth_manager)

In [8]:
### Show playlists to check if connection works

In [9]:
# Fetch the first page of playlists for the given user.
playlists = sp.user_playlists('arthevard')
# Keep going until there is no further page to read.
while playlists:
    for i, playlist in enumerate(playlists['items']):
        # Adding the page offset keeps the printed numbering continuous across pages.
        print("%4d %s %s" % (i + 1 + playlists['offset'], playlist['uri'],  playlist['name']))
    if playlists['next']:
        # Follow the "next" link of the current page.
        playlists = sp.next(playlists)
    else:
        # Last page reached: end the loop.
        playlists = None

   1 spotify:playlist:6pm6NYE8x95f3qu9nnkY3s Gestört
   2 spotify:playlist:3w4J1cBM7G8A3FJpmkQCks Jump Rope Fun
   3 spotify:playlist:22F1DzV6R5WTtaU5ZBx1Qh Alman Rap
   4 spotify:playlist:7sRH3hcKY9vi5Om9hKWED6 Dancehall Excellence
   5 spotify:playlist:7bKCXksqUpwO2u77DNDB6E Schweche Excellence
   6 spotify:playlist:2NL4skzzhfDIWGexthIoti Cloudrap und sowas
   7 spotify:playlist:45jLD4C7zDnVwmrs9Q2Yst Chill Excellence
   8 spotify:playlist:7JLpMUShki1OtQBbq0SJSD Spulung
   9 spotify:playlist:6UcKBh9vtZOGIMEx1FGXGj Hip Hop Excellence
  10 spotify:playlist:4IksObf8ZWrLkyWhcOp1Vp Classic Excellence
  11 spotify:playlist:62jUJdEWgsCmrh2EpCABRG Rock Excellence
  12 spotify:playlist:5QKkIeuRrBQ2L501fAi5vy Melancholy Excellence
  13 spotify:playlist:7i2EfrY01tIbPZ81sQXKKE Nostalgia Excellence
  14 spotify:playlist:5mJ07XW8fizydNB9mqRzKk Zu Arg für Bib LEL²
  15 spotify:playlist:48sxmNjqMplmnbSjnky8Tz Memes


In [10]:

# Look up a single artist by URI and pretty-print the raw response as indented JSON.
urn = 'spotify:artist:3NjbpG6MmFGVLXwbcPXH90'
artist = sp.artist(urn)
print(json.dumps(artist, indent=2))



{
  "external_urls": {
    "spotify": "https://open.spotify.com/artist/3NjbpG6MmFGVLXwbcPXH90"
  },
  "followers": {
    "href": null,
    "total": 62776
  },
  "genres": [
    "frauenrap",
    "german cloud rap",
    "german drill",
    "german hip hop",
    "indie deutschrap"
  ],
  "href": "https://api.spotify.com/v1/artists/3NjbpG6MmFGVLXwbcPXH90",
  "id": "3NjbpG6MmFGVLXwbcPXH90",
  "images": [
    {
      "height": 640,
      "url": "https://i.scdn.co/image/ab6761610000e5eb2b38a3b9a27030af33128bf9",
      "width": 640
    },
    {
      "height": 320,
      "url": "https://i.scdn.co/image/ab676161000051742b38a3b9a27030af33128bf9",
      "width": 320
    },
    {
      "height": 160,
      "url": "https://i.scdn.co/image/ab6761610000f1782b38a3b9a27030af33128bf9",
      "width": 160
    }
  ],
  "name": "Haiyti",
  "popularity": 56,
  "type": "artist",
  "uri": "spotify:artist:3NjbpG6MmFGVLXwbcPXH90"
}


## Import user and artist URIs from CSV files

In [11]:
# Both files are read as semicolon-separated CSVs; only the URI column of each is kept.
# Note: despite the "_list" suffix these are pandas Series, not Python lists.
user_uri_list = pd.read_csv("data/users.csv", sep=";")["user_uri"]
artist_uri_list = pd.read_csv("data/artists.csv", sep=";")["artist_uri"]

In [12]:
# Fetch the profile of the first user from the CSV and display the raw response.
user = sp.user(user_uri_list[0])
user

{'display_name': 'Carsten We',
 'external_urls': {'spotify': 'https://open.spotify.com/user/arthevard'},
 'followers': {'href': None, 'total': 21},
 'href': 'https://api.spotify.com/v1/users/arthevard',
 'id': 'arthevard',
 'images': [{'height': None,
   'url': 'https://scontent-lcy1-1.xx.fbcdn.net/v/t1.6435-1/p320x320/36594189_10214425521425989_1333356851693092864_n.jpg?_nc_cat=110&ccb=1-3&_nc_sid=0c64ff&_nc_ohc=ghacmvYW1FsAX_t8azS&_nc_ht=scontent-lcy1-1.xx&tp=6&oh=db285e745ce7b56774c9bb105b6220eb&oe=60A3559D',
   'width': None}],
 'type': 'user',
 'uri': 'spotify:user:arthevard'}

## Write basic functions to extract data from artists

In [13]:
def get_albums_from_artists(artist_uri_list):
    """Return the albums of the given artists as a pandas DataFrame.

    All result pages are fetched for every artist, so artists with more
    albums than fit on a single API page are returned completely.

    Args:
        artist_uri_list (iterable of str): Artist URIs.

    Returns:
        DataFrame: One row per album with the columns ``name`` (album name),
        ``album_uri``, ``album_release_date`` and ``artist_uri``. The frame
        is empty (but has these columns) when no albums are found.
    """
    columns = ["name", "album_uri", "album_release_date", "artist_uri"]
    album_rows = []

    for artist_uri in artist_uri_list:
        albums_page = sp.artist_albums(artist_uri)

        # The API answers in pages; follow the "next" link until it is exhausted.
        while albums_page:
            for album in albums_page["items"]:
                album_rows.append(
                    [album["name"], album["uri"], album["release_date"], artist_uri]
                )
            albums_page = sp.next(albums_page) if albums_page.get("next") else None

    return pd.DataFrame(data=album_rows, columns=columns)


In [14]:
# Collect the albums for every artist URI loaded from data/artists.csv and report how many rows came back.
albums_df = get_albums_from_artists(artist_uri_list)
print("Number of albums: " + str(albums_df.shape[0]))
print("Head of albums dataframe:")
albums_df.head()

Number of albums: 80
Head of albums dataframe:


Unnamed: 0,name,album_uri,album_release_date,artist_uri
0,MIESES LEBEN,spotify:album:18iDtijC0YUY43T2teLEiK,2021-04-15,spotify:artist:3NjbpG6MmFGVLXwbcPXH90
1,influencer,spotify:album:7MguPrKUh0eyZY0VqmMO0G,2020-12-04,spotify:artist:3NjbpG6MmFGVLXwbcPXH90
2,SUI SUI,spotify:album:2M51xiotVFca2hT7OppmgK,2020-07-03,spotify:artist:3NjbpG6MmFGVLXwbcPXH90
3,Nightliner Reloaded,spotify:album:5KYZEBBBwDPZ9UY9Tpr3E0,2019-10-25,spotify:artist:3NjbpG6MmFGVLXwbcPXH90
4,Perroquet,spotify:album:5h5iAcFwOtsKZJZRMCZ8HV,2019-06-07,spotify:artist:3NjbpG6MmFGVLXwbcPXH90


In [15]:
def get_tracks_from_albums(album_uri_list):
    """Return the tracks of the given albums as a pandas DataFrame.

    Each album costs a single API request for its metadata and first page of
    tracks; further requests are made only for albums whose track listing
    spans more than one page.

    Args:
        album_uri_list (iterable of str): Album URIs (or IDs).

    Returns:
        DataFrame: One row per track with the columns ``track_name``,
        ``track_uri`` and ``track_release_date`` (the release date of the
        album the track belongs to). The frame is empty (but has these
        columns) when no tracks are found.
    """
    columns = ["track_name", "track_uri", "track_release_date"]
    track_rows = []

    for album_uri in album_uri_list:
        # The full album object carries both the release date and the first
        # page of tracks, so no separate album_tracks request is needed.
        album = sp.album(album_uri)
        album_release_date = album["release_date"]

        tracks_page = album["tracks"]
        # Follow the "next" links so long albums are not cut off after one page.
        while tracks_page:
            for track in tracks_page["items"]:
                track_rows.append([track["name"], track["uri"], album_release_date])
            tracks_page = sp.next(tracks_page) if tracks_page.get("next") else None

    # Create df from list of tracks for all albums
    return pd.DataFrame(data=track_rows, columns=columns)

In [91]:
# Try the helper on a single album; note that a bare album ID (no "spotify:album:" prefix) is passed here.
tracks = get_tracks_from_albums(["18iDtijC0YUY43T2teLEiK"])
tracks.head()


Unnamed: 0,track_name,track_uri,track_release_date
0,INTRO,spotify:track:75R9wfcPJyCXZ6fXV1IgQC,2021-04-15
1,ROBBERY IS BACK,spotify:track:5aEw37R8QW6ppQIYYHVKKp,2021-04-15
2,SNOB,spotify:track:6fgwmfyNfaf9FxfcyjHdNA,2021-04-15
3,OMG,spotify:track:4xvetmz1M1ECP8pkUkQezO,2021-04-15
4,TOXISCH,spotify:track:2E7aEEoiG3tE9RSWfDaVEZ,2021-04-15


In [30]:
# Select the rows whose release-date string equals the given day (plain string comparison).
tracks[tracks["track_release_date"] == "2021-04-15"]

Unnamed: 0,track_name,track_uri,track_release_date
0,INTRO,spotify:track:75R9wfcPJyCXZ6fXV1IgQC,2021-04-15
1,ROBBERY IS BACK,spotify:track:5aEw37R8QW6ppQIYYHVKKp,2021-04-15
2,SNOB,spotify:track:6fgwmfyNfaf9FxfcyjHdNA,2021-04-15
3,OMG,spotify:track:4xvetmz1M1ECP8pkUkQezO,2021-04-15
4,TOXISCH,spotify:track:2E7aEEoiG3tE9RSWfDaVEZ,2021-04-15
5,PAPI CHAIN,spotify:track:6Px6MaH8R6CQfneAD6cVjB,2021-04-15
6,HELIKOPTER,spotify:track:41Xwa5IuEqV0okKFygVZI8,2021-04-15
7,FREITAG,spotify:track:1zLuIPhPHk14s4I5uOtqaS,2021-04-15
8,WAS NOCH,spotify:track:161RGAj9K2svKcfPwDNnFl,2021-04-15
9,ERSTER TAG,spotify:track:1kxXL1KDNUuO8mTgP5sFQw,2021-04-15


In [76]:
import datetime

def str_to_date(date_as_str):
    """Convert a release-date string into a ``datetime.date``.

    Spotify reports release dates with day, month or year precision
    (``"2021-04-15"``, ``"2021-04"`` or ``"2021"``). Parts that are missing
    from the string default to the first month / first day.

    Args:
        date_as_str (str): Date in ``YYYY-MM-DD``, ``YYYY-MM`` or ``YYYY`` form.

    Returns:
        datetime.date: The parsed date.

    Raises:
        ValueError: If the string matches none of the supported formats.
    """
    # Try the most precise format first and fall back to coarser ones.
    for date_format in ("%Y-%m-%d", "%Y-%m", "%Y"):
        try:
            return datetime.datetime.strptime(date_as_str, date_format).date()
        except ValueError:
            continue

    raise ValueError("Unsupported release date format: %r" % (date_as_str,))

In [108]:
# Parse the release-date strings into datetime.date objects, stored as a new column on the existing frame.
tracks["track_release_date_datetime"] = tracks["track_release_date"].apply(str_to_date)
tracks.head()

Unnamed: 0,track_name,track_uri,track_release_date,track_release_date_datetime,days_since_release
0,INTRO,spotify:track:75R9wfcPJyCXZ6fXV1IgQC,2021-04-15,2021-04-15,
1,ROBBERY IS BACK,spotify:track:5aEw37R8QW6ppQIYYHVKKp,2021-04-15,2021-04-15,
2,SNOB,spotify:track:6fgwmfyNfaf9FxfcyjHdNA,2021-04-15,2021-04-15,
3,OMG,spotify:track:4xvetmz1M1ECP8pkUkQezO,2021-04-15,2021-04-15,
4,TOXISCH,spotify:track:2E7aEEoiG3tE9RSWfDaVEZ,2021-04-15,2021-04-15,


In [115]:
# Number of days between each release date and today.
# Computed inline because the datediff_today helper is only defined in a later cell,
# which would raise a NameError when the notebook is run top to bottom on a fresh kernel.
tracks["days_since_release"] = tracks["track_release_date_datetime"].apply(
    lambda release_date: (datetime.date.today() - release_date).days
)
tracks.head()

Unnamed: 0,track_name,track_uri,track_release_date,track_release_date_datetime,days_since_release
0,INTRO,spotify:track:75R9wfcPJyCXZ6fXV1IgQC,2021-04-15,2021-04-15,3
1,ROBBERY IS BACK,spotify:track:5aEw37R8QW6ppQIYYHVKKp,2021-04-15,2021-04-15,3
2,SNOB,spotify:track:6fgwmfyNfaf9FxfcyjHdNA,2021-04-15,2021-04-15,3
3,OMG,spotify:track:4xvetmz1M1ECP8pkUkQezO,2021-04-15,2021-04-15,3
4,TOXISCH,spotify:track:2E7aEEoiG3tE9RSWfDaVEZ,2021-04-15,2021-04-15,3


In [114]:
def datediff_today(date):
    """Return how many days lie between ``date`` and the current day.

    Args:
        date (datetime.date): The date to compare with today.

    Returns:
        int: Whole days elapsed since ``date``; negative for dates in the future.
    """
    elapsed = datetime.date.today() - date
    return elapsed.days

In [None]:
def get_all_tracks_from_artists(artist_uri_list):
    """Return every track of the given artists as a single pandas DataFrame.

    For each artist the name is looked up, the artist's albums are collected
    with ``get_albums_from_artists`` and the tracks of those albums with
    ``get_tracks_from_albums``.

    Args:
        artist_uri_list (iterable of str): Artist URIs.

    Returns:
        DataFrame: One row per track with the columns ``track_name``,
        ``track_uri``, ``track_release_date`` and ``artist_name``, numbered
        consecutively across all artists. The frame is empty (but has these
        columns) when no artist URIs are given.
    """
    columns = ["track_name", "track_uri", "track_release_date", "artist_name"]
    artist_frames = []

    for artist_uri in artist_uri_list:
        # Get artist name and albums
        artist_name = sp.artist(artist_uri)["name"]
        albums = get_albums_from_artists([artist_uri])

        # Get tracks from artist albums and tag them with the artist's name
        tracks_artist_df = get_tracks_from_albums(albums["album_uri"].to_list())
        tracks_artist_df["artist_name"] = artist_name
        artist_frames.append(tracks_artist_df)

    if not artist_frames:
        # pd.concat refuses an empty list, so build the empty result explicitly.
        return pd.DataFrame(columns=columns)

    # Concatenate once at the end: DataFrame.append was removed in pandas 2.0 and
    # re-copied the whole frame on every iteration. ignore_index gives unique row labels.
    return pd.concat(artist_frames, ignore_index=True)

In [None]:
# Try the combined helper on two artist URIs and display the resulting frame.
get_all_tracks_from_artists(["spotify:artist:3NjbpG6MmFGVLXwbcPXH90", "spotify:artist:6ynopZPMBXcIGBI9M02Un5"])

In [None]:
# data/artists.csv is semicolon-separated (it is read with sep=";" when the artist URIs are loaded earlier),
# so the separator must be given here as well; otherwise each row is parsed into one single column.
artists = pd.read_csv("data/artists.csv", sep=";")
artists.head()