In [None]:
import pandas as pd
import streamlit as st
import spotipy
import spotipy.util as util
from dateutil.parser import parse as parse_date
from IPython.display import Image
from IPython.core.display import HTML

## Spotipy OAuth

In [4]:
# Load the credentials CSV; it has no header row, so columns are labelled 0, 1, ...
api_creds = pd.read_csv("../data/external/api_keys.csv", header=None)
# Rows 0-2 of column 1 are read as user id, client id and client secret, in that order
user_id, client_id, client_secret = (api_creds[1].iloc[row] for row in range(3))
# Every scope requested from Spotify, as one space-separated string
scope = 'ugc-image-upload user-read-playback-state streaming user-read-email playlist-read-collaborative \
user-modify-playback-state user-read-private playlist-modify-public user-library-modify user-top-read \
user-read-playback-position user-read-currently-playing playlist-read-private user-follow-read \
app-remote-control user-read-recently-played playlist-modify-private user-follow-modify user-library-read'
# Ask the user to authorise the app, then build the client from the resulting token
token = util.prompt_for_user_token(
    user_id, scope,
    client_id=client_id, client_secret=client_secret,
    redirect_uri='http://127.0.0.1:9090')
sp = spotipy.Spotify(auth=token)

NameError: name 'util' is not defined

#### Spotipy OAuth - Function

In [None]:
# Location of the CSV that holds the Spotify credentials.
# NOTE(review): this is an absolute, machine-specific path; the earlier cell reads the same kind of
# file through a relative path — consider unifying the two.
FILE_PATH_API_CREDENTIAL = r"C:\Users\Jaume\Documents\Python Projects\spotify_compare\data\external\api_keys.csv"
# Scopes requested from Spotify, joined with single spaces. The names are listed individually because a
# backslash-continued string literal carries the indentation of each continuation line into the value,
# which left runs of spaces between some scope names.
SPOTIPY_SCOPE = " ".join([
    "ugc-image-upload",
    "user-read-playback-state",
    "streaming",
    "user-read-email",
    "playlist-read-collaborative",
    "user-modify-playback-state",
    "user-read-private",
    "playlist-modify-public",
    "user-library-modify",
    "user-top-read",
    "user-read-playback-position",
    "user-read-currently-playing",
    "playlist-read-private",
    "user-follow-read",
    "app-remote-control",
    "user-read-recently-played",
    "playlist-modify-private",
    "user-follow-modify",
    "user-library-read",
])

def make_api_cred_dataframe(file_path):
    """
    Function reads the credentials CSV into a dataframe, treating its first line as data (no header row)
    :param file_path: location of the CSV, passed straight to ``pd.read_csv``
    :return: dataframe whose columns carry integer labels
    """
    return pd.read_csv(file_path, header=None)

def make_tokens_from_api_creds(api_creds_df):
    """
    Function picks the three credentials out of the credentials dataframe
    :param api_creds_df: dataframe whose column ``1`` holds the values in rows 0, 1 and 2
    :return: tuple (user_id, client_id, client_secret)
    """
    values = api_creds_df[1]
    # Positional lookups, in the order the values are returned
    return tuple(values.iloc[row] for row in range(3))

def get_token_authentication(user_id, client_id, client_secret, scope, redirect_uri='http://127.0.0.1:9090'):
    """
    Function prompts the user for a token and returns a Spotify client authenticated with it
    :param user_id: Spotify user name the token is requested for
    :param client_id: client id of the Spotify Developer application
    :param client_secret: client secret of the Spotify Developer application
    :param scope: space-separated scopes to request
    :param redirect_uri: redirect URI used for the authorisation flow; defaults to the address that
        was previously hard-coded here
    :return: ``spotipy.Spotify`` client built from the token
    """
    token = util.prompt_for_user_token(
        user_id,
        scope,
        client_id=client_id,
        client_secret=client_secret,
        redirect_uri=redirect_uri)
    return spotipy.Spotify(auth=token)

def pipeline_spotipy_authentication():
    """
    Function chains the steps needed to authenticate on Spotify: load the credentials file,
    pick out the three credentials, and request a token with the module-level scope
    :return: the client returned by ``get_token_authentication``
    """
    creds_df = make_api_cred_dataframe(FILE_PATH_API_CREDENTIAL)
    spotify_user, app_id, app_secret = make_tokens_from_api_creds(creds_df)
    return get_token_authentication(spotify_user, app_id, app_secret, SPOTIPY_SCOPE)


In [None]:
# Authenticate once and keep the client in the global `sp`, which the functions below read directly
sp = pipeline_spotipy_authentication()

## Return Playlist from Link

In [None]:
def get_playist_id_from_link(playlist_link):
    """
    Function returns the Spotify playlist id from the Spotify generated link
    :param playlist_link: share link such as ``https://open.spotify.com/playlist/<id>?si=...``;
        a bare id is returned unchanged
    :return: the last path segment of the link, without query string or trailing slashes
    """
    # Cut the query string off first so that a "/" right before (or inside) it cannot hide the id,
    # then ignore trailing slashes before taking the last path segment
    path = playlist_link.strip().split("?")[0].rstrip("/")
    return path.split("/")[-1]

In [None]:
def get_playlist_name_description_image_url(playlist_id):
    """
    Function returns the headline metadata of a playlist, fetched through the global client ``sp``
    for the global ``user_id``
    :param playlist_id: id of the playlist to look up
    :return: tuple (name, description, image_url, collaborative, followers, owner, public);
        ``image_url`` is None when the playlist carries no cover image
    """
    playlist = sp.user_playlist(user_id, playlist_id)
    # A playlist without cover art has an empty (or missing) image list; indexing [0] would raise
    images = playlist.get("images") or []
    image_url = images[0]["url"] if images else None
    return (
        playlist["name"],
        playlist["description"],
        image_url,
        playlist["collaborative"],
        playlist["followers"]["total"],
        playlist["owner"]["display_name"],
        playlist["public"],
    )

In [None]:
# Resolve the playlist id inside this cell: `playlist_id` is not assigned by any earlier cell, so a
# fresh Restart & Run All would stop here with a NameError.
playlist_id = get_playist_id_from_link("https://open.spotify.com/playlist/2wOJV8mN9c548Qw0PbInCT?si=c5bfe217a16b46be")
# Fetch the raw playlist payload to inspect which fields the API returns
playlist = sp.user_playlist(user_id, playlist_id)
print(playlist.keys())
playlist

In [None]:
# Extract the playlist id from a share link and look up its metadata.
# NOTE(review): the global name `id` shadows the builtin and is read again by a later cell
# (`get_playlist_df(id)`); rename both together if this is cleaned up.
id = get_playist_id_from_link("https://open.spotify.com/playlist/2wOJV8mN9c548Qw0PbInCT?si=c5bfe217a16b46be")
get_playlist_name_description_image_url(id)

### Return Playlist Dataframe

In [None]:
def get_playlist_tracks(username, playlist_id):
    """
    Function collects the track items of a playlist across every result page, using the global client ``sp``
    :param username: user passed to ``sp.user_playlist_tracks``
    :param playlist_id: id of the playlist to read
    :return: list holding the ``items`` of the first page followed by those of each following page
    """
    page = sp.user_playlist_tracks(username, playlist_id)
    collected = page['items']
    # Follow the "next" marker until a page reports that nothing follows it
    while True:
        if not page['next']:
            break
        page = sp.next(page)
        collected.extend(page['items'])
    return collected

def get_playlist_df(playlist_id):
    """
    Function creates a dataframe containing songs and relevant song details from a playlist.
    Reads the playlist through the global client ``sp`` for the global ``user_id``.
    :param playlist_id: id of the playlist to load
    :return: dataframe with one row per playlist item that has track data, with columns id, name,
        artist_id, artist (first listed artist), album_id, album, track popularity, release_date,
        added_at, playlist_name and release_year
    """
    playlist = sp.user_playlist(user_id, playlist_id)
    items = get_playlist_tracks(user_id, playlist_id)
    columns = ['id', 'name', "artist_id", 'artist', "album_id", 'album',
               'track popularity', 'release_date', 'added_at']

    rows = []
    for item in items:
        track = item.get('track')
        if not track:
            # Items without track data cannot be described; skipping them avoids a TypeError
            continue
        first_artist = track['artists'][0]
        album = track['album']
        release_date = album['release_date']
        rows.append((
            track['id'], track['name'],
            first_artist['id'], first_artist['name'],
            album['id'], album['name'],
            track['popularity'],
            parse_date(release_date) if release_date else None,
            parse_date(item['added_at']),
        ))

    tracks_df = pd.DataFrame(rows, columns=columns)
    tracks_df['playlist_name'] = playlist['name']
    # Convert explicitly so the .dt accessor also works when the column holds no dates at all
    # (empty playlist, or every release date missing)
    tracks_df["release_year"] = pd.to_datetime(tracks_df["release_date"]).dt.year
    return tracks_df

In [None]:
# Build the track dataframe for the playlist id stored in the global `id` by the link-parsing cell;
# `playlist_df` is the input of the cells that follow
playlist_df = get_playlist_df(id)
playlist_df

In [None]:
def get_playlist_count_songs_artists(playlist_df):
    """
    Function reports how many rows the playlist dataframe has and how many distinct artists appear in it
    :param playlist_df: dataframe with at least the columns "name" and "artist"
    :return: tuple (count_songs, count_artists)
    """
    song_total = playlist_df["name"].shape[0]
    # dropna=False keeps a missing artist counted as its own value, as a plain unique() would
    artist_total = playlist_df["artist"].nunique(dropna=False)
    return song_total, artist_total


### Artist Count Dataframe

In [None]:
# Column names of the playlist dataframe that the count helpers below group by and count
ARTIST = "artist"
ID = "id"
# NOTE(review): SONGS is not referenced by any of the visible cells — confirm it is still needed
SONGS = "songs"
ALBUM = "album"
# Name given to the count column of the resulting dataframes
COUNT = "count"

def make_artist_in_playlist_count_df(playlist_df):
    """
    Function builds a table of how many playlist rows (non-null ids) belong to each artist
    :param playlist_df: dataframe of the playlist
    :return: dataframe indexed by artist with a single count column, largest count first
    """
    per_artist = playlist_df.groupby(ARTIST)[ID].count()
    ranked = per_artist.sort_values(ascending=False)
    # The grouped series already carries the artist index; only the column needs naming
    return ranked.rename(COUNT).to_frame()

def make_album_in_playlist_count_df(playlist_df):
    """
    Function builds a table of how many playlist rows (non-null ids) belong to each album
    :param playlist_df: dataframe of the playlist
    :return: dataframe indexed by album with a single count column, largest count first
    """
    per_album = playlist_df.groupby(ALBUM)[ID].count()
    ranked = per_album.sort_values(ascending=False)
    # The grouped series already carries the album index; only the column needs naming
    return ranked.rename(COUNT).to_frame()

def pipeline_make_count_artist_album_df(playlist_df):
    """
    Function builds both count tables for a playlist in one call
    :param playlist_df: dataframe of the playlist
    :return: tuple (artist count dataframe, album count dataframe)
    """
    return (
        make_artist_in_playlist_count_df(playlist_df),
        make_album_in_playlist_count_df(playlist_df),
    )

In [None]:
# Build both count tables for the loaded playlist; the returned tuple is this cell's displayed value
pipeline_make_count_artist_album_df(playlist_df)

### Plot Date Added

In [None]:
from pylab import *
import calplot

# Column that is counted per day (same value as the ID constant of the artist-count cell)
ID = "id"
# Column holding the moment each track was added to the playlist
ADDED_AT_COLUMN = "added_at"
# Period frequency used to bucket the timestamps: one bucket per calendar day
DAY_FREQUENCY = "D"
# Suffixes appended to a year to form the first and last date of the reindexed range
START_OF_YEAR = "-1-1"
END_OF_YEAR = "-12-31"
# Name of the colormap used for the calendar plot
COLORWAY_CALENDAR = 'YlGn'

def make_daily_add_series(playlist_df):
    """
    Function counts how many tracks were added on each calendar day and pads the result so that it
    covers every day from 1 January of the earliest year to 31 December of the latest year
    :param playlist_df: playlist dataframe with a datetime "added_at" column and an "id" column
    :return: series of daily counts indexed by date, with 0 on days without additions
    """
    day_periods = playlist_df[ADDED_AT_COLUMN].dt.to_period(DAY_FREQUENCY)
    adds_per_day = playlist_df.groupby(day_periods)[ID].count().to_timestamp()
    adds_per_day.index = pd.DatetimeIndex(adds_per_day.index)

    first_year = day_periods.min().year
    last_year = day_periods.max().year
    full_range = pd.date_range(f"{first_year}{START_OF_YEAR}", f"{last_year}{END_OF_YEAR}")
    # Days that never occur in the data get an explicit zero
    return adds_per_day.reindex(full_range, fill_value=0)

def plot_date_added_calendar(daily_adds):
    """
    Function renders the daily additions as a calendar heatmap in the Streamlit app
    :param daily_adds: series of daily counts indexed by date
    :return: None
    """
    # Imported locally so the function does not depend on names leaked by `from pylab import *`
    import matplotlib.pyplot as plt

    # pyplot.get_cmap takes the same (name, lut) arguments as the deprecated cm.get_cmap;
    # 10 is the number of colour levels
    cmap = plt.get_cmap(COLORWAY_CALENDAR, 10)
    # calplot.calplot returns (figure, axes); st.pyplot needs the figure itself, not the tuple
    fig, _axes = calplot.calplot(daily_adds, cmap=cmap, figsize=(20, 7))
    st.pyplot(fig)
    return None

def pipeline_date_added_calendar(playlist_df):
    """
    Function chains the two steps of the calendar plot: build the daily series, then plot it
    :param playlist_df: playlist dataframe handed to ``make_daily_add_series``
    :return: None
    """
    plot_date_added_calendar(make_daily_add_series(playlist_df))

In [None]:
# Draw the calendar heatmap of when tracks were added to the loaded playlist
pipeline_date_added_calendar(playlist_df)

## Song Features

In [None]:
# Collect the audio features of every track, asking the API for 100 ids at a time
features = []
for n, chunk_series in playlist_df.groupby(np.arange(len(playlist_df)) // 100)['id']:
    features.extend(sp.audio_features([str(track_id) for track_id in chunk_series]))

# Falsy entries are dropped before the frame is built
features_df = pd.DataFrame.from_dict(filter(None, features))

# Keep only the tracks for which features came back
tracks_with_features_df = pd.merge(playlist_df, features_df, on=['id'], how='inner')

In [None]:
import plotly.graph_objects as go


def make_playlist_track_with_featured_df(playlist_df, chunk_size=100):
    """
    Function fetches the audio features of the playlist tracks through the global client ``sp`` and
    joins them onto the playlist dataframe
    :param playlist_df: playlist dataframe with an "id" column
    :param chunk_size: number of ids sent per ``sp.audio_features`` call; defaults to the 100 that
        was previously hard-coded
    :return: inner merge of ``playlist_df`` with the returned features on "id"; an empty slice of
        ``playlist_df`` when no features were returned at all
    """
    # Rows without an id cannot be looked up; str(None) would otherwise be sent as the id "None"
    track_ids = [str(track_id) for track_id in playlist_df['id'].dropna()]

    features = []
    for start in range(0, len(track_ids), chunk_size):
        batch = track_ids[start:start + chunk_size]
        features.extend(sp.audio_features(batch) or [])

    # The API answers with a falsy entry for ids it has no features for
    found = [feature for feature in features if feature]
    if not found:
        # Without any feature rows there is no "id" column to merge on
        return playlist_df.iloc[0:0]

    features_df = pd.DataFrame(found)
    return playlist_df.merge(features_df, on=['id'], how='inner')

def get_xy_axis_and_data(tracks_with_features_df, yaxis, customdata, xaxis=None):
    """
    Function extracts the plain lists needed for a scatter plot from the dataframe
    :param tracks_with_features_df: dataframe to read from
    :param yaxis: column used for the y values
    :param customdata: column used for the per-point extra data
    :param xaxis: column used for the x values; the dataframe index is used when None
    :return: tuple (x values, y values, custom data), each as a list
    """
    x_source = tracks_with_features_df.index if xaxis is None else tracks_with_features_df[xaxis]
    return (
        x_source.tolist(),
        tracks_with_features_df[yaxis].tolist(),
        tracks_with_features_df[customdata].tolist(),
    )

def plot_scatter(x_axis, y_axis, customdata_list, y_label="Energy"):
    """
    Function draws a dark-themed marker scatter plot and shows it
    :param x_axis: x values
    :param y_axis: y values; also used to colour the markers
    :param customdata_list: text shown first in each point's hover box
    :param y_label: label printed in front of the y value in the hover box; defaults to the
        "Energy" label that was previously hard-coded
    :return: None
    """
    hover_template = '%{hovertext}<br>' + y_label + ': %{y}<br>'
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=x_axis, y=y_axis, mode='markers', name='markers',
                             hovertext=customdata_list, hoverlabel=dict(namelength=0),
                             hovertemplate=hover_template,
                             marker=dict(size=8, color=y_axis, colorscale='algae', opacity=0.8)))
    fig.update_layout(width=800, height=400, margin=dict(l=50, r=50, b=50, t=50, pad=4), template="plotly_dark")
    fig.update_xaxes(showgrid=False, zeroline=False)
    fig.update_yaxes(showgrid=False, zeroline=False)
    fig.show()
    return None

def pipeline_plot_feature(playlist_df, yaxis, customdata, xaxis=None):
    """
    Function chains the steps of the feature scatter plot: fetch the audio features, extract the
    axis lists, and draw the plot
    :param playlist_df: playlist dataframe
    :param yaxis: feature column plotted on the y axis
    :param customdata: column shown in the hover box of each point
    :param xaxis: column plotted on the x axis; the row index is used when None
    :return: None
    """
    tracks_with_features_df = make_playlist_track_with_featured_df(playlist_df)
    x_axis, y_axis, customdata_list = get_xy_axis_and_data(tracks_with_features_df, yaxis, customdata, xaxis)
    # Label the hover value with the plotted feature ("energy" -> "Energy") instead of a fixed word
    y_label = str(yaxis).replace("_", " ").capitalize()
    plot_scatter(x_axis, y_axis, customdata_list, y_label=y_label)
    return None

In [None]:
# Scatter each track's "energy" against its row position (xaxis=None), with the track "name" as hover text
pipeline_plot_feature(playlist_df, "energy", "name", xaxis=None)

## Playlist Length (Time)

In [None]:
def convertMillis(millis):
    """
    Function returns seconds, minutes and hours with millisecond input
    :param millis: duration in milliseconds
    :return: tuple (seconds, minutes, hours) as floats; seconds and minutes wrap at 60, hours is the
        total number of hours
    """
    seconds = (millis / 1000) % 60
    minutes = (millis / (1000 * 60)) % 60
    # No modulo 24 here: no day component is returned, so wrapping would silently drop whole days
    # from any duration of 24 hours or more
    hours = millis / (1000 * 60 * 60)
    return seconds, minutes, hours

def get_hour_minutes(minutes, hours):
    """
    Function formats the minute and hour values as whole-number strings
    :param minutes: number of minutes (may carry a fractional part)
    :param hours: number of hours (may carry a fractional part)
    :return: tuple (str_minutes, str_hours) — note the order: minutes first
    """
    # Truncate to whole units; taking only the first character of the text would turn 45.7 into "4"
    str_minutes = str(int(minutes))
    str_hours = str(int(hours))
    return str_minutes, str_hours

def pipeline_hour_min_length_of_playlist(features_df):
    """
    Function sums the track durations and formats the total as minute and hour strings
    :param features_df: dataframe with a "duration_ms" column
    :return: tuple (str_minutes, str_hours) as produced by ``get_hour_minutes``
    """
    total_ms = features_df["duration_ms"].sum()
    # The seconds part of the conversion is not needed here
    _, minute_part, hour_part = convertMillis(total_ms)
    return get_hour_minutes(minute_part, hour_part)

In [None]:
# Uses the global `features_df` built in the Song Features cell; the result is (minutes, hours) as strings
pipeline_hour_min_length_of_playlist(features_df)

## Artist Count

In [None]:
# Count the non-null "name" values per artist and sort from fewest to most; after reset_index the
# column "artist" holds the artist and the column "name" holds the count
artist_count = playlist_df.groupby("artist").count()["name"].reset_index().sort_values("name", ascending=True)

In [None]:
import plotly.graph_objects as go

# Horizontal bars: the count (held in the "name" column) runs along x, one bar per artist on y
fig = go.Figure(
    data=go.Bar(
        x=artist_count["name"],
        y=artist_count["artist"],
        orientation='h',
        marker_color="#1DB954",
    )
)
fig.update_layout(
    width=800,
    height=400,
    margin={"l": 50, "r": 50, "b": 50, "t": 50, "pad": 4},
    template="plotly_dark",
)
# Hide grid and zero lines on both axes
fig.update_yaxes(showgrid=False, zeroline=False)
fig.update_xaxes(showgrid=False, zeroline=False)
fig.show()

In [None]:
import plotly.graph_objects as go


def make_artist_count_df(playlist_df):
    """
    Function counts the non-null "name" values per artist
    :param playlist_df: playlist dataframe with "artist" and "name" columns
    :return: dataframe with the columns "artist" and "name" (the count), smallest count first
    """
    names_per_artist = playlist_df.groupby("artist")["name"].count()
    return names_per_artist.reset_index().sort_values(by="name")

def make_artist_count_plot(artist_count):
    """
    Function draws a horizontal bar chart of the artist counts in the Streamlit app
    :param artist_count: dataframe whose "artist" column holds the artist and whose "name" column
        holds the count
    :return: None
    """
    bars = go.Bar(
        x=artist_count["name"],
        y=artist_count["artist"],
        orientation='h',
        marker_color="#1DB954",
    )
    chart = go.Figure(bars)
    chart.update_layout(
        width=800,
        height=400,
        margin={"l": 50, "r": 50, "b": 50, "t": 50, "pad": 4},
        template="plotly_dark",
    )
    # Hide grid and zero lines on both axes
    chart.update_yaxes(showgrid=False, zeroline=False)
    chart.update_xaxes(showgrid=False, zeroline=False)
    st.plotly_chart(chart, use_container_width=True)

def pipeline_artist_count_plot(playlist_df):
    """
    Function chains the two steps of the artist count chart: build the count table, then plot it
    :param playlist_df: playlist dataframe handed to ``make_artist_count_df``
    :return: None
    """
    make_artist_count_plot(make_artist_count_df(playlist_df))

In [None]:
# Draw the artist count bar chart for the loaded playlist
pipeline_artist_count_plot(playlist_df)