# Spotify Compare
I want to build something that lets me compare and contrast Spotify playlists, both my own and my friends'. I will first build out the logic, and then build a UI where users can drop in their own playlists.

In [None]:
import pandas as pd
import spotipy
import spotipy.util as util
from dateutil.parser import parse as parse_date

In [None]:
import platform
# Report the bit architecture and linkage format of the running Python interpreter.
platform.architecture()

In [None]:
# Load the API credentials from a headerless CSV; the cells below read their values from column 1.
api_creds = pd.read_csv("../data/external/api_keys.csv", header=None)
# api_creds  # NOTE(review): presumably left commented out so the secrets are not rendered in the output

In [None]:
# Spotify API token
# Rows 0-2 of column 1 of the credentials table hold user id, client id and client secret.
user_id, client_id, client_secret = (api_creds[1].iloc[row] for row in range(3))

# All scope: one space-separated string, split over adjacent literals for readability
scope = (
    'ugc-image-upload user-read-playback-state streaming user-read-email playlist-read-collaborative '
    'user-modify-playback-state user-read-private playlist-modify-public user-library-modify user-top-read '
    'user-read-playback-position user-read-currently-playing playlist-read-private user-follow-read '
    'app-remote-control user-read-recently-played playlist-modify-private user-follow-modify user-library-read'
)

# NOTE(review): prompt_for_user_token appears to be deprecated in recent spotipy releases
# in favour of SpotifyOAuth; confirm against the installed version.
token = util.prompt_for_user_token(
    user_id,
    scope,
    client_id=client_id,
    client_secret=client_secret,
    redirect_uri='http://127.0.0.1:9090',
)

# Client used by every API call in the rest of the notebook
sp = spotipy.Spotify(auth=token)

In [None]:
# Print "<track name> - <first artist name>" for every saved track in the returned page
results = sp.current_user_saved_tracks()
for item in results['items']:
    track = item['track']
    print(' - '.join((track['name'], track['artists'][0]['name'])))

In [None]:
# Id of the playlist used as the running example in the cells below
playlist_id = '37i9dQZF1E9Z5O3LQ5a9Hx'

# Fetch the playlist object, then keep the track items it carries and the link to the next page
playlist = sp.user_playlist(user_id, playlist_id)
tracks = playlist['tracks']['items']
next_uri = playlist['tracks']['next']

In [None]:
# One row per track item carried by the playlist object: track id, first artist's name, track name,
# parsed album release date (None when the release date is empty) and parsed time the track was added.
tracks_df = pd.DataFrame([(track['track']['id'], track['track']['artists'][0]['name'], track['track']['name'],
                           parse_date(track['track']['album']['release_date']) if track['track']['album']['release_date']
                               else None,
                           parse_date(track['added_at']))
                          for track in playlist['tracks']['items']], columns=['id', 'artist', 'name', 'release_date', 'added_at'] )

## Artists in Playlist

In [None]:
ARTIST = "artist"
ID = "id"
SONGS = "songs"

def make_artist_in_playlist_count_df(playlist_df):
    """
    Count how many songs each artist has in a playlist.

    Rows are counted per artist whether or not the track id is set, so a song with a
    missing id is still counted.
    :param playlist_df: dataframe of the playlist; must contain an ``artist`` column
    :return: dataframe indexed by artist with a single ``songs`` column, sorted by song count descending
    """
    # size() counts rows; count() on the id column would silently skip rows whose id is null
    songs_per_artist = playlist_df.groupby(ARTIST).size().rename(SONGS)
    # A stable sort keeps artists with the same count in the (sorted) group order
    songs_per_artist = songs_per_artist.sort_values(ascending=False, kind="stable")
    return songs_per_artist.to_frame()

In [None]:
# Show the songs-per-artist table for the example playlist
make_artist_in_playlist_count_df(tracks_df)

In [None]:
def get_playlist_tracks(username, playlist_id):
    """
    Collect the track items (JSON) of a playlist across all result pages.

    The API response is paginated; pages are followed through their ``next`` link
    until none is left. Uses the module-level ``sp`` client.
    :param username: Spotify user id passed along with the request
    :param playlist_id: id of the playlist to read
    :return: list with the items of every page, in page order
    """
    page = sp.user_playlist_tracks(username, playlist_id)
    all_items = page['items']
    while True:
        if not page['next']:
            return all_items
        page = sp.next(page)
        all_items.extend(page['items'])

def get_playlist_df(playlist_id):
    """
    Build a dataframe with one row per track of a playlist.

    Uses the module-level ``sp`` client and ``user_id``. Playlist entries whose track
    object is missing are skipped.
    :param playlist_id: Spotify id of the playlist
    :return: dataframe with the columns id, name, artist_id, artist, album_id, album,
        track popularity, release_date, added_at, playlist_name and release_year
    """
    from datetime import datetime

    playlist = sp.user_playlist(user_id, playlist_id)
    items = get_playlist_tracks(user_id, playlist_id)

    # dateutil fills missing date parts from `default`, which is today's date unless given.
    # A fixed default makes partial release dates ("1999", "1999-05") resolve to the first
    # day of the year/month instead of depending on the day the notebook is run.
    partial_date_default = datetime(1900, 1, 1)

    rows = []
    for item in items:
        track = item['track']
        if track is None:
            # No track object to read for this entry
            continue
        first_artist = track['artists'][0]
        album = track['album']
        release_date = album['release_date']
        added_at = item['added_at']
        rows.append((track['id'], track['name'],
                     first_artist['id'], first_artist['name'],
                     album['id'], album['name'],
                     track['popularity'],
                     parse_date(release_date, default=partial_date_default) if release_date else None,
                     parse_date(added_at) if added_at else None))

    tracks_df = pd.DataFrame(
        rows,
        columns=['id', 'name', "artist_id", 'artist', "album_id", 'album',
                 'track popularity', 'release_date', 'added_at'])
    tracks_df['playlist_name'] = playlist['name']
    # Explicit conversion keeps both columns datetime-typed even when the playlist is empty
    # or every value is missing, so the .dt accessor keeps working here and in later cells.
    tracks_df['release_date'] = pd.to_datetime(tracks_df['release_date'])
    tracks_df['added_at'] = pd.to_datetime(tracks_df['added_at'], utc=True)
    tracks_df["release_year"] = tracks_df["release_date"].dt.year
    return tracks_df

In [None]:
# Build the track dataframe for the example playlist
playlist_df_2017 = get_playlist_df(playlist_id)
# Mean release year of its tracks
# NOTE(review): as a non-final expression of the cell, this value is presumably not shown in the output
playlist_df_2017["release_date"].dt.year.mean()
playlist_df_2017

## Count Songs per Decade

In [None]:
# Get decade: subtracting the last digit of the year (year % 10) turns 2011 into 2010
decade = 2011 - (2011%10)
decade

In [None]:
def get_decade_from_year(year):
    """
    Round a year down to the first year of its decade, e.g. 2011 -> 2010.
    :param year: integer
    :return: the year with its last digit set to zero
    """
    return (year // 10) * 10

In [None]:
# Tally the tracks of the 2017 playlist by the decade of their release year
decade_dict = {}

for index, row in playlist_df_2017.iterrows():
    song_decade = get_decade_from_year(row["release_year"])
    # Start a decade at 0 the first time it is seen, then add this track
    decade_dict[song_decade] = decade_dict.get(song_decade, 0) + 1


In [None]:
# Display the decade -> song count mapping
decade_dict

In [None]:
# Function
COLUMN_RELEASE_YEAR = "release_year"

def make_song_decade_playlist_dict(playlist_dataframe):
    """
    Count how many songs of a playlist (dataframe) were released in each decade.

    Songs without a release year are left out of the tally.
    :param playlist_dataframe: playlist dataframe with a ``release_year`` column
    :return: dictionary mapping decade (int, e.g. 2010) to number of songs, in order of first appearance
    """
    decade_dict = dict()
    # Missing years are dropped first: NaN never equals itself, so every missing year
    # would otherwise end up as its own dictionary key.
    for year in playlist_dataframe[COLUMN_RELEASE_YEAR].dropna():
        # int() keeps the keys integral even when the column is float-typed because of missing values
        song_decade = int(get_decade_from_year(year))
        decade_dict[song_decade] = decade_dict.get(song_decade, 0) + 1
    return decade_dict

In [None]:
# Songs per release decade for the 2017 playlist
make_song_decade_playlist_dict(playlist_df_2017)

In [None]:
# Testing with other playlist
TOP_2018 = "37i9dQZF1EjnleGtX5GCCK"

# Build its track dataframe and count the songs per release decade
playlist_df_2018 = get_playlist_df(TOP_2018)
make_song_decade_playlist_dict(playlist_df_2018)

### Get Playlist ID from Link

In [None]:
def get_playist_id_from_link(playlist_link):
    """
    Return the Spotify playlist id contained in a Spotify-generated link.

    Accepts web links (with or without a query string, fragment or trailing slash),
    ``spotify:playlist:<id>`` URIs and bare ids.
    :param playlist_link: playlist link, URI or id as a string
    :return: the playlist id
    """
    from urllib.parse import urlsplit

    link = playlist_link.strip()
    if link.startswith("spotify:"):
        # URI form: the id is the last colon-separated field
        return link.split("?", 1)[0].rsplit(":", 1)[-1]
    # The path component carries no ?query or #fragment; a trailing slash would otherwise
    # leave an empty last segment.
    path = urlsplit(link).path
    return path.rstrip("/").rsplit("/", 1)[-1]

In [None]:
# Example: extract the playlist id from a share link
get_playist_id_from_link("https://open.spotify.com/playlist/37i9dQZF1EtnQ0jMYBpCho?si=886b0fd885c7403a")

In [None]:
# 2019
# NOTE(review): the module-level name `id` shadows the Python builtin of the same name.
id = get_playist_id_from_link("https://open.spotify.com/playlist/37i9dQZF1EtnQ0jMYBpCho?si=b9eb3b6491af4f50")
playlist_df = get_playlist_df(id)
# Songs per release decade for this playlist
make_song_decade_playlist_dict(playlist_df)

In [None]:
# 2020
# NOTE(review): the module-level name `id` shadows the Python builtin of the same name.
id = get_playist_id_from_link("https://open.spotify.com/playlist/37i9dQZF1EM216Dz9SwARZ?si=194f693849c14b5c")
playlist_df = get_playlist_df(id)
# Songs per release decade for this playlist
make_song_decade_playlist_dict(playlist_df)

### Average Track Popularity

In [None]:
# Mean and minimum track popularity of the 2017 playlist
# NOTE(review): as non-final expressions of the cell, these two values are presumably not shown in the output
playlist_df_2017["track popularity"].mean()
playlist_df_2017["track popularity"].min()
playlist_df_2017

## Genres

In [None]:
# Count genres across the 2017 playlist: every track contributes each genre of the artist
# stored in its artist_id column.
genre_dict = dict()
# Genres per artist id, so an artist with several tracks is requested from the API only once
artist_genres_by_id = dict()
for artist_id in playlist_df_2017["artist_id"]:
    if pd.isna(artist_id):
        # A track without an artist id cannot be looked up
        continue
    if artist_id not in artist_genres_by_id:
        artist_genres_by_id[artist_id] = sp.artist(artist_id)["genres"]
    for genre in artist_genres_by_id[artist_id]:
        genre_dict[genre] = genre_dict.get(genre, 0) + 1
genre_dict

In [None]:
# Turn the genre tally into a two-column table and show it with the most frequent genres first
genre_df = pd.DataFrame({"genre": list(genre_dict.keys()), "count": list(genre_dict.values())})
genre_df.sort_values("count", ascending=False)

## Calendar

In [None]:
# Playlist used for the calendar view below
# NOTE(review): the module-level name `id` shadows the Python builtin of the same name.
id = get_playist_id_from_link("https://open.spotify.com/playlist/1DiTkTibrpQjPLx7GKIUu1?si=c12fa9135cb94ee5")
playlist_df = get_playlist_df(id)
# Songs per release decade for this playlist
make_song_decade_playlist_dict(playlist_df)

In [None]:
# Songs added per calendar day: group the rows by the day of `added_at` and count their ids
dms = playlist_df.groupby(playlist_df['added_at'].dt.to_period('D')).count()['id'].to_timestamp()
# Last and first calendar year in which a song was added
max_year = playlist_df['added_at'].dt.to_period('D').max().year
min_year = playlist_df['added_at'].dt.to_period('D').min().year

# Reindex onto every day from 1 Jan of the first year to 31 Dec of the last one; days without adds get 0
idx = pd.date_range(str(min_year) + '-1-1', str(max_year) + '-12-31')
dms.index = pd.DatetimeIndex(dms.index)
daily_adds = dms.reindex(idx, fill_value=0)

In [None]:
# Create discrete colors
from pylab import *
# matplotlib.cm.get_cmap was removed in matplotlib 3.9; pyplot.get_cmap takes the same
# (name, lut) arguments, with 10 being the number of discrete colour levels.
cmap = plt.get_cmap('YlGn', 10)

import calplot
# Calendar heatmap of the number of songs added per day
calplot.calplot(daily_adds, cmap = cmap, figsize = (20, 7))
plt.show()

In [None]:
from pylab import *
import calplot


ADDED_AT_COLUMN = "added_at"
DAY_FREQUENCY = "D"
START_OF_YEAR = "-1-1"
END_OF_YEAR = "-12-31"
COLORWAY_CALENDAR = 'YlGn'


def make_daily_add_series(playlist_df):
    """
    Count how many songs were added to a playlist on each calendar day.

    The result covers every day from 1 January of the first year with an add to
    31 December of the last one; days without adds are 0. Every row with an add
    time is counted. An empty playlist gives an empty series.
    :param playlist_df: playlist dataframe with an ``added_at`` datetime column
    :return: integer series of daily add counts indexed by date
    """
    added_at = pd.to_datetime(playlist_df[ADDED_AT_COLUMN]).dropna()
    if added_at.dt.tz is not None:
        # Keep the wall-clock time and drop the timezone explicitly, so that no
        # implicit (warning-raising) conversion happens further down
        added_at = added_at.dt.tz_localize(None)
    if added_at.empty:
        # No add dates means there is no year range to span
        return pd.Series(dtype="int64", index=pd.DatetimeIndex([]))

    # Truncate to midnight once and reuse it for both the counts and the year range
    days = added_at.dt.normalize()
    daily_counts = days.groupby(days).size()
    idx = pd.date_range(str(days.min().year) + START_OF_YEAR, str(days.max().year) + END_OF_YEAR)
    daily_adds = daily_counts.reindex(idx, fill_value=0)
    return daily_adds

def plot_date_added_calendar(daily_adds):
    """
    Draw a calendar heatmap of the daily add counts and show it.
    :param daily_adds: series of daily add counts indexed by date
    :return: None
    """
    # pyplot.get_cmap replaces matplotlib.cm.get_cmap, which was removed in matplotlib 3.9;
    # 10 is the number of discrete colour levels
    cmap = plt.get_cmap(COLORWAY_CALENDAR, 10)
    calplot.calplot(daily_adds, cmap=cmap, figsize=(20, 7))
    plt.show()

def pipeline_date_added_calendar(playlist_df):
    """
    Compute the daily add counts of a playlist and plot them as a calendar.
    :param playlist_df: playlist dataframe handed to ``make_daily_add_series``
    :return: None
    """
    plot_date_added_calendar(make_daily_add_series(playlist_df))

In [None]:
# Calendar of song adds for the playlist currently held in playlist_df
pipeline_date_added_calendar(playlist_df)

In [None]:
# Same calendar view for another playlist
# NOTE(review): the module-level name `id` shadows the Python builtin of the same name.
id = get_playist_id_from_link("https://open.spotify.com/playlist/0Fv83T55JNf7lEyqwuZscq?si=e7e9d03416e44857")
playlist_df = get_playlist_df(id)
pipeline_date_added_calendar(playlist_df)

## Song Features

In [None]:
# Audio features of the 2017 playlist, requested in batches of 100 track ids per call
features = list()

# Plain list slicing avoids relying on `np` being leaked in by `from pylab import *`.
# Ids are de-duplicated so a track that appears twice does not multiply rows in the merge,
# and tracks without an id are skipped instead of being sent as the string "None".
track_ids = [str(track_id) for track_id in playlist_df_2017['id'].dropna().unique()]
for start in range(0, len(track_ids), 100):
    features += sp.audio_features(track_ids[start:start + 100])

# Entries that came back empty (None) are dropped before building the table
features_df = pd.DataFrame([track_features for track_features in features if track_features])

tracks_with_features_df = playlist_df_2017.merge(features_df, on = ['id'], how = 'inner')

In [None]:
# Display the playlist tracks joined with their audio features
tracks_with_features_df

In [None]:
from IPython.display import Image
from IPython.core.display import HTML

def feature_extreme(feature, extreme):
    """
    Print a short description of the track with the highest or lowest value of an audio
    feature in ``tracks_with_features_df`` and return its album cover.

    When several tracks share the extreme value, the first one in the dataframe is used.
    Uses the module-level ``sp`` client to look up the track link and artwork.
    :param feature: name of a column in ``tracks_with_features_df``
    :param extreme: 'max' or 'min'
    :return: IPython ``Image`` of the album cover, 250 wide
    :raises ValueError: if ``extreme`` is neither 'max' nor 'min'
    """
    if extreme not in ('max', 'min'):
        raise ValueError('extreme value must be "max" or "min".')
    label = 'maximum' if extreme == 'max' else 'minimum'

    feature_values = tracks_with_features_df[feature]
    extreme_value = feature_values.max() if extreme == 'max' else feature_values.min()
    df = tracks_with_features_df[feature_values == extreme_value]

    # A single API request supplies both the track link and the album artwork
    track = sp.track(df['id'].iloc[0])
    track_url = track['external_urls']['spotify']
    image_url = track['album']['images'][0]['url']

    # Adjacent f-strings (instead of backslash continuations inside one literal) keep the
    # source indentation out of the printed message
    print(f"The song with the {label} {feature} in my playlist is {df['name'].iloc[0]}. "
          f"With a {label} {feature} value of {extreme_value}. The song is by {df['artist'].iloc[0]}, "
          f"released on the {df['album'].iloc[0]} album in {df['release_date'].dt.to_period('D').iloc[0]}.",
          f"The track can be listened to here: {track_url}", '\n', '\n')

    return Image(url=image_url, width=250)

In [None]:
from IPython.display import display

# Audio-feature columns to report on
SONG_FEATURE_LIST = ['acousticness', 'danceability', 'duration_ms', 'energy', 'instrumentalness', 'key', 'liveness', 'loudness', 'speechiness', 'tempo', 'valence']

# A value returned inside a loop is not rendered automatically, so the album cover
# returned by feature_extreme is passed to display() explicitly.
for feature in SONG_FEATURE_LIST:
    display(feature_extreme(feature, 'max'))