# Spotify "Your Top Songs" Timeline

#### By [Kavish Hukmani](https://kavishhukmani.me/)

-----

**Instructions:** Click the ⏩ button in the toolbar above to restart the kernel and run all cells, then confirm when prompted.

-----

In [None]:
import os
from collections import Counter

import pandas as pd

import plotly.graph_objects as go
import plotly.express as px
from ipywidgets import widgets

from sklearn.preprocessing import MultiLabelBinarizer

import spotipy
from spotipy.oauth2 import  SpotifyPKCE
from spotipy.cache_handler import MemoryCacheHandler

In [None]:
# Spotify application client ID, kept in plain text for Binder to play nicely.
SPOTIPY_CLIENT_ID = "c1928ac0b8ff4ab7a53fa8bf4bc0dabf"

# Redirect URI passed to the Spotify auth manager.
SPOTIPY_REDIRECT_URI = "http://127.0.0.1:9090"

In [None]:
# Spotify client authenticated through the PKCE auth manager. The token cache
# lives in memory only and the browser is not opened automatically.
sp = spotipy.Spotify(
    auth_manager=SpotifyPKCE(
        client_id=SPOTIPY_CLIENT_ID,
        redirect_uri=SPOTIPY_REDIRECT_URI,
        scope=['playlist-read-private'],
        open_browser=False,
        cache_handler=MemoryCacheHandler(),
    )
)

In [None]:
# Collect every Spotify-owned "Your Top Songs <year>" playlist in the user's
# library. The playlist listing is paginated, so follow the `next` links
# rather than reading only the first page.
response = sp.current_user_playlists()

playlists = []
while response:
    for item in response['items']:
        if item['name'].startswith("Your Top Songs") and item['owner']['display_name'] == "Spotify":
            playlists.append({
                # The year is the last four characters of the playlist name.
                "playlist_year": item["name"][-4:],
                "playlist_name": item["name"],
                "playlist_id": item["id"]
            })
    # `sp.next` returns None once there are no further pages.
    response = sp.next(response)

# Fail with an actionable message instead of a KeyError from sort_values on
# an empty, column-less DataFrame.
if not playlists:
    raise ValueError(
        'No "Your Top Songs" playlists owned by Spotify were found in this library.'
    )

playlists = pd.DataFrame(playlists)
playlists = playlists.sort_values("playlist_year", ignore_index=True)

# playlists

In [None]:
# Flatten every track of every selected playlist into one row per
# (track, playlist) pair.
tracks = []
for _, row in playlists.iterrows():
    # print(f"Extracting {row['playlist_name']}...")
    page = sp.playlist(row["playlist_id"])['tracks']

    # The playlist's track listing is a paged object; walk all of its pages.
    while page:
        for item in page['items']:
            track = item['track']
            # Skip entries without track data instead of crashing on them.
            if track is None:
                continue
            album = track['album']
            tracks.append({
                "name": track['name'],
                "artists": [artist['name'] for artist in track['artists']],
                "album": album['name'],
                "release_year": album['release_date'][:4],
                "duration": track['duration_ms']/1000,  # milliseconds -> seconds
                "track_id": track['id'],
                "artist_id": [artist['id'] for artist in track['artists']],
                "album_id": album['id'],
                "playlist_year": row["playlist_year"],
                "playlist_name": row["playlist_name"]
            })
        # `sp.next` returns None once there are no further pages.
        page = sp.next(page)

tracks = pd.DataFrame(tracks)
# Song identity built from title, artists and album; used below to match the
# same song across different years' playlists.
tracks['my_id'] = tracks['name'] + "--" + tracks['artists'].apply(', '.join) + "--" + tracks['album']
# tracks.head()

In [None]:
# Violin plot (with embedded box plot and all points shown) of song duration,
# one violin per playlist year.
hover_columns = ['name', 'artists', 'album', 'duration']
axis_labels = {
    "playlist_year": "Year",
    "duration": "Song Length (secs)",
    "name": "Title",
    "artists": "Artist(s)",
    "album": "Album",
}

fig = px.violin(
    tracks,
    x="playlist_year",
    y="duration",
    color="playlist_year",
    box=True,
    points="all",
    hover_data=hover_columns,
    template='plotly_white',
    labels=axis_labels,
    title="Song Length Across Years",
    height=600,
)

fig.show()

In [None]:
# Map each trace name of the figure above to the marker colour Plotly gave it,
# so later figures can reuse the same colour per year.
color_map = {trace.name: trace.marker.color for trace in fig.data}

In [None]:
# For every song (`my_id`), gather the list of playlist years it appears in,
# then re-attach the song's descriptive columns.
years_per_song = tracks.groupby('my_id')['playlist_year'].apply(list).reset_index()
song_details = tracks.drop(columns=['playlist_year', 'playlist_name'])
temp = years_per_song.merge(song_details, on='my_id', how='left')

# One 0/1 indicator column per playlist year.
mlb = MultiLabelBinarizer()
year_flags = pd.DataFrame(
    mlb.fit_transform(temp['playlist_year']),
    columns=mlb.classes_,
    index=temp.index,
)

# The merge repeats a song once per year it appeared in; keep a single row.
tracks_encoded = (
    pd.concat([temp, year_flags], axis=1)
    .drop_duplicates('my_id')
    .reset_index(drop=True)
    .drop(columns='playlist_year')
)

years = list(mlb.classes_)

# Number of yearly playlists each song appears in.
tracks_encoded["occurances"] = tracks_encoded[years].sum(axis=1)

# tracks_encoded.head()

In [None]:
title_label = widgets.HTML(value="<b>Song Churn Across Years</b>")

year_filter = widgets.Dropdown(
    options=years,
    value=years[-1],
    description='Year:',
)

# Text shown for a year flag (1 = song is in that year's playlist) and the
# background used for table cells that are not highlighted.
_PRESENCE_LABELS = {1: 'Top 100 🕪', 0: '🔇'}
_NEUTRAL_FILL = "#EBF0F8"


def _flow_line(year):
    """Return the parcats line settings that highlight songs present in *year*."""
    return {
        'color': tracks_encoded[year],
        'colorscale': [[0, 'lightsteelblue'], [1, color_map[year]]],
    }


def _table_contents(year):
    """Build the details table for songs in *year* that appear in more than one year.

    Returns a ``(cell_values, fill_colors)`` pair with one entry per table
    column: title, artist(s), album, then one column per playlist year.
    """
    in_year = tracks_encoded[year] == 1
    repeated = tracks_encoded['occurances'] > 1
    selected = tracks_encoded[in_year & repeated]
    selected = selected.sort_values(['occurances'] + years[::-1], ascending=False)
    # Copy so the column assignments below never write into a view of
    # tracks_encoded (and do not trigger SettingWithCopy warnings).
    selected = selected[['name', 'artists', 'album'] + years].copy()
    selected['artists'] = selected['artists'].str.join(', ')

    # Colours must be derived while the year columns still hold 0/1 flags.
    fill_colors = [[_NEUTRAL_FILL] * len(selected) for _ in range(3)]
    fill_colors += [
        [_NEUTRAL_FILL if flag == 0 else color_map[col] for flag in selected[col]]
        for col in years
    ]

    # Per-column Series.map replaces DataFrame.applymap, which is deprecated
    # in recent pandas releases.
    for col in years:
        selected[col] = selected[col].map(_PRESENCE_LABELS)

    cell_values = [selected[col] for col in selected.columns]
    return cell_values, fill_colors


dims = [
    go.parcats.Dimension(
        values=tracks_encoded[year],
        label=year, categoryarray=[1, 0],
        ticktext=[_PRESENCE_LABELS[1], _PRESENCE_LABELS[0]]
    )
    for year in years
]

# Create parcats trace, initially highlighting the most recent year.
g1 = go.FigureWidget(
    data=[go.Parcats(
        dimensions=dims,
        line=_flow_line(years[-1]),
        hoveron='color', hoverinfo='skip',
        arrangement='freeform')],
    layout=go.Layout(title=f'{years[-1]} Song Occurance Flow'))

header_values = ['<b>Title</b>', '<b>Artist(s)</b>', '<b>Album</b>'] + [f'<b>{i}</b>' for i in years]
cell_values, color_values = _table_contents(years[-1])

g2 = go.FigureWidget(
    data=[go.Table(
        header=dict(
            values=header_values,
            line_color='white', fill_color='white',
            align='center', font=dict(color='black', size=12)),
        cells=dict(
            values=cell_values,
            fill_color=color_values))],
    layout=go.Layout(height=1000, title=f'{years[-1]} Songs Details'))


def response(change):
    """Refresh both figures for the year currently selected in the dropdown.

    Registered as the observer of ``year_filter``'s ``value`` trait; the
    ``change`` payload itself is not used.
    """
    selected_year = year_filter.value

    with g1.batch_update():
        line = _flow_line(selected_year)
        g1.data[0].line.color = line['color']
        g1.data[0].line.colorscale = line['colorscale']
        g1.layout.title.text = f'{selected_year} Song Occurance Flow'

    with g2.batch_update():
        new_cells, new_fills = _table_contents(selected_year)
        g2.data[0].cells.values = new_cells
        g2.data[0].cells.fill.color = new_fills
        # Keep the table's own title wording (the same as its initial title)
        # rather than the flow chart's.
        g2.layout.title.text = f'{selected_year} Songs Details'


year_filter.observe(response, names="value")
container = widgets.HBox([title_label, year_filter])
widgets.VBox([container, g1, g2])

In [None]:
# Unique artist names across all songs. Sorted so the row order, and with it
# how ties are broken when ranking below, is the same on every run; the
# iteration order of a set of strings is not.
artists = sorted({artist for names in tracks_encoded['artists'] for artist in names})

# For each artist, count per year the songs that credit them.
artist_presence = []
for artist in artists:
    credited = tracks_encoded['artists'].apply(lambda names: artist in names)
    yearly_counts = tracks_encoded.loc[credited, years].sum().to_dict()
    yearly_counts["artist"] = artist
    artist_presence.append(yearly_counts)

artist_presence = pd.DataFrame(artist_presence)
artist_presence['occurances'] = artist_presence[years].sum(axis=1)
# A stable sort keeps the ordering of equally ranked artists deterministic.
artist_presence = artist_presence.sort_values('occurances', ascending=False, kind='stable')

# Heatmap of the ten artists with the most occurrences (rows) by year (columns).
top_artists = artist_presence.head(10)
fig = px.imshow(top_artists[years].values.tolist(),
                labels=dict(x="Year", y="Artist", color="Occurances"),
                x=years,
                y=top_artists['artist'].to_list(),
                height=800,
                title="Presence Of Your Top 10 Artists")
fig.update_xaxes(side="top")
fig.show()

In [None]:
# Unique artist IDs across all songs. Missing (None) IDs are dropped
# defensively so a single bad entry cannot break the lookup request.
artist_ids = list({
    artist_id
    for ids in tracks_encoded['artist_id']
    for artist_id in ids
    if artist_id is not None
})

# Look up genres in batches of 50 IDs per request.
artist_genres = {}
for start in range(0, len(artist_ids), 50):
    batch = sp.artists(artist_ids[start:start + 50])
    for item in batch['artists']:
        # Guard against null entries in the returned list.
        if item is not None:
            artist_genres[item['name']] = item['genres']


artist_presence['genres'] = artist_presence['artist'].map(artist_genres)

# Per year, count each genre once for every song occurrence of an artist
# tagged with it.
genre_year_counter = {year: Counter() for year in years}

for _, row in artist_presence.iterrows():
    genres = row['genres']
    # Artists missing from the lookup map to NaN rather than a list.
    if not isinstance(genres, list):
        continue
    for year in years:
        appearances = int(row[year])
        if not appearances:
            continue
        for genre in genres:
            genre_year_counter[year][genre] += appearances

genre_total_counter = Counter()
for yearly_counts in genre_year_counter.values():
    genre_total_counter.update(yearly_counts)

top_genres = [genre for genre, _ in genre_total_counter.most_common(5)]

# Rows: top genres (in rank order); columns: years. A genre absent from a
# year is NaN here, so fill with 0 before casting to int (the cast raises on
# NaN otherwise).
genre_year_counter = pd.DataFrame(genre_year_counter)
genre_year_counter = genre_year_counter.reindex(top_genres).fillna(0).astype(int)
# Transpose so that each row is a year and each column a genre.
genre_year_counter = genre_year_counter.T

In [None]:
fig = go.Figure()

# One filled polar trace per row of genre_year_counter; the columns provide
# the angular categories.
for trace_name, radial_values in genre_year_counter.iterrows():
    radar_trace = go.Scatterpolar(
        r=radial_values,
        theta=genre_year_counter.columns,
        fill='toself',
        name=trace_name,
    )
    fig.add_trace(radar_trace)

fig.update_layout(
    polar={'radialaxis': {'visible': True}},
    height=800,
    title='Presence Of Your Top 5 Genres',
)

fig.show()