In [None]:
import spotipy
import pandas as pd
from dotenv import load_dotenv
import requests
from io import BytesIO
# Environment variables: load_dotenv() is called before any Spotify client is created.
# NOTE(review): presumably the spotipy credentials (client id / secret / redirect URI)
# live in a local .env file -- confirm which variables are expected.
load_dotenv()

# Importing required libraries
import pandas as pd
import duckdb as ddb
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
import matplotlib
# Raise matplotlib's size cap for animations embedded as HTML (see to_jshtml() at the end
# of the notebook). NOTE(review): the rcParam is documented in MB, so 2**130 is
# effectively "no limit" -- confirm a smaller, explicit value would not be preferable.
plt.rcParams['animation.embed_limit'] = 2**130

In [None]:
#OAUTH
# Build the Spotify client through spotipy's SpotifyOAuth manager, requesting
# only the "user-library-read" scope.
from spotipy.oauth2 import SpotifyOAuth

scope = "user-library-read"

auth_manager = SpotifyOAuth(scope=scope)
sp = spotipy.Spotify(auth_manager=auth_manager)

In [None]:
%%script false
#CLIENT CREDENTIALS
from spotipy.oauth2 import SpotifyClientCredentials

auth_manager = SpotifyClientCredentials()
sp = spotipy.Spotify(auth_manager=auth_manager)

In [None]:
# Identifier of the playlist to analyse; passed to sp.playlist_items() when fetching tracks.
PLAYLIST_ID = '07KajudA1Z0xAmqwOKzvTk'

In [None]:
#get tracks by playlist_id
# Walk every result page of the playlist and collect the raw item dicts.
tracks_by_playlist = []
playlist = sp.playlist_items(PLAYLIST_ID)
while playlist:
    tracks_by_playlist.extend(playlist["items"])

    # Follow the "next" link while the page has one; None ends the loop.
    playlist = sp.next(playlist) if playlist['next'] else None

In [None]:
# Flatten the raw playlist items into one record per track, keeping only the
# first-listed artist of each track.
parsed_tracks = []
skipped_items = 0

for track in tracks_by_playlist:
    track_data = track.get("track")
    added_at = track.get("added_at")
    artists = track_data.get("artists") if track_data else None

    # Items without a track payload, an added_at value, an artist, or a
    # track/artist id (e.g. unavailable or local tracks) cannot be placed on the
    # timeline or looked up later, and indexing into them would raise.
    # They are skipped and counted instead of aborting the whole notebook.
    if (
        not added_at
        or not artists
        or not artists[0]
        or not track_data.get("id")
        or not artists[0].get("id")
    ):
        skipped_items += 1
        continue

    primary_artist = artists[0]
    parsed_tracks.append({
        # Only the first 10 characters are kept (assumes an ISO-style timestamp,
        # so this is the YYYY-MM-DD date part -- TODO confirm).
        "added_at": added_at[:10],
        "title": track_data["name"],
        "artist_id": primary_artist["id"],
        "artist_name": primary_artist["name"],
        "track_id": track_data["id"],
    })

if skipped_items:
    print(f"Skipped {skipped_items} playlist item(s) with missing track, artist or date data")

In [None]:
# One row per parsed playlist track. The columns are pinned explicitly so the
# frame keeps its schema (and later column lookups keep working) even when
# parsed_tracks is empty.
tracks_df = pd.DataFrame(
    parsed_tracks,
    columns=["added_at", "title", "artist_id", "artist_name", "track_id"],
)
tracks_df

In [None]:
#get profile image url for each artist
def chunks(lst, n):
    """Split ``lst`` into consecutive slices of at most ``n`` items, yielded in order.

    The last slice is shorter when ``len(lst)`` is not a multiple of ``n``.
    """
    starts = range(0, len(lst), n)
    yield from (lst[start:start + n] for start in starts)
        
# Distinct artists in the playlist; null ids are dropped because they cannot be looked up.
playlist_artist_ids = tracks_df["artist_id"].dropna().unique()
artist_images_urls = []
# Artists are requested in batches of 50 ids per call.
for artist_ids_chunk in chunks(playlist_artist_ids, 50):
    res = sp.artists(list(artist_ids_chunk))["artists"]

    artist_images_urls += [
        # The last entry of "images" is used (presumably the smallest rendition -- TODO confirm).
        {"artist_image_url": artist["images"][-1]["url"], "artist_id": artist["id"]}
        for artist in res
        # Skip null entries and artists without any image: indexing [-1] on an
        # empty list would raise IndexError. Such artists simply get no image row.
        if artist and artist.get("images")
    ]

# Columns are pinned so the frame keeps its schema even when no image was found.
artist_images_df = pd.DataFrame(artist_images_urls, columns=["artist_image_url", "artist_id"])

In [None]:
#preload artist images and cache them in a dict keyed by image url
IMAGE_REQUEST_TIMEOUT_S = 30

loaded_images = {}
# Null urls are skipped and every distinct url is downloaded only once.
for image_url in artist_images_df["artist_image_url"].dropna().unique():
    # Without a timeout a single stalled download would hang the notebook indefinitely.
    response = requests.get(image_url, timeout=IMAGE_REQUEST_TIMEOUT_S)
    # Fail with the HTTP error itself rather than a confusing image-decoding error.
    response.raise_for_status()
    img = plt.imread(BytesIO(response.content), format='jpeg')
    loaded_images[image_url] = img
    

In [None]:
# calculate tracks by artist per date
# The query reads the tracks_df and artist_images_df DataFrames by name and works in stages:
#   distinct_dates         - every distinct added_at value in the playlist
#   grouped_data           - number of distinct tracks added per (artist, date)
#   grouped_data_all_dates - cross join of those rows with all dates; the count is kept only
#                            where the two dates match and is NULL otherwise
#   grouped_data_cumsum    - running SUM per artist ordered by date, i.e. the number of
#                            tracks added up to and including each date
# The final SELECT joins the artist image url for picture lookup, drops rows whose running
# total is still NULL (dates before the artist's first track) and orders the result by date,
# then by ascending track_count. The resulting frame is indexed by date.
grouped_track_data = ddb.sql("""
        WITH distinct_dates AS (
            SELECT DISTINCT added_at as date
            FROM tracks_df
        ),
        grouped_data AS (
            SELECT artist_id, 
            artist_name, 
            added_at AS date, 
            count(distinct(track_id)) as count_tracks
            FROM tracks_df
            GROUP BY artist_id, artist_name, added_at
        ),
        grouped_data_all_dates AS (
            SELECT DISTINCT gd.artist_id, 
            gd.artist_name, 
            dd.date, 
            (CASE WHEN gd.date=dd.date THEN gd.count_tracks ELSE NULL END) as count_tracks
            FROM grouped_data gd
            CROSS JOIN distinct_dates dd
        ),
        grouped_data_cumsum AS (
            SELECT artist_id, artist_name, date,
            SUM(count_tracks) OVER (PARTITION BY artist_id, artist_name ORDER BY date) as track_count
            FROM grouped_data_all_dates
        )
        SELECT DISTINCT gdc.*,  aid.artist_image_url
        FROM grouped_data_cumsum as gdc 
        LEFT JOIN artist_images_df aid ON aid.artist_id=gdc.artist_id
        WHERE gdc.track_count IS NOT NULL ORDER BY gdc.date, gdc.track_count asc
        """).df()
grouped_track_data = grouped_track_data.set_index("date")

In [None]:
def get_top_artists_by_date(date, n):
    """Return the last ``n`` rows of ``grouped_track_data`` whose index equals ``date``.

    ``grouped_track_data`` is built ordered by date and then ascending
    ``track_count``, so the returned rows are the ``n`` largest counts for that
    date, with the largest last.

    Parameters
    ----------
    date : value compared against the frame's ``date`` index.
    n : int, maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame with at most ``n`` rows; empty when ``n`` is 0 or when no
    row matches ``date``.
    """
    rows_for_date = grouped_track_data.loc[grouped_track_data.index == date]
    # tail(n) rather than [-n:]: the slice [-0:] would return every row for n == 0.
    return rows_for_date.tail(n)


In [None]:
fig, ax = plt.subplots(figsize=(15, 8))
height = 0.8
def draw_barchart(date, n):
    """Redraw the module-level ``ax`` as a horizontal bar chart of the top ``n`` artists on ``date``.

    The axes are cleared, one bar per artist is drawn from
    ``get_top_artists_by_date(date, n)``, the artist's cached image (when one is
    available in ``loaded_images``) is placed at the end of its bar, and the
    date is written as a large label inside the axes.

    ``ax`` is looked up when the function is called, so it draws on whichever
    axes the name currently refers to. The function does not call ``plt.show()``:
    it is used as an animation frame callback, where showing the figure on every
    frame is unwanted; callers display the figure themselves.

    Parameters
    ----------
    date : value matched against the ``date`` index of the grouped track data.
    n : int, number of artists to draw.

    Returns
    -------
    The list of bar patches created by ``ax.barh``.
    """
    df_count = get_top_artists_by_date(date, n)
    ax.clear()
    recs = ax.barh(df_count['artist_name'], df_count['track_count'], height=height)

    # Target image size; it does not depend on the row, so it is computed once.
    bar_height = 32 if n == 10 else 15

    for i, (value, image_url) in enumerate(zip(df_count['track_count'], df_count['artist_image_url'])):
        img = loaded_images.get(image_url)
        if img is None:
            # No cached picture for this artist (missing url or image): keep the bar, skip the image.
            continue
        # Scale by the image's row count so every picture is drawn at the same height.
        im = OffsetImage(img, zoom=bar_height/len(img))
        im.image.axes = ax
        # Anchor at the bar's end and shift left by half the image size so it sits inside the bar.
        ab = AnnotationBbox(im, (value, i), xybox=(-bar_height/2.0, 0), frameon=False,
                            xycoords='data', boxcoords="offset points", pad=0)
        ax.add_artist(ab)
    # Add the date at the right-middle portion of the canvas
    ax.text(1, 0.4, date, transform=ax.transAxes, size=46, ha='right')

    return recs.patches


#test drawing
a = draw_barchart(tracks_df["added_at"].max(), 10)
plt.show()

In [None]:
# Distinct added_at values in ascending order; each one becomes an animation frame.
# Sorting is done in pandas instead of calling .sort() in place on the result of
# unique(), which only works when unique() returns a NumPy ndarray (it does not
# for extension-backed string columns). to_numpy() keeps `dates` an ndarray.
dates = tracks_df["added_at"].drop_duplicates().sort_values().to_numpy()

In [None]:
# A fresh figure/axes pair for the animation; draw_barchart draws on the global `ax`.
fig, ax = plt.subplots(figsize=(15, 8))
# blit=False: draw_barchart clears the axes and rebuilds bars, images and the date
# label on every frame but returns only the bar patches, so it does not satisfy
# the blitting contract (return every artist that was modified or created).
animator = animation.FuncAnimation(fig, draw_barchart, frames=dates, fargs=(10,), blit=False)
animation_html = animator.to_jshtml()
# Close the figure so the notebook does not also render a static copy below the animation.
plt.close(fig)
HTML(animation_html)