In [115]:
import pandas as pd
import numpy as np
import os
import glob
from datetime import datetime
import matplotlib.pyplot as plt
import plotly.express as px
import dash
from dash.dash_table.Format import Group
from dash import Dash, html, dcc, callback, Output, Input
import plotly.graph_objs as go
import seaborn as sns

## Load dataset 

In [116]:
# Glob pattern matching the Last.fm CSV exports. Set the LASTFM_CSV_GLOB
# environment variable to point at your own copy instead of editing the cell.
file_path = os.environ.get(
    'LASTFM_CSV_GLOB',
    '/Users/dantashashou/Downloads/Viz/Lastfm/*.csv',  # To your Lastfm File
)
columns = ['Artist', 'Album', 'Track', 'Date']


def load_scrobbles(paths, column_names=tuple(columns)):
    """Read the given CSV files into one frame tagged with a `Listener` column.

    Parameters
    ----------
    paths : iterable of str
        CSV files to read. Column names are supplied explicitly, so the first
        line of every file is read as data, not as a header.
    column_names : sequence of str
        Names assigned to the CSV columns, in order.

    Returns
    -------
    pandas.DataFrame
        All rows of all files with a fresh 0..n-1 index and an extra
        `Listener` column holding the file name without its extension.
        When `paths` is empty the frame has the expected columns but no rows.
    """
    frames = []
    for csv_path in paths:
        # Take the basename first and strip only the final extension, so dots
        # in a directory name or in the listener name do not truncate it.
        listener = os.path.splitext(os.path.basename(csv_path))[0]
        frames.append(
            pd.read_csv(csv_path, names=list(column_names)).assign(Listener=listener)
        )
    if not frames:
        # pd.concat rejects an empty list; keep the schema for later cells.
        return pd.DataFrame(columns=[*column_names, 'Listener'])
    # One concat at the end instead of re-copying the growing frame per file.
    return pd.concat(frames, ignore_index=True)


# Sorted so the row order does not depend on the file-system listing order.
file_list = sorted(glob.glob(file_path))
df = load_scrobbles(file_list)

### Check Dataset

In [117]:
# df.tail()
# print(df.info())

# select = df[df['Artist'] == 'Jay Chou']
# select.head()
# select = df[df['Album'] == 'Mayday']
# select.head()
# select = df[df['Track'] == 'Mayday']
# select.head()
# select = df[df['Listener'] == 'SilentDefender']
# select.sample(10)

In [118]:
# Convert the raw date strings to datetime64 values.
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0, where a strict version of that inference is already the default
# and passing the flag has no effect.
df['Date'] = pd.to_datetime(df['Date'])
print(df['Date'].dtype)

datetime64[ns]


### Count unique artists, albums, tracks and listeners

In [119]:
# Number of distinct values in each descriptive column, computed in the same
# order in which they are printed below.
unique_artists, unique_albums, unique_tracks, unique_listeners = (
    df[field].nunique() for field in ('Artist', 'Album', 'Track', 'Listener')
)
print(unique_artists, unique_albums, unique_tracks, unique_listeners, sep='\n')

86111
152282
326956
62


### Count most listened

In [120]:
# Occurrence counts of every artist, album, track and listener.
artist_counts, album_counts, track_counts, listener_counts = (
    df[field].value_counts() for field in ('Artist', 'Album', 'Track', 'Listener')
)

# For each column keep the most frequent value together with its count.
most_listened_artist, most_listened_artist_count = artist_counts.idxmax(), artist_counts.max()
most_listened_album, most_listened_album_count = album_counts.idxmax(), album_counts.max()
most_listened_track, most_listened_track_count = track_counts.idxmax(), track_counts.max()
most_listened_listener, most_listened_listener_count = listener_counts.idxmax(), listener_counts.max()

print("Most listened artist is: {}, {}".format(
    most_listened_artist, most_listened_artist_count))
print("Most listened album is: {}, {}".format(
    most_listened_album, most_listened_album_count))
print("Most listened track is: {}, {}".format(
    most_listened_track, most_listened_track_count))
print("Most listened listener is: {}, {}".format(
    most_listened_listener, most_listened_listener_count))

Most listened artist is: Taylor Swift, 613171
Most listened album is: Stay Alive (Prod. SUGA of BTS), 185958
Most listened track is: Stay Alive (Prod. SUGA of BTS), 186251
Most listened listener is: d0ras, 284373


### Rank of most listened

In [121]:
# Turn each value_counts() series into a two-column frame (<name>, Count) and
# keep its first ten rows as the all-time top 10.
Rankartist = artist_counts.rename_axis('Artist').reset_index(name='Count')
Rankalbum = album_counts.rename_axis('Album').reset_index(name='Count')
Ranktrack = track_counts.rename_axis('Track').reset_index(name='Count')
Ranklistener = listener_counts.rename_axis('Listener').reset_index(name='Count')

Rankartist10 = Rankartist.head(10)
Rankalbum10 = Rankalbum.head(10)
Ranktrack10 = Ranktrack.head(10)
Ranklistener10 = Ranklistener.head(10)

### Count weekly most listened

In [122]:
def _weekly_top(frame, field):
    """Return the most frequent value of `field` for every week in `frame`.

    Rows whose `Date` or `field` is missing, or whose `field` is the empty
    string, are ignored so that a week consisting only of such rows cannot
    make `idxmax` fail on an empty count.

    The result is a Series indexed by the weekly period of `Date`.
    """
    valid = frame.dropna(subset=['Date', field])
    valid = valid[valid[field] != '']
    weeks = valid['Date'].dt.to_period('W')
    return valid.groupby(weeks)[field].agg(lambda plays: plays.value_counts().idxmax())


weekly_top_artist = _weekly_top(df, 'Artist')
Rankartist_weekly = pd.DataFrame({'Artist': weekly_top_artist.values}, index=weekly_top_artist.index)

# Aggregates the Album column; previously the Track column was used here, so
# the "Album" frame actually contained track names.
weekly_top_album = _weekly_top(df, 'Album')
Rankalbum_weekly = pd.DataFrame({'Album': weekly_top_album.values}, index=weekly_top_album.index)

weekly_top_track = _weekly_top(df, 'Track')
Ranktrack_weekly = pd.DataFrame({'Track': weekly_top_track.values}, index=weekly_top_track.index)

weekly_top_listener = _weekly_top(df, 'Listener')
Ranklistener_weekly = pd.DataFrame({'Listener': weekly_top_listener.values}, index=weekly_top_listener.index)

### Rank of weekly most listened

In [123]:
# Ten most played artists of every week, as rows of (Week, Artist, Count)
# ordered by week and then by descending count.
# The per-week top 10 is taken with sort + groupby().head() rather than
# groupby().apply(nlargest): apply() operating on the grouping column is
# deprecated in pandas 2.2, and the dashboard needs the `Week` column kept.
weekly_top10_artists = (
    df.assign(Week=df['Date'].dt.to_period('W'))
    .loc[df['Artist'] != '']
    .groupby(['Week', 'Artist'])
    .size()
    .reset_index(name='Count')
    .sort_values(['Week', 'Count'], ascending=[True, False])
    .groupby('Week')
    .head(10)
    .reset_index(drop=True)
)
# weekly_top10_artists.tail()

In [124]:
# Ten most played albums of every week, as rows of (Week, Album, Count)
# ordered by week and then by descending count.
# The per-week top 10 is taken with sort + groupby().head() rather than
# groupby().apply(nlargest): apply() operating on the grouping column is
# deprecated in pandas 2.2, and the dashboard needs the `Week` column kept.
weekly_top10_albums = (
    df.assign(Week=df['Date'].dt.to_period('W'))
    .loc[df['Album'] != '']
    .groupby(['Week', 'Album'])
    .size()
    .reset_index(name='Count')
    .sort_values(['Week', 'Count'], ascending=[True, False])
    .groupby('Week')
    .head(10)
    .reset_index(drop=True)
)
# weekly_top10_albums.tail()

In [125]:
# Ten most played tracks of every week, as rows of (Week, Track, Count)
# ordered by week and then by descending count.
# The per-week top 10 is taken with sort + groupby().head() rather than
# groupby().apply(nlargest): apply() operating on the grouping column is
# deprecated in pandas 2.2, and the dashboard needs the `Week` column kept.
weekly_top10_tracks = (
    df.assign(Week=df['Date'].dt.to_period('W'))
    .loc[df['Track'] != '']
    .groupby(['Week', 'Track'])
    .size()
    .reset_index(name='Count')
    .sort_values(['Week', 'Count'], ascending=[True, False])
    .groupby('Week')
    .head(10)
    .reset_index(drop=True)
)
# weekly_top10_tracks.tail()

In [126]:
# Ten most active listeners of every week, as rows of (Week, Listener, Count)
# ordered by week and then by descending count.
# The per-week top 10 is taken with sort + groupby().head() rather than
# groupby().apply(nlargest): apply() operating on the grouping column is
# deprecated in pandas 2.2, and the dashboard needs the `Week` column kept.
weekly_top10_listeners = (
    df.assign(Week=df['Date'].dt.to_period('W'))
    .loc[df['Listener'] != '']
    .groupby(['Week', 'Listener'])
    .size()
    .reset_index(name='Count')
    .sort_values(['Week', 'Count'], ascending=[True, False])
    .groupby('Week')
    .head(10)
    .reset_index(drop=True)
)
# weekly_top10_listeners.tail()

### Four all-time top-10 ranking figures

In [127]:
# Horizontal bar charts of the all-time top-10 frames. Each frame is passed in
# reversed row order and every bar is labelled with its count.
Rankartistfig = px.bar(
    Rankartist10[::-1],
    x='Count', y='Artist', orientation='h',
    title='Top 10 Artist of all time', text='Count', template='seaborn',
)
Rankalbumfig = px.bar(
    Rankalbum10[::-1],
    x='Count', y='Album', orientation='h',
    title='Top 10 Album of all time', text='Count', template='seaborn',
)
Ranktrackfig = px.bar(
    Ranktrack10[::-1],
    x='Count', y='Track', orientation='h',
    title='Top 10 Track of all time', text='Count', template='seaborn',
)
Ranklistenerfig = px.bar(
    Ranklistener10[::-1],
    x='Count', y='Listener', orientation='h',
    title='Top 10 Listener of all time', text='Count', template='seaborn',
)

### Cross Tabulation and Bubble figure

In [128]:
# The 50 most frequent (Listener, Artist) pairs, largest first.
listener_artist = (
    df.groupby(['Listener', 'Artist'])
    .size()
    .reset_index(name='TrackCount')
    .sort_values(by='TrackCount', ascending=False)
    .head(50)
)

# Listener x Artist grid of those counts; pairs outside the top 50 become 0.
listener_artist_table = listener_artist.pivot_table(
    index='Listener',
    columns='Artist',
    values='TrackCount',
    fill_value=0,
)

# Back to long form (one row per grid cell) for the scatter plot.
bubble_df = listener_artist_table.reset_index().melt(
    id_vars='Listener',
    var_name='Artist',
    value_name='TrackCount',
)

bubblefig = px.scatter(
    bubble_df,
    x='Artist',
    y='Listener',
    size='TrackCount',
    color='TrackCount',
    labels={'TrackCount': 'Track Count'},
    title='Bubble Plot of Listener-Artist TrackCount',
)

In [129]:
#listener_artist.head(50)

In [130]:
#listener_artist_table.head()

### Count tracks played per year

In [131]:
# Year of every row; rows without a date get the placeholder year 0.
df['Year'] = df['Date'].dt.year.fillna(0).astype(int)

# Both yearly summaries are built from dated rows only. Previously only
# `total_track` excluded the placeholder, so a per-artist series could contain
# a point at Year 0.
dated_plays = df[df['Year'] != 0]
total_track = dated_plays.groupby(['Year']).size().reset_index(name='TrackCount')
yearly_track = dated_plays.groupby(['Artist', 'Year']).size().reset_index(name='TrackCount')

# Dashboard

In [147]:
# Pass the module name itself; the original passed the literal string
# '__name__', which is not the name of any module.
app = Dash(__name__)


def _week_dropdown_options(weekly_top10):
    """Build dropdown options (label == value == str(week)) from a `Week` column."""
    return [{'label': str(week), 'value': str(week)} for week in weekly_top10['Week'].unique()]


def _ranking_tab(entity, top_name, all_time_fig, weekly_top10, tab_style):
    """Build one dashboard tab for `entity` ('Artist', 'Album', 'Track', 'Listener').

    The tab holds the all-time ranking figure, a week dropdown and an empty
    graph that the matching callback fills in. Component ids are derived from
    the lower-cased entity name, e.g. 'dropdown-artist'. The dropdown lists
    the weeks present in `weekly_top10` itself and starts on the week of its
    last row (or on no selection when the frame is empty).
    """
    key = entity.lower()
    initial_week = str(weekly_top10['Week'].iloc[-1]) if len(weekly_top10) else None
    return dcc.Tab(label=f'Most listened {entity}: {top_name}', value=f'{key}-tab', children=[
        dcc.Graph(id=f'rank{key}fig', figure=all_time_fig, style={'height': '400px'}),
        html.Label('Pick a date here'),
        dcc.Dropdown(id=f'dropdown-{key}', options=_week_dropdown_options(weekly_top10),
                     value=initial_week, clearable=False),
        dcc.Graph(id=f'output-dropdown-{key}', style={'height': '400px'}),
    ], style=tab_style)


# Kept for any later cell that refers to it; each tab builds its own options.
week_options = _week_dropdown_options(weekly_top10_artists)

app.layout = html.Div([
    html.H1(children='Playlist KPI Dashboard',
            style={'textAlign':'center', 'backgroundColor':'lightblue', 'borderRadius':'10px', 'height':'50px'}),

    # Left column: one tab per ranking.
    html.Div(
        dcc.Tabs(id='tabs', value='artist-tab', children=[
            _ranking_tab('Artist', most_listened_artist, Rankartistfig, weekly_top10_artists,
                         {'borderRadius': '10px', 'backgroundColor': 'lightblue'}),
            _ranking_tab('Album', most_listened_album, Rankalbumfig, weekly_top10_albums,
                         {'borderRadius': '10px'}),
            _ranking_tab('Track', most_listened_track, Ranktrackfig, weekly_top10_tracks,
                         {'borderRadius': '10px'}),
            _ranking_tab('Listener', most_listened_listener, Ranklistenerfig, weekly_top10_listeners,
                         {'borderRadius': '10px'}),
        ]),
        style={'width': '60%', 'float': 'left'}
    ),

    # Right column: bubble plot plus the per-artist yearly trend.
    html.Div([
        dcc.Graph(id='bubblefig',figure=bubblefig, style={'height': '532px'}),

        dcc.Input(id='artist-input', type='text', placeholder='Enter an Artist', style={'width': '600px', 'height': '30px'}),
        dcc.Graph(id='track-trend-plot', style={'height': '400px'}),
    ], style={'width': '40%', 'float': 'right'})
])

#Weekly Bar Chart
def update_output(selected_week, selected_tab, top_data, y_label):
    """Build the horizontal bar chart of the top 10 `y_label` values for one week.

    Parameters
    ----------
    selected_week : str or Period
        Week to show. Dropdown values are the string form of the weekly
        period, so the comparison is done on string labels.
    selected_tab : str
        Unused; kept so existing callers keep working.
    top_data : pandas.DataFrame
        Frame with `Week`, `Count` and `y_label` columns.
    y_label : str
        Name of the column plotted on the y axis.

    Returns
    -------
    The figure returned by `px.bar`. It is empty when no row matches the week.
    """
    # Compare string labels instead of a Period column against a string: that
    # comparison relies on pandas parsing the weekly range string and matches
    # nothing when it cannot.
    week_labels = top_data['Week'].astype(str)
    filtered_data = top_data[week_labels == str(selected_week)]
    top_data_filtered = filtered_data.groupby(y_label)['Count'].sum().nlargest(10).reset_index()

    weekfig = px.bar(
        top_data_filtered[::-1], x='Count', y=y_label, orientation='h',
        labels={'Count': 'Count', y_label: y_label},
        title=f'Top 10 {y_label} during {selected_week}',
        text='Count',
        template='seaborn'
    )
    return weekfig


@app.callback(
    Output('output-dropdown-artist', 'figure'),
    [Input('dropdown-artist', 'value')],
)
def update_output_artist(selected_week):
    """Redraw the weekly artist chart for the week chosen in the artist dropdown."""
    weekly_fig = update_output(
        selected_week,
        'artist-tab',
        weekly_top10_artists,
        'Artist',
    )
    return weekly_fig

@app.callback(
    Output('output-dropdown-album', 'figure'),
    [Input('dropdown-album', 'value')],
)
def update_output_album(selected_week):
    """Redraw the weekly album chart for the week chosen in the album dropdown."""
    weekly_fig = update_output(
        selected_week,
        'album-tab',
        weekly_top10_albums,
        'Album',
    )
    return weekly_fig

@app.callback(
    Output('output-dropdown-track', 'figure'),
    [Input('dropdown-track', 'value')],
)
def update_output_track(selected_week):
    """Redraw the weekly track chart for the week chosen in the track dropdown."""
    weekly_fig = update_output(
        selected_week,
        'track-tab',
        weekly_top10_tracks,
        'Track',
    )
    return weekly_fig

@app.callback(
    Output('output-dropdown-listener', 'figure'),
    [Input('dropdown-listener', 'value')],
)
def update_output_listener(selected_week):
    """Redraw the weekly listener chart for the week chosen in the listener dropdown."""
    weekly_fig = update_output(
        selected_week,
        'listener-tab',
        weekly_top10_listeners,
        'Listener',
    )
    return weekly_fig

#Line Plot
@app.callback(
    Output('track-trend-plot', 'figure'),
    [Input('artist-input', 'value')]
)
def update_track_trend_plot(selected_artist):
    """Draw the yearly count line for the typed artist, or for all rows.

    Parameters
    ----------
    selected_artist : str or None
        Text from the artist input. Surrounding whitespace is ignored; an
        empty or missing value selects the overall yearly totals.

    Returns
    -------
    The figure returned by `px.line`, with the x axis fixed to 2000-2023.

    The artist is matched exactly first. Only when that finds nothing is a
    case-insensitive match tried, and its rows are summed per year so that
    spellings differing only in case form a single line.
    """
    artist_query = (selected_artist or '').strip()

    if not artist_query:
        trend_data = total_track
        title = f"Total Count of each year"
    else:
        trend_data = yearly_track[yearly_track['Artist'] == artist_query]
        if trend_data.empty:
            folded_names = yearly_track['Artist'].astype(str).str.casefold()
            trend_data = (
                yearly_track[folded_names == artist_query.casefold()]
                .groupby('Year', as_index=False)['TrackCount']
                .sum()
            )
        title = f"Track Count of {artist_query} each year"

    fig = px.line(
        trend_data, x='Year', y='TrackCount',
        labels={'TrackCount': 'TrackCount', 'Year': 'Year'},
        title=title,
        template='seaborn'
    )
    fig.update_layout(xaxis_range=[2000, 2023])

    return fig


# Start the Dash server only when this code runs as the main module.
# NOTE(review): debug=True is meant for local development; confirm it is
# switched off before exposing the app to other machines.
if __name__ == '__main__':
    app.run(debug=True)