In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime as dt
from dateutil.relativedelta import relativedelta

### Enter Last.FM Username Here to get CSV Data 
### Place File in Same Directory
### Link: https://benjaminbenben.com/lastfm-to-csv/

### Enter Last.FM Username Here to get CSV Data 
### Place File in Same Directory
### Link: https://benjaminbenben.com/lastfm-to-csv/

In [2]:
column_names = ['Artist', 'Album', 'Title', 'Date']
df = pd.read_csv('DG716.csv', names=column_names)
df

Unnamed: 0,Artist,Album,Title,Date
0,Destroy Lonely,No Stylist,VTMNTSCOAT,03 Jan 2024 19:15
1,Pop Smoke,Meet The Woo 2 (Deluxe),Dreaming,03 Jan 2024 19:13
2,EST Gee,I NEVER FELT NUN,Get Em' Geeski,03 Jan 2024 19:10
3,Future,Future Hndrxx Presents: The WIZRD,Talk Shit Like a Preacher,03 Jan 2024 19:07
4,Mac Miller,Faces,Funeral,03 Jan 2024 19:02
...,...,...,...,...
77989,Logic,The Incredible True Story,Run It,05 Dec 2019 18:26
77990,Logic,Under Pressure (Deluxe Version),Metropolis,05 Dec 2019 18:23
77991,Eminem,The Eminem Show,Square Dance,05 Dec 2019 18:19
77992,Stormzy,Sounds of the Skeng - Single,Sounds Of The Skeng,05 Dec 2019 18:14


In [3]:
df.isna().any()

Artist    False
Album      True
Title     False
Date      False
dtype: bool

In [4]:
def load_and_clean_data(df):
    df = df.dropna()
    return df

df = load_and_clean_data(df)

df

Unnamed: 0,Artist,Album,Title,Date
0,Destroy Lonely,No Stylist,VTMNTSCOAT,03 Jan 2024 19:15
1,Pop Smoke,Meet The Woo 2 (Deluxe),Dreaming,03 Jan 2024 19:13
2,EST Gee,I NEVER FELT NUN,Get Em' Geeski,03 Jan 2024 19:10
3,Future,Future Hndrxx Presents: The WIZRD,Talk Shit Like a Preacher,03 Jan 2024 19:07
4,Mac Miller,Faces,Funeral,03 Jan 2024 19:02
...,...,...,...,...
77989,Logic,The Incredible True Story,Run It,05 Dec 2019 18:26
77990,Logic,Under Pressure (Deluxe Version),Metropolis,05 Dec 2019 18:23
77991,Eminem,The Eminem Show,Square Dance,05 Dec 2019 18:19
77992,Stormzy,Sounds of the Skeng - Single,Sounds Of The Skeng,05 Dec 2019 18:14


<h2>Format Dates

In [5]:
df = df.copy()

df['Date'] = pd.to_datetime(df['Date'])
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert('US/Eastern')
df['Date'] = df['Date'].dt.tz_localize(None)
df.head()

Unnamed: 0,Artist,Album,Title,Date
0,Destroy Lonely,No Stylist,VTMNTSCOAT,2024-01-03 14:15:00
1,Pop Smoke,Meet The Woo 2 (Deluxe),Dreaming,2024-01-03 14:13:00
2,EST Gee,I NEVER FELT NUN,Get Em' Geeski,2024-01-03 14:10:00
3,Future,Future Hndrxx Presents: The WIZRD,Talk Shit Like a Preacher,2024-01-03 14:07:00
4,Mac Miller,Faces,Funeral,2024-01-03 14:02:00


<h1>Grouped by Album plays

In [6]:
# Group the dataframe by 'Album' and count the number of occurrences
grouped_df = df.groupby('Album').count()
# Sort the grouped dataframe by 'Title' in descending order
sorted_df = grouped_df.sort_values(by='Title', ascending=False)
# Get the top 5 rows
top_five = sorted_df.head(10)
# Drop the 'Title' and 'hour' columns
dropped_df = top_five.drop(columns=['Title', 'Date'])
# Rename the 'Artist' column to 'Plays'
top_ten_albums = dropped_df.rename(columns={'Artist': 'Plays'})
top_ten_albums

Unnamed: 0_level_0,Plays
Album,Unnamed: 1_level_1
Unreleased,966
No Pressure,852
ROCKSTAR STATUS,784
Welcome to O'Block,761
The Goat,753
Legends Never Die,696
Death Race for Love,688
Just Cause Y'all Waited 2 (Deluxe),594
Eternal Atake (Deluxe) - LUV vs. The World 2,557
Infatuation,539


<h1>Grouped by Artist plays

In [7]:
grouped_df = df.groupby('Artist').count()
sorted_df = grouped_df.sort_values(by='Title', ascending=False)
top_five = sorted_df.head(10)
dropped_df = top_five.drop(columns=['Title', 'Date'])
top_ten_artists = dropped_df.rename(columns={'Artist': 'Plays'})
top_ten_artists

Unnamed: 0_level_0,Album
Artist,Unnamed: 1_level_1
Juice WRLD,7728
Logic,3723
Drake,3131
Lil Durk,2273
Lil Uzi Vert,2146
King Von,2118
Kanye West,2001
Mac Miller,1910
Eminem,1817
Future,1561


<h1>Grouped by Artist Plays

In [8]:
grouped_df = df.groupby(['Artist', 'Title']).size().reset_index(name='Plays')
sorted_df = grouped_df.sort_values(by='Plays', ascending=False)
top_ten_songs = sorted_df.head(10)
top_ten_songs

Unnamed: 0,Artist,Title,Plays
5146,Lil Durk,Doin Too Much,142
3934,Juice WRLD,Empty,130
6117,Mac Miller,Funeral,121
4855,King Von,Mine Too,117
4041,Juice WRLD,Rental,109
5206,Lil Durk,Redman,108
3883,Juice WRLD,Ashamed,104
4024,Juice WRLD,On Your Mind,102
9265,Vince Staples,Might Be Wrong (feat. Haneef Talib aka GeNNo &...,102
3992,Juice WRLD,Lost Too Many,102


<h1> Most plays in the last year

In [9]:
today = dt.today().strftime('%Y-%m-%d')
one_year_ago = (dt.today() - pd.DateOffset(years=1)).strftime('%Y-%m-%d')
last_year = df[df['Date'].between(one_year_ago, today)]
last_year.head()


Unnamed: 0,Artist,Album,Title,Date
0,Destroy Lonely,No Stylist,VTMNTSCOAT,2024-01-03 14:15:00
1,Pop Smoke,Meet The Woo 2 (Deluxe),Dreaming,2024-01-03 14:13:00
2,EST Gee,I NEVER FELT NUN,Get Em' Geeski,2024-01-03 14:10:00
3,Future,Future Hndrxx Presents: The WIZRD,Talk Shit Like a Preacher,2024-01-03 14:07:00
4,Mac Miller,Faces,Funeral,2024-01-03 14:02:00


<h1>Most Listened to Artists in the Last Year

In [10]:
most_listened_last_year = last_year.groupby('Artist').count().sort_values(by='Title', ascending=False).head()
most_listened_last_year.drop(columns=['Album', 'Date'])

Unnamed: 0_level_0,Title
Artist,Unnamed: 1_level_1
Drake,794
Lil Uzi Vert,737
Travis Scott,623
Juice WRLD,567
Lil Durk,426


In [11]:
albums_last_year = last_year.groupby('Album').count().sort_values(by='Date', ascending=False).drop(columns=[('Title'), ('Date')])
albums_last_year.head(10)

Unnamed: 0_level_0,Artist
Album,Unnamed: 1_level_1
UTOPIA,484
Pink Tape,458
For All the Dogs,324
AUSTIN,176
a Gift & a Curse,126
Grandson,117
Almost Healed,116
i didn't mean to haunt you,115
Up 2 Më,103
Her Loss,102


<h1> Plays from Last Calendar Year

In [12]:

start_date = '2023-01-01'
end_date = '2024-01-01'

last_year = df[df['Date'].between(start_date, end_date)]

most_listened_last_calendar_year = last_year.groupby('Artist').count().sort_values(by='Title', ascending=False).head(15)
most_listened_last_calendar_year.drop(columns=['Album', 'Date'])

Unnamed: 0_level_0,Title
Artist,Unnamed: 1_level_1
Drake,1129
Juice WRLD,926
Lil Uzi Vert,903
Lil Durk,739
Travis Scott,738
Future,572
King Von,543
Logic,533
Yeat,526
Kanye West,455


In [13]:
albums_last_year = last_year.groupby('Album').count().sort_values(by='Date', ascending=False).drop(columns=[('Title'), ('Date')])
albums_last_year.head(10)

Unnamed: 0_level_0,Artist
Album,Unnamed: 1_level_1
UTOPIA,483
Pink Tape,457
For All the Dogs,321
Her Loss,255
HEROES & VILLAINS,222
I NEVER LIKED YOU,188
7220,178
AUSTIN,175
i didn't mean to haunt you,172
Welcome to O'Block,149


<h1>Graphed Data

<h3> Monthly Artist Data

In [14]:
df['Date'] = pd.to_datetime(df['Date'])
grouped_by_month = df.set_index('Date', inplace=True)
grouped_by_month = df.resample('M').Artist.value_counts().sort_index()

<h3>Plays Per Month

In [15]:
grouped_by_month = grouped_by_month.unstack(fill_value=0)

In [16]:
pd.options.plotting.backend = "plotly"
fig = grouped_by_month[:-1].plot(template = 'plotly_dark')
fig.update_layout(autosize=False, width=1500, height=700)
fig.update_yaxes(title_text="Plays")
fig.show()

  v = v.dt.to_pydatetime()


Discoveries:
1. Juice WRLD is by far the most listened to artist (Most Replayable)
2. Eminem started strong but around mid 2020 fell behind
3. Drake is the most consistent
4. I listened to less artists prior to 2022 but listened these artists more

<h2>Difference in Trends

In [17]:
grouped_by_month.head()

Artist,$NOT,$Tik,070 Beheard,070 Shake,070 Shake & Christine and the Queens,070 Shake & Ken Carson,1oneam,2 Chainz,21 Savage,21 Savage & Metro Boomin,...,sevensevenseven,sgpwes,slowthai,squeeda,ssgkobe,tevomxntana,titose,tobi lou,yaya bey,【UNLUCKY】ヌ竹演
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-12-31,0,0,0,0,0,0,0,0,2,0,...,0,0,0,0,0,0,0,0,0,0
2020-01-31,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2020-02-29,0,0,0,2,0,0,0,0,7,0,...,0,0,0,0,0,0,0,0,0,0
2020-03-31,0,0,0,2,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2020-04-30,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Creating a time limit between last months last day and the month prior

In [18]:
previous_month_last_day = (dt.today() - pd.DateOffset(months=2) + pd.offsets.MonthEnd(1)).strftime('%Y-%m-%d')
last_month_last_day = (dt.today() - pd.DateOffset(months=1) + pd.offsets.MonthEnd(1)).strftime('%Y-%m-%d')

In [19]:
month_before = grouped_by_month.loc[previous_month_last_day].sort_values(ascending=False)
latest_month = grouped_by_month.loc[last_month_last_day].sort_values(ascending=False)
difference = latest_month - month_before

KeyError: '2024-03-31'

<h2> Listening Differences Between Months

In [None]:
positive_difference = difference[difference.gt(0)].sort_values(ascending=False)
positive_difference.head()

In [None]:
negative_difference = difference[difference.lt(0)].sort_values(ascending=True)
negative_difference.head()

<h2> Listening Patterns

In [None]:
data = []
for row in df.index:
    date = str(row).split(' ')[0]
    time = str(row).split(' ')[1]
    hour = time.split(':')[0]
    date_obj = pd.to_datetime(date)
    day_of_week = date_obj.day_name()
    data.append([day_of_week, hour])

df_new = pd.DataFrame(data, columns=['day_of_week', 'hour_of_day'])
df_heatmap = df_new.groupby(['day_of_week', 'hour_of_day']).size().unstack()

# Fill NaN values with 0
df_heatmap = df_heatmap.fillna(0)

# Convert the DataFrame to a matrix
heatmap_data = df_heatmap.values

# Create the heatmap
fig = go.Figure(data=go.Heatmap(
    z=heatmap_data,
    x=df_heatmap.columns.tolist(),
    y=df_heatmap.index.tolist(),
    colorscale='Viridis'))

fig.update_layout(
    autosize=False,
    width=1200,
    height=600,
    template = 'plotly_dark',
    title='Listening Patterns',
    xaxis_nticks=24,  # This ensures that all hours are displayed on the x-axis
    yaxis=dict(categoryorder='array', categoryarray= ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday', 'Sunday']),  # This ensures that the days of the week are displayed in the correct order
    xaxis_title="Hour of Day",
    yaxis_title="Day of Week"
)

fig.show()

In [None]:
new_df = df.copy()
# Convert the timestamp to datetime and extract the hour
new_df['hour'] = new_df.index.hour

# Get the count of scrobbles for each hour
new_df_counts = new_df.groupby('hour').size().reset_index(name='counts')


In [None]:
r = new_df_counts['counts'].tolist()
theta = (new_df_counts['hour'].values * 15) - 7.5
width = [15]*24

ticktexts = [f'{i}:00' if i % 3 == 0 else '' for i in np.arange(24)]

fig = go.Figure(go.Barpolar(
    r=r,
    theta=theta,
    width=width,
    marker_color=new_df_counts['counts'],
    marker_colorscale='reds',
    marker_line_color="white",
    marker_line_width=2,
    opacity=0.8
))

fig.update_layout(
    title="Scrobble Clock",
    template='plotly_dark',
    autosize=False,
    width=1300,
    height=600,
    polar=dict(
        hole=0.4,
        bgcolor='rgb(223, 223,223)',
        radialaxis=dict(
            showticklabels=False,
            ticks='',
            linewidth=2,
            linecolor='white',
            showgrid=False,
        ),
        angularaxis=dict(
            tickvals=np.arange(0,360,15),
            ticktext=ticktexts,
            showline=True,
            direction='clockwise',
            period=24,
            linecolor='white',
            gridcolor='white',
            showticklabels=True,
            ticks=''
        )
    )
)

fig.show()