# Exploring Spotify Historical Data

## Import Data

In [1]:
import pandas as pd
import numpy as np
import dash
from dash import dcc
from dash import html
import plotly.graph_objs as go

In [2]:
df = pd.read_csv("data/spotify_history.csv")

## Explore Data

In [3]:
df.head()

Unnamed: 0,spotify_track_uri,ts,platform,ms_played,track_name,artist_name,album_name,reason_start,reason_end,shuffle,skipped
0,2J3n32GeLmMjwuAzyhcSNe,2013-07-08 02:44:34,web player,3185,"Say It, Just Say It",The Mowgli's,Waiting For The Dawn,autoplay,clickrow,False,False
1,1oHxIPqJyvAYHy0PVrDU98,2013-07-08 02:45:37,web player,61865,Drinking from the Bottle (feat. Tinie Tempah),Calvin Harris,18 Months,clickrow,clickrow,False,False
2,487OPlneJNni3NWC8SYqhW,2013-07-08 02:50:24,web player,285386,Born To Die,Lana Del Rey,Born To Die - The Paradise Edition,clickrow,unknown,False,False
3,5IyblF777jLZj1vGHG2UD3,2013-07-08 02:52:40,web player,134022,Off To The Races,Lana Del Rey,Born To Die - The Paradise Edition,trackdone,clickrow,False,False
4,0GgAAB0ZMllFhbNc3mAodO,2013-07-08 03:17:52,web player,0,Half Mast,Empire Of The Sun,Walking On A Dream,clickrow,nextbtn,False,False


### Check for any missing rows

In [4]:
df.isnull().sum()

spotify_track_uri      0
ts                     0
platform               0
ms_played              0
track_name             0
artist_name            0
album_name             0
reason_start         143
reason_end           117
shuffle                0
skipped                0
dtype: int64

## Time-based Analysis

### Total Streaming Hours

In [5]:
total_ms_played = df["ms_played"].sum()
total_hours_played = total_ms_played / (1000 * 60 * 60)
print(total_hours_played)

5341.536370833333


### Processing the `ts` column to get `hours`, `day_of_week`, `year` etc

In [6]:
df['ts'] = pd.to_datetime(df['ts'])
df['hour'] = df['ts'].dt.hour
df['day_of_week'] = df['ts'].dt.dayofweek
df['year'] = df['ts'].dt.year

### Check the data modification

In [7]:
df.head()

Unnamed: 0,spotify_track_uri,ts,platform,ms_played,track_name,artist_name,album_name,reason_start,reason_end,shuffle,skipped,hour,day_of_week,year
0,2J3n32GeLmMjwuAzyhcSNe,2013-07-08 02:44:34,web player,3185,"Say It, Just Say It",The Mowgli's,Waiting For The Dawn,autoplay,clickrow,False,False,2,0,2013
1,1oHxIPqJyvAYHy0PVrDU98,2013-07-08 02:45:37,web player,61865,Drinking from the Bottle (feat. Tinie Tempah),Calvin Harris,18 Months,clickrow,clickrow,False,False,2,0,2013
2,487OPlneJNni3NWC8SYqhW,2013-07-08 02:50:24,web player,285386,Born To Die,Lana Del Rey,Born To Die - The Paradise Edition,clickrow,unknown,False,False,2,0,2013
3,5IyblF777jLZj1vGHG2UD3,2013-07-08 02:52:40,web player,134022,Off To The Races,Lana Del Rey,Born To Die - The Paradise Edition,trackdone,clickrow,False,False,2,0,2013
4,0GgAAB0ZMllFhbNc3mAodO,2013-07-08 03:17:52,web player,0,Half Mast,Empire Of The Sun,Walking On A Dream,clickrow,nextbtn,False,False,3,0,2013


## Artist and Track Analysis

### Total hours played per day (group by day of the week)

In [8]:
hours_per_day = df.groupby('day_of_week')['ms_played'].sum() / (1000 * 60 * 60)

# Optionally, map day_of_week to actual weekday names
hours_per_day.index = hours_per_day.index.map({
    0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday',
    4: 'Friday', 5: 'Saturday', 6: 'Sunday'
})

# Display the result
print(hours_per_day)


day_of_week
Monday       765.724104
Tuesday      774.739106
Wednesday    818.849362
Thursday     793.402961
Friday       883.301667
Saturday     650.671343
Sunday       654.847829
Name: ms_played, dtype: float64


### Total hours played per week

In [9]:
df['week'] = df['ts'].dt.isocalendar().week

hours_per_week = df.groupby(['year', 'week'])['hour'].sum()

print(hours_per_week)


year  week
2013  28       449
      31       829
      33         3
      36       584
      38        10
              ... 
2024  46      3097
      47      2744
      48      6423
      49       408
      50      1077
Name: hour, Length: 483, dtype: int32


### Song Streaming Activity Based on Time

In [10]:
tracks_per_hour = df.groupby('hour').size().reset_index(name='total_songs')
print(tracks_per_hour)

    hour  total_songs
0      0        10884
1      1         9395
2      2         9029
3      3         8550
4      4         6355
5      5         7164
6      6         7369
7      7         4412
8      8         2312
9      9         1695
10    10         1207
11    11          903
12    12          724
13    13         1658
14    14         2759
15    15         3725
16    16         6737
17    17         9194
18    18         8945
19    19         8165
20    20        10494
21    21         8953
22    22         8715
23    23        10516


In [11]:
# Create a new column for the formatted hour (12-hour format with AM/PM)
df['formatted_hour'] = df['hour'].apply(lambda x: pd.to_datetime(f'{x}:00', format='%H:%M').strftime('%I:%M %p'))

# Group by formatted hour and count the number of tracks played in each hour
tracks_per_hour = df.groupby('formatted_hour').size().reset_index(name='total_songs')

# Sort the formatted_hour to ensure AM comes first, then PM
tracks_per_hour['hour_num'] = tracks_per_hour['formatted_hour'].apply(lambda x: pd.to_datetime(x, format='%I:%M %p'))

# Sort the data based on the 12-hour time format
tracks_per_hour = tracks_per_hour.sort_values('hour_num')

# Drop the 'hour_num' column as it's no longer needed
tracks_per_hour = tracks_per_hour.drop(columns=['hour_num'])

# Display the result
print(tracks_per_hour)

   formatted_hour  total_songs
22       12:00 AM        10884
0        01:00 AM         9395
2        02:00 AM         9029
4        03:00 AM         8550
6        04:00 AM         6355
8        05:00 AM         7164
10       06:00 AM         7369
12       07:00 AM         4412
14       08:00 AM         2312
16       09:00 AM         1695
18       10:00 AM         1207
20       11:00 AM          903
23       12:00 PM          724
1        01:00 PM         1658
3        02:00 PM         2759
5        03:00 PM         3725
7        04:00 PM         6737
9        05:00 PM         9194
11       06:00 PM         8945
13       07:00 PM         8165
15       08:00 PM        10494
17       09:00 PM         8953
19       10:00 PM         8715
21       11:00 PM        10516


### Songs KPI

**Total Songs played**

In [12]:
# total_song = df['track_name'].count()
total_song = df['spotify_track_uri'].count()
print(total_song)

149860


**Total Unique Songs Played**

In [None]:
total_unique_song = df['track_name'].nunique()
print(total_unique_song)

13839


**Total Plays Per Song**

In [14]:
# Count how many times each song was played
song_play_counts = df.groupby('track_name').size().reset_index(name='play_count')

# Sort by play_count in descending order
song_play_counts = song_play_counts.sort_values(by='play_count', ascending=False)

# Display the top 10 most played songs
print(song_play_counts.head(10))

                              track_name  play_count
8406                     Ode To The Mets         207
5767                        In the Blood         181
3136                         Dying Breed         166
1937                             Caution         164
82     19 Dias y 500 Noches - En Directo         148
3933                 For What It's Worth         146
550      All These Things That I've Done         142
2286                  Concerning Hobbits         142
2235     Come Together - Remastered 2009         137
11447                          The Boxer         135


### Most Played Songs

In [15]:
most_played_tracks = df.groupby('track_name')['ms_played'].sum().sort_values(ascending=False)

# Select top 10 most played tracks
top_10_tracks = most_played_tracks.head(10)

# Convert ms_played to hours for better readability
top_10_tracks_hours = top_10_tracks / (1000 * 60 * 60)

print(top_10_tracks_hours)

track_name
Ode To The Mets                                                                        18.730994
The Return of the King (feat. Sir James Galway, Viggo Mortensen and Renee Fleming)     17.889350
The Fellowship Reunited (feat. Sir James Galway, Viggo Mortensen and Renée Fleming)    12.432425
19 Dias y 500 Noches - En Directo                                                      11.920567
In the Blood                                                                           10.674191
Claudia's Theme - Version Eight                                                        10.311361
Dying Breed                                                                            10.050737
The Breaking of the Fellowship (feat. "In Dreams")                                      9.997472
All These Things That I've Done                                                         9.931921
Caution                                                                                 9.894429
Name: ms_played, dt

### Artist KPI

**Total Artists**

In [16]:
total_artists = df['artist_name'].nunique()
print(total_artists)

4113


**Total Plays Per Artist**

In [None]:
artist_play_counts = df.groupby('artist_name').size().reset_index(name='play_count')

artist_play_counts = artist_play_counts.sort_values(by='play_count', ascending=False)

print(artist_play_counts.head(10))

             artist_name  play_count
3500         The Beatles       13621
3602         The Killers        6878
1773          John Mayer        4855
465            Bob Dylan        3814
2858      Paul McCartney        2697
2093        Led Zeppelin        2482
1788         Johnny Cash        2478
3684  The Rolling Stones        2390
3003           Radiohead        2305
3503      The Black Keys        2231


### Most Played Artists

In [None]:
most_played_artist = df.groupby('artist_name')['ms_played'].sum().sort_values(ascending=False)

# Select top 10 most played tracks
top_10_artist = most_played_artist.head(10)

# Convert ms_played to hours for better readability
top_10_artist_hours = top_10_artist / (1000 * 60 * 60)

print(top_10_artist_hours)

artist_name
The Beatles           336.162376
The Killers           294.321254
John Mayer            201.449845
Bob Dylan             158.182332
Paul McCartney         99.265103
Howard Shore           96.925188
The Strokes            88.196783
The Rolling Stones     85.532503
Pink Floyd             72.369956
Led Zeppelin           68.982855
Name: ms_played, dtype: float64


## Platform Analysis

### Platform Usage

In [19]:
platform_used = df['platform'].value_counts()
print(platform_used)

platform
android           139821
cast to device      3898
iOS                 3049
windows             1691
mac                 1176
web player           225
Name: count, dtype: int64


### Play Duration by Platform

In [20]:
stream_duration_per_platform = df.groupby('platform')['ms_played'].sum() / (1000 * 60 * 60)
print(stream_duration_per_platform)

platform
android           4857.998661
cast to device     201.937833
iOS                139.708636
mac                 69.974708
web player           6.982257
windows             64.934275
Name: ms_played, dtype: float64


## Song Skipping Behavior

### Skipping Rate

In [21]:
skipped_rate = df['skipped'].value_counts(normalize=True)
print(skipped_rate)

skipped
False    0.947491
True     0.052509
Name: proportion, dtype: float64


### Skipping by Artist/Track

In [None]:
song_skip_by_artist = df[df['skipped'] == True].groupby('artist_name').size()
print(song_skip_by_artist)

artist_name
& Friends             1
24 Carat Black        1
2Pac                  3
3 Doors Down          1
4 Non Blondes         1
                     ..
gnash                 1
iLoveMemphis          2
jame minogue          2
Örjan Hultén Orion    1
ゼロ戦                   1
Length: 1207, dtype: int64


In [None]:
song_skip_by_track = df[df['skipped'] == True].groupby('track_name').size()
print(song_skip_by_track)

track_name
"Hit the Quan" #HTQ                                                                     2
#SELFIE                                                                                 1
'O sole mio (Arr. Chiaramello)                                                          5
'Round Midnight (feat. John Coltrane, Red Garland, Paul Chambers & Philly Joe Jones)    2
(I Can't Get No) Satisfaction - Live At University Of Leeds / 1971                      1
                                                                                       ..
Échame a Mi La Culpa                                                                    2
Óleo de Mujer Con Sombrero                                                              5
❤️                                                                                      3
こんな優しい雨の日は                                                                              2
クール・ヘッド                                                                                 1

## Shuffle Mode Analysis

### Shuffle Usage

In [24]:
shuffled = df['shuffle'].value_counts(normalize=True)
print(shuffled)

shuffle
True     0.744582
False    0.255418
Name: proportion, dtype: float64


### Shuffle and Skips
Check if shuffle mode is related to skipping tracks.

In [25]:
shuffle_skip_relation =  df.groupby('shuffle')['skipped'].mean()
print(shuffle_skip_relation)

shuffle
False    0.043237
True     0.055689
Name: skipped, dtype: float64


## Track End Reason Analysis

### Reasons for Track End

In [26]:
end_reasons = df['reason_end'].value_counts()
print(end_reasons)

reason_end
trackdone                       77194
fwdbtn                          53470
endplay                         10119
logout                           4367
backbtn                          2182
unexpected-exit-while-paused     1725
unknown                           268
remote                            224
unexpected-exit                   118
clickrow                           34
nextbtn                            21
appload                             7
popup                               6
reload                              4
trackerror                          4
Name: count, dtype: int64


## Playlist or Album Analysis

### Album KPI

**Total Albums**

In [None]:
total_albums = df['album_name'].nunique()
print(total_albums)

7948


**Total Plays Per Album:**

In [28]:
album_play_counts = df.groupby('album_name').size().reset_index(name='play_count')

album_play_counts = album_play_counts.sort_values(by='play_count', ascending=False)

print(album_play_counts.head(10))

                                 album_name  play_count
6447                            The Beatles        2063
4981                           Past Masters        1672
293                              Abbey Road        1429
6926                               The Wall        1241
5421                               Revolver        1038
2948                                  Help!         979
640                        At Folsom Prison         918
5745  Sgt. Pepper's Lonely Hearts Club Band         910
2299                   Exile On Main Street         892
3086                               Hot Fuss         878


### Popular Albums

In [29]:
most_played_album = df.groupby('album_name')['ms_played'].sum().sort_values(ascending=False) / (1000 * 60 * 60)
most_played_album = most_played_album.head(10)
print(most_played_album)

album_name
The New Abnormal                                      51.877193
The Beatles                                           51.847027
Imploding The Mirage                                  44.423156
Abbey Road                                            41.283419
Blood On The Tracks                                   41.080285
Past Masters                                          40.284466
Hot Fuss                                              40.124312
The Wall                                              39.250503
Where the Light Is: John Mayer Live In Los Angeles    31.615000
Pressure Machine                                      31.479001
Name: ms_played, dtype: float64


### Top Tracks per Album

In [30]:
top_track_per_album = df.groupby(['album_name', 'track_name'])['ms_played'].sum().sort_values(ascending=False) / (1000 * 60 * 60)
print(top_track_per_album.head(10))

album_name                                                                              track_name                                                                         
The New Abnormal                                                                        Ode To The Mets                                                                        18.730994
The Lord of the Rings: The Return of the King                                           The Return of the King (feat. Sir James Galway, Viggo Mortensen and Renee Fleming)     17.889350
The Lord of the Rings - The Return of the King - The Complete Recordings                The Fellowship Reunited (feat. Sir James Galway, Viggo Mortensen and Renée Fleming)    12.432425
Nos Sobran Los Motivos                                                                  19 Dias y 500 Noches - En Directo                                                      11.920567
The Search for Everything                                                               

## Correlation Analysis

### Correlation between Variables
Explore if there is any relationship between variables such as `ms_played`, `shuffle`, and `skipped`.

In [31]:
df['shuffle'] = df['shuffle'].map({True: 1, False: 0})  # Convert boolean to numeric for correlation
df['skipped'] = df['skipped'].map({True: 1, False: 0})
correlation_matrix = df[['ms_played', 'shuffle', 'skipped']].corr()
print(correlation_matrix)

           ms_played   shuffle   skipped
ms_played   1.000000 -0.182784 -0.193644
shuffle    -0.182784  1.000000  0.024345
skipped    -0.193644  0.024345  1.000000


In [32]:
# Convert categorical variables (e.g., platform) to numeric if necessary (using one-hot encoding or label encoding)
df['platform_encoded'] = df['platform'].astype('category').cat.codes  # Example: encoding platform

# If you want to include 'day_of_week' or 'hour', you can use these directly
correlation_matrix = df[['ms_played', 'shuffle', 'skipped', 'platform_encoded', 'day_of_week', 'hour']].corr()

print(correlation_matrix)


                  ms_played   shuffle   skipped  platform_encoded  \
ms_played          1.000000 -0.182784 -0.193644          0.068586   
shuffle           -0.182784  1.000000  0.024345         -0.132003   
skipped           -0.193644  0.024345  1.000000          0.036606   
platform_encoded   0.068586 -0.132003  0.036606          1.000000   
day_of_week       -0.021893  0.090852  0.013710         -0.060523   
hour               0.048194 -0.099407 -0.043566          0.015535   

                  day_of_week      hour  
ms_played           -0.021893  0.048194  
shuffle              0.090852 -0.099407  
skipped              0.013710 -0.043566  
platform_encoded    -0.060523  0.015535  
day_of_week          1.000000 -0.055261  
hour                -0.055261  1.000000  


## Visualizing the Data Using Plotly and Dash

After running the below code, open this link **http://127.0.0.1:8050/** on a web browser to view the dashboard.

In [33]:
# Initialize Dash app
app = dash.Dash(__name__)

# Layout of the Dash App
app.layout = html.Div([
    # KPI Cards Row
    html.Div(
        children=[
            # Total Stream Hours KPI Card
            html.Div(
                children=[
                    html.Img(
                        src="https://storage.googleapis.com/pr-newsroom-wp/1/2023/05/Spotify_Primary_Logo_RGB_Green.png",  # URL of the image (you can replace it with your image URL)
                        style={'width': '50px', 'height': '50px', 'marginRight': '15px'}  # Style the image (adjust size)
                    ),
                    html.H1("Spotify Wrapped")
                ],
                style={'backgroundColor': '#191414', 'color': 'white', 'padding': '5px', 'borderRadius': '10px', 'width': '31%', 'textAlign': 'center', 'display': 'flex', 'alignItems': 'center', 'justifyContent': 'center', 'boxShadow': '0px 4px 6px rgba(0, 0, 0, 0.1)', 'margin': '10px'}
            ),
            
            # Total Stream Hours KPI Card
            html.Div(
                children=[
                    html.H3(f"{total_hours_played:.2f} hours"),
                    html.P("Streaming Time")
                ],
                style={'backgroundColor': '#191414', 'color': 'white', 'padding': '5px', 'borderRadius': '10px', 'width': '15%', 'textAlign': 'center', 'boxShadow': '0px 4px 6px rgba(0, 0, 0, 0.1)', 'margin': '10px'}
            ),
            
            # Total Songs KPI Card
            html.Div(
                children=[
                    html.H3(f"{total_song} songs"),
                    html.P("Songs Played")
                ],
                style={'backgroundColor': '#191414', 'color': 'white', 'padding': '5px', 'borderRadius': '10px', 'width': '15%', 'textAlign': 'center', 'boxShadow': '0px 4px 6px rgba(0, 0, 0, 0.1)', 'margin': '10px'}
            ),
            
            # Total Artists KPI Card
            html.Div(
                children=[
                    html.H3(f"{total_artists} artists"),
                    html.P("Artists")
                ],
                style={'backgroundColor': '#191414', 'color': 'white', 'padding': '5px', 'borderRadius': '10px', 'width': '15%', 'textAlign': 'center', 'boxShadow': '0px 4px 6px rgba(0, 0, 0, 0.1)', 'margin': '10px'}
            ),
            
            # Total Albums KPI Card
            html.Div(
                children=[
                    html.H3(f"{total_albums} albums"),
                    html.P("Albums")
                ],
                style={'backgroundColor': '#191414', 'color': 'white', 'padding': '5px', 'borderRadius': '10px', 'width': '15%', 'textAlign': 'center', 'boxShadow': '0px 4px 6px rgba(0, 0, 0, 0.1)', 'margin': '10px'}
            ),
        ],
        style={'display': 'flex', 'justify-content': 'space-between', 'flex-wrap': 'wrap', 'marginBottom': '30px'}  # Flex layout for KPI cards
    ),

    # First Row (3 Visuals)
    html.Div(
        children=[
            # Line Chart: Total Plays by Hour
            dcc.Graph(
                id='total-plays-hour',
                figure={
                    'data': [
                        go.Scatter(
                            x=tracks_per_hour['formatted_hour'],
                            y=tracks_per_hour['total_songs'],
                            mode='lines+markers',
                            name='Total Songs',
                            text=tracks_per_hour['total_songs'],
                            textposition='top center',
                            line=dict(color='#1DB954'),  # Spotify Green
                            marker=dict(size=8, color='#1ED760')  # Spotify Light Green
                        )
                    ],
                    'layout': go.Layout(
                        title='Streaming Activity by Hours',
                        xaxis={'title': 'Hour of Day'},
                        yaxis={'title': 'Total Songs'},
                        showlegend=False,
                        plot_bgcolor='#191414',
                        paper_bgcolor='#191414',
                        font=dict(color='white')
                    )
                },
                style={'width': '32%', 'display': 'inline-block', 'marginBottom': '30px', 'borderRadius': '10px'}  # Width adjusted for three visuals per row and border-radius
            ),

            # Bar Chart: Total Plays by Artist
            dcc.Graph(
                id='total-plays-artist',
                figure={
                    'data': [
                        go.Bar(
                            x=top_10_artist_hours.index,
                            y=top_10_artist_hours,
                            name='Total Plays by Artist',
                            hovertext=[f'{artist}<br>{hours:.2f} hours' for artist, hours in zip(top_10_artist_hours.index, top_10_artist_hours)],
                            hoverinfo='text',
                            textposition='auto',
                            marker=dict(color='#1DB954')  # Spotify Green
                        )
                    ],
                    'layout': go.Layout(
                        title='Top Played Artists',
                        xaxis={'title': 'Artist Name', 'tickangle': 45},
                        yaxis={'title': 'Hours'},
                        showlegend=False,
                        plot_bgcolor='#191414',
                        paper_bgcolor='#191414',
                        font=dict(color='white')
                    )
                },
                style={'width': '32%', 'display': 'inline-block', 'marginBottom': '30px', 'borderRadius': '10px'}
            ),

            # New Visual: Platform Usage (Pie Chart)
            dcc.Graph(
                id='platform-usage',
                figure={
                    'data': [
                        go.Pie(
                            labels=['Mobile', 'Web', 'Desktop'],
                            values=platform_used.values,  # Assume platform_usage is a DataFrame or series with percentage values
                            hole=0.3,
                            textinfo='percent+label',
                            marker=dict(colors=['#1DB954', '#1ED760', '#B3B3B3'])  # Spotify Green, Light Green, and Grey
                        )
                    ],
                    'layout': go.Layout(
                        title='Platform Usage',
                        showlegend=True,
                        plot_bgcolor='#191414',
                        paper_bgcolor='#191414',
                        font=dict(color='white')
                    )
                },
                style={'width': '32%', 'display': 'inline-block', 'marginBottom': '30px', 'borderRadius': '10px'}  # Adjust size for three visuals in the row
            ),
        ],
        style={'display': 'flex', 'justify-content': 'space-between', 'flex-wrap': 'wrap', 'marginBottom': '30px'}  # Flex layout
    ),

    # Second Row (3 Visuals)
    html.Div(
        children=[
            # Bar Chart: Total Plays by Track
            dcc.Graph(
                id='total-plays-track',
                figure={
                    'data': [
                        go.Bar(
                            x=top_10_tracks_hours.index,
                            y=top_10_tracks_hours,
                            name='Total Plays by Track',
                            hovertext=[f'{track}<br>{hours:.2f} hours' for track, hours in zip(top_10_tracks_hours.index, top_10_tracks_hours)],
                            hoverinfo='text',
                            textposition='auto',
                            marker=dict(color='#1ED760')  # Spotify Light Green
                        )
                    ],
                    'layout': go.Layout(
                        title='Top Played Tracks',
                        xaxis={'title': 'Track Name', 'tickangle': 45},
                        yaxis={'title': 'Hour'},
                        showlegend=False,
                        plot_bgcolor='#191414',
                        paper_bgcolor='#191414',
                        font=dict(color='white')
                    )
                },
                style={'width': '32%', 'display': 'inline-block', 'marginBottom': '30px', 'borderRadius': '10px'}
            ),

            # Line Chart: Total Plays by Day of Week
            dcc.Graph(
                id='total-plays-day',
                figure={
                    'data': [
                        go.Scatter(
                            x=hours_per_day.index,
                            y=hours_per_day,
                            mode='lines+markers',
                            name='Total Plays by Day of Week',
                            text=hours_per_day,
                            textposition='top center',
                            line=dict(color='#1DB954'),
                            marker=dict(size=8, color='#1ED760')
                        )
                    ],
                    'layout': go.Layout(
                        title='Streaming Activity by Day',
                        xaxis={'title': 'Day of the Week'},
                        yaxis={'title': 'Hours'},
                        showlegend=False,
                        plot_bgcolor='#191414',
                        paper_bgcolor='#191414',
                        font=dict(color='white')
                    )
                },
                style={'width': '32%', 'display': 'inline-block', 'marginBottom': '30px', 'borderRadius': '10px'}
            ),

            # Pie Chart: Skipped Rate
            dcc.Graph(
                id='skipped-rate',
                figure={
                    'data': [
                        go.Pie(
                            labels=['Skipped', 'Not Skipped'],
                            values=skipped_rate.values * 100,
                            hole=0.3,
                            textinfo='percent+label',
                            marker=dict(colors=['#FF6347', '#90EE90'])  # Red for skipped, green for not skipped
                        )
                    ],
                    'layout': go.Layout(
                        title='Skipped Rate',
                        showlegend=True,
                        plot_bgcolor='#191414',
                        paper_bgcolor='#191414',
                        font=dict(color='white')
                    )
                },
                style={'width': '32%', 'display': 'inline-block', 'marginBottom': '30px', 'borderRadius': '10px'}
            ),
        ],
        style={'display': 'flex', 'justify-content': 'space-between', 'flex-wrap': 'wrap', 'marginBottom': '30px'}
    ),

    # Third Row (2 Visuals)
    html.Div(
        children=[
            # Bar Chart: End Reasons
            dcc.Graph(
                id='end-reasons',
                figure={
                    'data': [
                        go.Bar(
                            x=end_reasons.index,
                            y=end_reasons.values,
                            name='End Reasons',
                            text=end_reasons.values,
                            textposition='auto',
                            marker=dict(color='#1ED760')  # Spotify Light Green
                        )
                    ],
                    'layout': go.Layout(
                        title='End Reasons for Streams',
                        xaxis={'title': 'Reason'},
                        yaxis={'title': 'Count'},
                        showlegend=False,
                        plot_bgcolor='#191414',
                        paper_bgcolor='#191414',
                        font=dict(color='white')
                    )
                },
                style={'width': '48%', 'display': 'inline-block', 'marginBottom': '30px', 'borderRadius': '10px'}
            ),

            # Bar Chart: Top Albums
            dcc.Graph(
                id='top-albums',
                figure={
                    'data': [
                        go.Bar(
                            x=most_played_album.index,
                            y=most_played_album,
                            name='Top Albums',
                            hovertext=[f'{album}<br>{plays:.2f} hours' for album, plays in zip(most_played_album.index, most_played_album)],
                            hoverinfo='text',
                            textposition='auto',
                            marker=dict(color='#1DB954')  # Spotify Green
                        )
                    ],
                    'layout': go.Layout(
                        title='Top Played Albums',
                        xaxis={'title': 'Album Name', 'tickangle': 45},
                        yaxis={'title': 'Hours Played'},
                        showlegend=False,
                        plot_bgcolor='#191414',
                        paper_bgcolor='#191414',
                        font=dict(color='white')
                    )
                },
                style={'width': '48%', 'display': 'inline-block', 'marginBottom': '30px', 'borderRadius': '10px'}
            ),
        ],
        style={'display': 'flex', 'justify-content': 'space-between', 'flex-wrap': 'wrap', 'marginBottom': '30px'}
    ),
])

# Run the Dash app
if __name__ == '__main__':
    app.run_server(debug=True)