In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

### Reading CSV file

In [2]:
# Load the songs dataset from the CSV export (path is relative to the notebook's working directory).
SONGS_CSV_PATH = './dataset/csv_data/songs_data.csv'
df_songs = pd.read_csv(SONGS_CSV_PATH)

In [3]:
# Preview the first five rows (five is the default row count of head()).
df_songs.head()

Unnamed: 0,song_id,artist_name,track_name,uri_id,release_date,explicit,type,popularity,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,7GajHE5z66qqmzTrKVFCab,Ella Henderson,Take Care of You,spotify:track:7GajHE5z66qqmzTrKVFCab,2020-06-12,True,track,67,0.66,0.667,-6.87,0.0474,0.141,5.3e-05,0.107,0.452,114.901
1,3hWfKBt3n7j1xqIy6LA5ve,Vicetone,Astronomia,spotify:track:3hWfKBt3n7j1xqIy6LA5ve,2016-09-16,False,track,72,0.653,0.945,-2.634,0.096,0.00561,0.915,0.439,0.326,126.093
2,0gplL1WMoJ6iYaPgMCL0gX,Adele,Easy On Me,spotify:track:0gplL1WMoJ6iYaPgMCL0gX,2021-10-14,False,track,100,0.604,0.366,-7.519,0.0282,0.578,0.0,0.133,0.13,141.981
3,2etALbTip5aL30PxVbELDU,Sandman,Summer Lover,spotify:track:2etALbTip5aL30PxVbELDU,2021-08-20,False,track,27,0.564,0.687,-6.461,0.0264,0.281,0.0,0.156,0.768,86.95
4,0D9FOgEG9uZm8EsE5q2Y6F,Sarah Proctor,Lost,spotify:track:0D9FOgEG9uZm8EsE5q2Y6F,2021-05-14,False,track,45,0.543,0.458,-6.641,0.0306,0.306,0.0,0.105,0.241,126.804


### Converting `release_date` to datetime

In [4]:
# Parse the release_date strings (e.g. '2020-06-12') into pandas datetimes,
# replacing the original column so the .dt accessor becomes available on it.
parsed_release_dates = pd.to_datetime(df_songs['release_date'])
df_songs['release_date'] = parsed_release_dates

In [5]:
# Split the release date into separate calendar-part columns.
# Each new column is named after the .dt attribute it is read from.
for date_part in ('year', 'month', 'day'):
    df_songs[date_part] = getattr(df_songs['release_date'].dt, date_part)

In [6]:
# List the column labels to confirm the derived year / month / day columns were added.
df_songs.columns

Index(['song_id', 'artist_name', 'track_name', 'uri_id', 'release_date',
       'explicit', 'type', 'popularity', 'danceability', 'energy', 'loudness',
       'speechiness', 'acousticness', 'instrumentalness', 'liveness',
       'valence', 'tempo', 'year', 'month', 'day'],
      dtype='object')

In [7]:
# Artists with the most tracks in the dataset.
# Only artists with strictly more than MIN_TRACKS_PER_ARTIST tracks are plotted.
MIN_TRACKS_PER_ARTIST = 5

# value_counts() returns the number of rows per artist, most frequent first.
artist_track_counts = df_songs['artist_name'].value_counts()
# Filter once and reuse the result for both the labels and the bar heights.
frequent_artists = artist_track_counts[artist_track_counts > MIN_TRACKS_PER_ARTIST]
artist_name = frequent_artists.index
artist_count = frequent_artists.values

fig = go.Figure(
    data=[
        go.Bar(
            x = artist_name, 
            y = artist_count,
            marker = {
                # Colour each bar by its own height.
                'color' : artist_count
            },
            text = artist_name,
        )
    ]
)
fig.update_layout(
    title_text= 'Count of Artists listened',
    barmode='group', 
    xaxis_tickangle=45,
    xaxis_title = "Name",
    yaxis_title = "Count",
    hovermode = 'x',
    height = 650,
    width = 1210,
)
fig.show()


In [8]:
# Number of tracks per release month (the `month` column holds release_date's month number).
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'July', 'Aug', 'Sept', 'Oct', 'Nov', 'Dec']

# Reindex over every calendar month (1..12) so each count stays paired with its
# label: a month with no tracks becomes an explicit 0 instead of silently
# shifting the remaining bars under the wrong month names.
month = (
    df_songs['month']
    .value_counts()
    .reindex(range(1, len(months) + 1), fill_value=0)
)

fig = go.Figure(
    data=[
        go.Bar(
            x = months, 
            y = month.values,
            marker = {
                # Colour each bar by its own height.
                'color' : month.values
            },
            text = months,
            textposition = 'inside'
        )
    ]
)
fig.update_layout(
    # The counts are grouped by release month, so the title says "released"
    # (matching the per-day chart) rather than "listened".
    title_text='Count of songs released as per month',
    barmode='group', 
    xaxis_title = "month",
    yaxis_title = "count",
    height = 800,
    width = 1210,
    hovermode = 'x'
)
fig.show()

In [9]:
# Number of tracks per day of the month of their release date,
# ordered by day number.
day = df_songs['day'].value_counts().sort_index(ascending=True)

fig = go.Figure(
    data=[
        go.Bar(
            x = day.index, 
            y = day.values,
            marker = {
                # Colour each bar by its own height.
                'color' : day.values
            },
            # Print the count inside each bar.
            text = day,
            textposition = 'inside'
        )
    ]
)
fig.update_layout(
    title_text='Count of songs released as per days',
    barmode='group', 
    # The x axis holds day-of-month numbers, not months.
    xaxis_title = "day",
    yaxis_title = "count",
    height = 800,
    width = 1210,
    hovermode = 'x'
)
fig.show()

In [10]:
# Donut chart of how many tracks are flagged explicit versus not.
# Sorting by index fixes the slice order, which is what the positional
# colour list below is matched against.
explicit_content = df_songs['explicit'].value_counts().sort_index(ascending=True)
colors = ['lightgreen', 'red']

labels = explicit_content.index
values = explicit_content.values

explicit_pie = go.Pie(
    labels=labels,
    values=values,
    hole=.3,
    # Hover shows label and percentage; the slice itself shows the raw count.
    hoverinfo='label+percent',
    textinfo='value',
    textfont=dict(size=20),
    marker=dict(
        colors=colors,
        line=dict(color='#000000', width=2),
    ),
)

fig = go.Figure(data=[explicit_pie])
fig.update_layout(
    title_text='Explicit songs distribution',
    height=600,
    width=900,
    hovermode='x',
)
fig.show()

In [11]:
# Heatmap of the pairwise Spearman correlation between the audio-feature columns.
audio_feature_columns = [
    'danceability', 'energy', 'loudness', 'speechiness', 'acousticness',
    'instrumentalness', 'liveness', 'valence', 'tempo',
]
correlation = df_songs[audio_feature_columns].corr(method='spearman')

fig = px.imshow(correlation, labels={'color': "Correlation"})
fig.update_layout(
    title_text='Heatmap Correlation between songs attributes',
    height=600,
    width=800,
)
fig.show()

In [12]:
popularity = df_songs[['artist_name', 'popularity']]

# Rank artists once by the popularity of their highest-scoring track, best first,
# and reuse that ranking for both ends instead of recomputing the pipeline.
artist_ranking = (
    popularity
    .groupby('artist_name')
    .agg('max')
    .reset_index()
    .sort_values(by='popularity', ascending=False)
)

# First row of the ranking: the artist with the highest maximum popularity.
most_popular = artist_ranking[:1]
# Last row of the ranking: the artist whose best track has the lowest popularity.
least_popular = artist_ranking[::-1][:1]

In [13]:
# Each frame holds a single row, so joining its artist names yields just that name.
most_popular_name = " ".join(most_popular["artist_name"])
least_popular_name = " ".join(least_popular["artist_name"])

print(f'Most popular: {most_popular_name}')
print(f'least popular: {least_popular_name}')

Most popular: Adele
least popular: Mabel


In [14]:
# radar plot: select the audio-feature columns that the chart summarizes
radar_feature_columns = [
    'danceability', 'energy', 'loudness', 'speechiness', 'acousticness',
    'instrumentalness', 'liveness', 'valence', 'tempo',
]
song_attributes = df_songs[radar_feature_columns]
song_attributes

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,0.660,0.667,-6.870,0.0474,0.14100,0.000053,0.1070,0.452,114.901
1,0.653,0.945,-2.634,0.0960,0.00561,0.915000,0.4390,0.326,126.093
2,0.604,0.366,-7.519,0.0282,0.57800,0.000000,0.1330,0.130,141.981
3,0.564,0.687,-6.461,0.0264,0.28100,0.000000,0.1560,0.768,86.950
4,0.543,0.458,-6.641,0.0306,0.30600,0.000000,0.1050,0.241,126.804
...,...,...,...,...,...,...,...,...,...
1200,0.652,0.802,-6.114,0.1810,0.08710,0.000006,0.1480,0.295,170.157
1201,0.586,0.561,-6.594,0.0817,0.03240,0.000000,0.1070,0.446,161.906
1202,0.759,0.605,-7.737,0.0433,0.15600,0.000000,0.6940,0.640,121.049
1203,0.594,0.808,-2.409,0.0489,0.41500,0.000014,0.5650,0.919,188.090


In [15]:
# Radar chart of the average value of each audio feature.
feature_means = song_attributes.mean()

# Some features do not live on a 0-1 scale (in this dataset's preview, loudness
# is negative and tempo is above 100). Plotting their raw means on the same
# radial axis as the 0-1 features would flatten everything else, so any column
# whose observed values leave [0, 1] is min-max scaled into that interval.
feature_min = song_attributes.min()
feature_max = song_attributes.max()
needs_scaling = (feature_min < 0) | (feature_max > 1)
# A constant column has a zero range; divide by 1 instead to avoid NaN.
feature_range = (feature_max - feature_min).where(feature_max != feature_min, 1)
# Keep the raw mean where no scaling is needed, otherwise use the scaled mean.
radar_values = feature_means.where(
    ~needs_scaling,
    (feature_means - feature_min) / feature_range,
)

fig = go.Figure(
    data = go.Scatterpolar(
        r = radar_values.values,
        # Labels come from the data itself so they cannot drift from the values.
        theta = list(radar_values.index),
        # Expose the unscaled mean on hover since scaled features lose their units.
        hovertext = [f'raw mean: {value:.3f}' for value in feature_means],
        # Single trace: close the polygon onto itself.
        fill = 'toself'
    )
)
fig.update_layout(
    polar = dict(
        radialaxis = dict(
            visible = True,
            # All plotted values are within [0, 1] after scaling.
            range = [0, 1]
        ),
    ),
    showlegend=False,
    title_text = 'Radar chart for songs features'
)

fig.show()

In [16]:
def get_correlation_song_attribute(x, y, data=None):
    """Build a scatter plot relating two song attributes.

    Points are coloured by the ``year`` column and split into one facet row
    per value of the ``explicit`` column, with a box plot of ``y`` drawn in
    the margin.

    Parameters
    ----------
    x : str
        Name of the column plotted on the x axis.
    y : str
        Name of the column plotted on the y axis.
    data : pandas.DataFrame, optional
        Frame to plot. It must contain the ``x`` and ``y`` columns as well as
        ``year`` and ``explicit``. Defaults to the notebook-level ``df_songs``
        so existing two-argument calls keep working.

    Returns
    -------
    The figure created by ``px.scatter``; it is returned rather than shown so
    the caller decides how to display it.
    """
    # Fall back to the notebook's global frame only when none is supplied.
    # (`data or df_songs` would not work: a DataFrame has no single truth value.)
    if data is None:
        data = df_songs

    fig = px.scatter(
        data,
        x = x, 
        y = y, 
        color = "year", 
        facet_row = "explicit",
        marginal_y = "box"
    )
    fig.update_layout(
        title = f'Correlation between {x} and {y} attributes',
    )
    return fig

In [17]:
# Relationship between speechiness (x axis) and danceability (y axis).
get_correlation_song_attribute(x='speechiness', y='danceability')