# Spotify analysis using Spotify's data from Kaggle

In [1]:
# Imports needed for the file
import spotipy
import numpy as np
import spotifykey
import sys 
import pandas as pd
import json
import plotly.express as px
from dash import Dash, html, dcc, Input, Output
from spotipy.oauth2 import SpotifyOAuth
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
# Set up the Spotify API client used by the notebook.

# The credentials live in the local spotifykey.py module; its api_key and
# api_secret variables are empty for now and must be filled in before running.
key = spotifykey.api_key
secret = spotifykey.api_secret

# Scope string defined here; it is not passed to the client created below.
scope = "user-library-read"

# Connect to Spotify through a client-credentials manager built from the key pair.
sp = spotipy.Spotify(
    client_credentials_manager=SpotifyClientCredentials(
        client_id=key,
        client_secret=secret,
    )
)

In [3]:
# Load the Kaggle CSV and reduce it to what the dashboard needs:
# a datetime index built from the release year, the "Top Genre" column,
# and a constant "nb_track" column (one per row) used for counting.
spotify_data = (
    pd.read_csv("Spotify-2000.csv")
    .assign(
        # Unparseable years become NaT instead of raising (errors="coerce").
        datetime=lambda frame: pd.to_datetime(frame["Year"], errors="coerce", format="%Y"),
        nb_track=1,
    )
    .set_index("datetime")
    .drop(
        columns=[
            "Index",
            "Title",
            "Artist",
            "Year",
            "Beats Per Minute (BPM)",
            "Energy",
            "Danceability",
            "Loudness (dB)",
            "Liveness",
            "Valence",
            "Length (Duration)",
            "Acousticness",
            "Speechiness",
            "Popularity",
        ]
    )
)

In [4]:
# Build a second data frame holding the number of tracks per genre.
#
# The frame is indexed by genre and has two columns:
#   'name'  - the genre label (copy of the index)
#   'value' - how many rows of spotify_data carry that genre
#
# The counts Series is renamed explicitly instead of looking the column up as
# 'Top Genre': pandas >= 2.0 names the value_counts() result 'count', so the
# former genre['Top Genre'] lookup raises KeyError there. rename_axis(None)
# keeps the index unnamed regardless of the pandas version in use.
genre = (
    spotify_data['Top Genre']
    .value_counts()
    .rename('value')
    .rename_axis(None)
    .to_frame()
)
genre.insert(0, 'name', genre.index)

In [5]:
# Dash application object and page layout for the music statistics dashboard.

app_music = Dash(__name__)

app_music.layout = html.Div(
    children=[
        # Page heading.
        html.H1("Spotify music statistics from 1958 to 2019"),

        # Date window selector; its start/end dates are inputs of the figure callback.
        dcc.DatePickerRange(
            id="date-range",
            start_date="1958-01-01",
            end_date="2020-01-01",
        ),

        # Switch between the per-year view (default) and the per-genre view.
        dcc.RadioItems(
            id="data_res",
            options={
                "year": "By year",
                "genre": "By genre",
            },
            value="year",
            inline=True,
        ),

        # Target of the figure callback.
        dcc.Graph(id="graph"),
    ]
)

@app_music.callback(
    Output(component_id="graph", component_property="figure"),
    Input(component_id="date-range", component_property="start_date"),
    Input(component_id="date-range", component_property="end_date"),
    Input(component_id="data_res", component_property="value"),
)
def update_figure(start_date, end_date, data_res_value):
    """Build the figure shown in the dashboard graph.

    Parameters
    ----------
    start_date, end_date : str or None
        Bounds of the date picker. A missing bound (None or empty string,
        which the picker may report while a range is cleared or only
        partially selected) is treated as open-ended instead of raising.
    data_res_value : str
        "year" for a line chart of track counts per year restricted to the
        selected date window, "genre" for a sunburst of track counts per
        genre (the date window is not applied to this view).

    Returns
    -------
    plotly figure
        The figure assigned to the "graph" component.

    Raises
    ------
    ValueError
        If data_res_value is neither "year" nor "genre".
    """
    if data_res_value == "year":
        x_title = "Year"
        # One row per calendar year, labelled with the year-end date.
        # NOTE(review): recent pandas releases prefer the 'YE' alias over 'Y';
        # 'Y' is kept because the notebook's pandas version is not pinned.
        yearly_counts = spotify_data.resample('Y').count()

        # Compare as timestamps rather than formatted strings so that a
        # missing bound can be replaced by an open-ended limit.
        lower = pd.Timestamp(start_date) if start_date else pd.Timestamp.min
        upper = pd.Timestamp(end_date) if end_date else pd.Timestamp.max
        in_window = (yearly_counts.index >= lower) & (yearly_counts.index <= upper)
        selected = yearly_counts.loc[in_window]

        fig = px.line(selected,
                      y=selected['nb_track'],
                      x=selected.index)

    elif data_res_value == "genre":
        x_title = "Genre"
        fig = px.sunburst(genre, path=['name'], values='value',
                          color='name')

    else:
        # Previously an unknown value fell through to an unbound `fig`.
        raise ValueError(f"Unsupported data resolution: {data_res_value!r}")

    fig.update_layout(xaxis_title=x_title,
                      yaxis_title="Count",
                      title="Statistics of the 2000 best spotify musics",
                      height=600)

    return fig

In [None]:
# Start the Dash server in debug mode, with the auto-reloader switched off,
# only when this code runs as the main module.
if __name__ == "__main__":
    app_music.run(use_reloader=False, debug=True)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__'
 * Debug mode: on
