# 🎶 Objectif 3 : Comparaison entre le nombre de playlists Spotify et le nombre de streams par année de sortie

#### Rappel

Scatter plot pour comparer les chansons ajoutées dans les playlists avec celles qui n'y figurent pas, en analysant si l'intégration dans les playlists garantit une augmentation significative des streams.

In [27]:
# Import
import pandas as pd
from dash import Dash, dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import statsmodels.api as sm

In [28]:
# Chargement
data = pd.read_csv("./dataset/dataset_filtered.csv")

In [29]:
# Filtrage des données
data_filtered = data[['track', 'spotify_playlists', 'streams', 'artist_name', 'released_year']].dropna()
data_filtered = data_filtered[data_filtered['streams'].apply(lambda x: str(x).isdigit())]
data_filtered = data_filtered[data_filtered['spotify_playlists'].apply(lambda x: str(x).isdigit())]
data_filtered = data_filtered[data_filtered['released_year'].apply(lambda x: str(x).isdigit())]

In [30]:
# Convertir les colonnes en entiers
data_filtered['spotify_playlists'] = data_filtered['spotify_playlists'].astype(int)
data_filtered['streams'] = data_filtered['streams'].astype(int)
data_filtered['released_year'] = data_filtered['released_year'].astype(int)

OverflowError: Python int too large to convert to C long

In [26]:
# Ligne de régression
X = sm.add_constant(data_filtered['spotify_playlists'])
model = sm.OLS(data_filtered['streams'], X).fit()
data_filtered['regression_line'] = model.predict(X)

ValueError: Pandas data cast to numpy dtype of object. Check input data with np.asarray(data).

In [None]:
# Initialisation de l'application Dash
app = Dash(__name__)

app.layout = html.Div([
    html.H1("Playlists Spotify VS Streams par Année de Sortie"),
    dcc.Graph(id='scatter-plot')
])

In [None]:
# Callback pour mettre à jour le graphique en fonction de la période sélectionnée
@app.callback(
    Output('scatter-plot', 'figure'),
    Input('scatter-plot', 'id')
)
def update_graph(_):
    fig = px.scatter(
        data_filtered,
        x='in_spotify_playlists',
        y='streams',
        color='released_year',
        color_continuous_scale=["yellow", "orange", "purple"],
        hover_data={
            'track_name': True,
            'artist.s._name': True,
            'in_spotify_playlists': True,
            'streams': True,
            'released_year': True
        },
        labels={
            'released_year': 'Released Year',
            'in_spotify_playlists': 'Number of Playlists',
            'streams': 'Number of Streams'
        },
        title="Relation between Spotify Playlists and Streams by Release Year"
    )

    # Ajout de la droite de régression
    fig.add_scatter(
        x=data_filtered['in_spotify_playlists'],
        y=data_filtered['regression_line'],
        mode='lines',
        name='Regression Line',
        line=dict(color='red', dash='dash')
    )

    fig.update_layout(
        xaxis_title="Number of Playlists",
        yaxis_title="Number of Streams",
        hovermode="closest"
    )

    return fig

In [None]:
# Exécution du serveur (http://127.0.0.1:8050/)
if __name__ == '__main__':
    app.run_server(debug=True)