# Análise de conjunto de dados vindo do Spotify

## Projeto da disciplina SCC0252 - Visualização Computacional

### Membros

* Alexandre E. de Souza Jesus - alexandre_souza@usp.br - **12559506**

* Eduardo Zaffari Monteiro - eduardozaffarimonteiro@usp.br - **12559490**

* Pedro Henrique de Freitas Maçonetto - pedromaconetto@usp.br - **12675419**

In [93]:
import dash
from dash import Dash, dcc, html, Input, Output
import dash_bootstrap_components as dbc
import plotly.graph_objs as go
import pandas as pd

import plotly.express as px
import numpy as np

from wordcloud import WordCloud
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [130]:
# Create the Dash application
aplicativo = dash.Dash(__name__)

# Load the Spotify 2023 dataset (the CSV is decoded as latin-1)
df = pd.read_csv('https://raw.githubusercontent.com/EduardoDuX/DataViz/main/spotify-2023.csv', encoding='latin-1')

# Data cleaning: keep only the rows whose 'streams' value is a purely numeric
# string, then convert that column to 64-bit integers.
stream = df['streams'].apply(lambda x: x.isnumeric())
# .copy() yields an independent frame, so the column assignment below does not
# write into a filtered view of the original (avoids SettingWithCopyWarning).
df = df[stream].copy()
df['streams'] = df['streams'].astype('int64')

# Chart 1: bar chart of playlist appearances by release year, colored by key
fig_1 = px.bar(
    data_frame=df,
    x='released_year',
    y='in_spotify_playlists',
    color='key',
    # Human-readable axis/legend titles shown instead of the column names
    labels=dict(
        in_spotify_playlists="Playlists do Spotify",
        released_year="Ano de lançamento",
        key="Tonalidade",
    ),
)

# Chart 2: streams against playlist appearances; the number of artists on a
# track drives both the marker size and the marker color
fig_2 = px.scatter(
    data_frame=df,
    x='in_spotify_playlists',
    y='streams',
    size='artist_count',
    color='artist_count',
    # Human-readable axis/legend titles shown instead of the column names
    labels=dict(
        streams="Quantidade de reproduções",
        in_spotify_playlists="Playlists em que aparece",
        artist_count="Artistas envolvidos",
    ),
)

# Year bounds of the slider, read from the dataset
_primeiro_ano = df['released_year'].min()
_ultimo_ano = df['released_year'].max()

# One tilted tick label every 3 years, starting at the earliest year
_marcas_anos = {
    ano: {'label': str(ano), 'style': {'transform': 'rotate(-45deg)'}}
    for ano in range(_primeiro_ano, _ultimo_ano + 1, 3)
}

# Left panel: chart 1 followed by the year range slider that filters it
_painel_grafico_1 = html.Div(
    children=[
        html.H1('Aparecimento em playlists do Spotify por ano de lançamento por tonalidade'),
        dcc.Graph(id='grafico_1', figure=fig_1),
        html.H3('Anos no gráfico:'),
        dcc.RangeSlider(
            id='year-slider',
            min=_primeiro_ano,
            max=_ultimo_ano,
            step=3,
            value=[1980, 2023],
            marks=_marcas_anos,
        ),
    ],
    style={'display': 'inline-block', 'width':'48%'},
)

# Right panel: chart 2, floated to the right-hand side of the page
_painel_grafico_2 = html.Div(
    children=[
        html.H1('Popularidade por aparecimento em playlists dado o número de artistas'),
        dcc.Graph(id='grafico_2', figure=fig_2),
    ],
    style={'display': 'inline-block','float': 'right', 'width':'48%'},
)

# Page layout: centered project title above the two side-by-side panels
aplicativo.layout = html.Div(
    children=[
        html.H1('Análise de dados do Spotify', style={'text-align':'center'}),
        _painel_grafico_1,
        _painel_grafico_2,
    ]
)

@aplicativo.callback(
    Output('grafico_1', 'figure'),
    Input('year-slider', 'value')
)
def update_grafico_1(year_list):
    """Rebuild chart 1 for the year interval selected on the range slider.

    Args:
        year_list: Two-element ``[first_year, last_year]`` value of the
            'year-slider' component.

    Returns:
        A bar figure of 'in_spotify_playlists' by 'released_year', colored by
        'key', restricted to the rows released within the selected interval.
    """
    first_year, last_year = year_list

    # Both slider handles are inclusive. The previous range(first, last)
    # filter stopped one year short, silently dropping the last selected year.
    df_updated = df[df['released_year'].between(first_year, last_year)]

    updated_fig = px.bar(df_updated,
             x='released_year',
             y='in_spotify_playlists',
             color='key',
             labels={
                     "in_spotify_playlists": "Playlists do Spotify",
                     "released_year": "Ano de lançamento",
                     "key": "Tonalidade"
                 })

    return updated_fig


# Start the development server only when executed as the main module.
# Dash.run replaces the deprecated run_server alias (removed in Dash 3).
if __name__ == '__main__':
    aplicativo.run(debug=True)

In [52]:
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
import pandas as pd

# Dash application for the GDP / life-expectancy example
app = Dash(__name__)

# Read the 2007 GDP and life-expectancy table from the public gist
df = pd.read_csv(
    'https://gist.githubusercontent.com/chriddyp/5d1ea79569ed194d432e56108a04d188/raw/a9f9e8076b837d541398e999dcbac2b2826a81f8/gdp-life-exp-2007.csv'
)

# First visualization: bubble scatter of life expectancy against GDP per
# capita, sized by population and colored by continent
fig = px.scatter(
    data_frame=df,
    x="gdp per capita",
    y="life expectancy",
    color="continent",
    size="population",
    size_max=60,
    hover_name="country",
)

# Second visualization: GDP per capita of each country as a bar chart
fig2 = px.bar(data_frame=df, x="country", y="gdp per capita")

# Left panel: axis-scale selector above the scatter plot
_scatter_panel = html.Div(
    children=[
        html.H3("Scatter Plot: Life Expectancy vs. GDP Per Capita"),
        dcc.RadioItems(
            id='log-scale',
            options=[
                {'label': 'Log Scale', 'value': 'log'},
                {'label': 'Linear Scale', 'value': 'linear'},
            ],
            value='log',
            labelStyle={'display': 'block'},
        ),
        dcc.Graph(id='life-exp-vs-gdp', figure=fig),
    ],
    style={'width': '49%', 'display': 'inline-block'},
)

# The country-count slider goes from 10 up to the number of rows, with a
# tilted tick label every 10 rows
_total_rows = len(df)
_count_marks = {
    tick: {'label': str(tick), 'style': {'transform': 'rotate(-45deg)'}}
    for tick in range(10, _total_rows + 1, 10)
}

# Right panel: bar chart with its row-count slider and sort-order dropdown
_bar_panel = html.Div(
    children=[
        html.H3("Bar Chart: GDP Per Capita by Country"),
        dcc.Graph(id='gdp-per-capita-bar', figure=fig2),
        dcc.Slider(
            id='country-count-slider',
            min=10,
            max=_total_rows,
            step=10,
            value=_total_rows,
            marks=_count_marks,
        ),
        dcc.Dropdown(
            id='sorting-dropdown',
            options=[
                {'label': 'Sort by Alphabetical Order', 'value': 'alphabetical'},
                {'label': 'Sort by GDP per Capita', 'value': 'gdp'},
            ],
            value='alphabetical',
            style={'width': '50%'},
        ),
    ],
    style={'width': '49%', 'display': 'inline-block'},
)

# Page layout: the two panels placed side by side
app.layout = html.Div(children=[_scatter_panel, _bar_panel])

@app.callback(
    Output('gdp-per-capita-bar', 'figure'),
    Input('country-count-slider', 'value'),
    Input('sorting-dropdown', 'value')
)
def update_country_count(selected_count, sorting_option):
    """Redraw the GDP bar chart for the chosen ordering and row count.

    Args:
        selected_count: Number of rows to keep after sorting (slider value).
        sorting_option: 'alphabetical' sorts by country name; any other value
            sorts by GDP per capita in descending order.

    Returns:
        A bar figure of 'gdp per capita' by 'country' for the kept rows.
    """
    # Pick the sort_values arguments that match the requested ordering
    sort_kwargs = (
        {'by': 'country'}
        if sorting_option == 'alphabetical'
        else {'by': 'gdp per capita', 'ascending': False}
    )
    visible_rows = df.sort_values(**sort_kwargs).head(selected_count)

    return px.bar(visible_rows, x="country", y="gdp per capita")

@app.callback(
    Output('life-exp-vs-gdp', 'figure'),
    Input('log-scale', 'value')
)
def update_scale(selected_scale):
    """Redraw the scatter plot with a log or linear x axis.

    Args:
        selected_scale: Value of the 'log-scale' radio group; 'log' enables
            the logarithmic x axis, any other value leaves it linear.

    Returns:
        The scatter figure of life expectancy against GDP per capita.
    """
    use_log_x = selected_scale == 'log'
    return px.scatter(
        data_frame=df,
        x="gdp per capita",
        y="life expectancy",
        color="continent",
        size="population",
        size_max=60,
        hover_name="country",
        log_x=use_log_x,
    )

# Start the development server only when executed as the main module.
# Dash.run replaces the deprecated run_server alias (removed in Dash 3).
if __name__ == '__main__':
    app.run(debug=True)
