In [1]:
import plotly.express as px
import pandas as pd
from jupyter_dash import JupyterDash
from dash import Dash, dcc, html, Input, Output

In [2]:
# Load the track data that the dashboard aggregates
df = pd.read_csv('outlier_removal/automated_dbscanned_trackdata.csv')

app = JupyterDash(__name__)

# Columns the user can pick from in the dropdown
feature_options = [
    "popularity",
    "danceability",
    "energy",
    "key",
    "loudness",
    "speechiness",
    "acousticness",
    "instrumentalness",
    "liveness",
    "valence",
    "tempo",
    "duration_ms",
    "time_signature",
]

# Selector for the feature to aggregate; it can never be cleared to an empty value
feature_dropdown = dcc.Dropdown(
    id="dropdown",
    options=feature_options,
    value="popularity",
    clearable=False,
)

# Page layout: the selector on top, the chart it drives underneath
app.layout = html.Div([feature_dropdown, dcc.Graph(id="graph")])

# Keep a dedicated reference to the frame loaded for this app. The callback
# below runs on every dropdown change, and later cells rebind the global `df`
# to other CSV files; reading `df` at call time would silently switch the
# chart to whichever file was loaded last.
_genre_chart_tracks = df


@app.callback(
    Output("graph", "figure"),
    Input("dropdown", "value"))
def update_bar_chart(element):
    """Build the bar chart of the per-genre mean of one feature.

    Parameters
    ----------
    element : str
        Column name selected in the "dropdown" component.

    Returns
    -------
    The plotly figure assigned to the "figure" property of "graph".
    """
    # Mean of the selected column for every genre, smallest first
    means = _genre_chart_tracks.groupby('genre')[element].mean().sort_values()
    feature_label = "Average " + element.capitalize()

    # Genres go on the y axis. Bar length is the magnitude of the mean, while
    # the colour and the text annotation keep the signed value.
    fig = px.bar(y=means.index, x=abs(means), color=means, color_continuous_scale="Rainbow",
                 title=feature_label + " per Genre", template='plotly_dark', text=means.round(3))

    fig.update_layout(coloraxis_showscale=False, xaxis_title=feature_label, yaxis_title="Genre", height=500)
    # Hover output is switched off; the value is already printed on each bar
    fig.update_traces(hovertemplate=None, hoverinfo='skip')

    return fig
    
# Start serving the app; the captured output below reports http://127.0.0.1:8050/.
# NOTE(review): with JupyterDash this presumably returns while the server keeps
# running, so callbacks can fire while later cells execute — confirm.
app.run_server()

Dash is running on http://127.0.0.1:8050/

Dash app running on http://127.0.0.1:8050/


In [3]:
# Correlation heatmap of the z-score-normalised track features.
#
# Earlier input variant, kept for reference:
# df = pd.read_csv("zscore_normalised_trackdata.csv")
# df1 = df.iloc[:,3:].drop('id', axis = 1)

# NOTE(review): `df` is reused across cells for different CSV files; anything
# that reads the global `df` after this cell sees this frame.
df = pd.read_csv("outlier_removal/automated_zscore_normalised_trackdata.csv")
# Skip the first three columns
df1 = df.iloc[:,3:]

print(df1)
# Everything except the last column
features = df1.iloc[:,:-1]

# `features` still holds text columns (e.g. 'album'). Older pandas dropped them
# silently inside corr(); pandas >= 2.0 raises instead. Selecting the numeric
# and boolean columns explicitly gives the same matrix on every version.
corrvals = features.select_dtypes(include=["number", "bool"]).corr().round(2)
import seaborn as sn  # NOTE(review): not used in this cell — confirm whether a later cell needs `sn`
# aspect uses the documented lowercase spelling "auto"
fig=px.imshow(corrvals,text_auto=True, aspect="auto",title="Correlation Matrix", width=832, height=468, template='plotly_white',
              color_continuous_scale="tealgrn", labels=dict(color="Correlation"))
fig.update_layout(font=dict(size=18, family="Comic Sans"), coloraxis_showscale=False, autosize=False, margin=dict(l=0, r=10, t=50, b=0))
fig.update(layout_showlegend=False)
fig.show()

# Earlier output name, kept for reference:
# fig.write_html("Graphs/CorrelationMatrix_new_zscore.html")

# Export the figure as a standalone HTML file
fig.write_html("Graphs/CorrelationMatrix_new_zscore_removed.html")

                                                  album  popularity  \
0                                             Belonging    0.021925   
1                 Hallelujah - 35 Great Sacred Choruses   -0.702255   
2     Monk Alone: The Complete Columbia Solo Studio ...   -2.060092   
3      Genius Of Modern Music (Vol.1, Expanded Edition)    0.293493   
4                                 'Round About Midnight    0.248232   
...                                                 ...         ...   
6091                    Birds In The Trap Sing McKnight    0.791367   
6092                                   guessnobodyknows   -0.249642   
6093                    Birds In The Trap Sing McKnight    0.746105   
6094                                              Ænima    0.610322   
6095                                         Blues-Ette   -2.060092   

      danceability    energy  key  loudness  mode  speechiness  acousticness  \
0         0.157386 -0.903716    5 -1.160191     1    -0.598550     

In [65]:
# Read CSV file into pandas dataframe
df = pd.read_csv('dbscanned_trackdata.csv')

# Count how many rows fall into each genre. The column names are set
# explicitly because the names produced by value_counts().reset_index()
# differ between pandas versions ('index'/'genre' before 2.0,
# 'genre'/'count' from 2.0 on).
count_df = (
    df['genre']
    .value_counts()
    .rename_axis('genre')
    .reset_index(name='count')
)

# Create a pie chart using Plotly: one slice per genre, sized by its row count
fig = px.pie(count_df, values='count', names='genre', width=832, height=468, template='plotly_white')
fig.update_layout(font=dict(size=18, family="Comic Sans"), title=dict(text="Number of Songs per Genre"),
                  autosize=False, margin=dict(l=0, r=0, t=50, b=0))
fig.update(layout_showlegend=False)
# Labels are drawn inside the slices, showing the percentage and the genre name
fig.update_traces(textposition='inside', textinfo='percent+label')
# Show the pie chart
fig.show()
# Export the figure as a standalone HTML file
fig.write_html("Graphs/piechart.html")