In [1]:
import plotly.express as px
import pandas as pd
from jupyter_dash import JupyterDash
from dash import Dash, dcc, html, Input, Output

In [2]:
# Read in the CSV file
df = pd.read_csv('outlier_removal/automated_dbscanned_trackdata.csv')

app = JupyterDash(__name__)

app.layout = html.Div([
    dcc.Dropdown(
        id="dropdown",
        options=["popularity", "danceability", "energy","key","loudness", "speechiness",
                 "acousticness", 'instrumentalness', "liveness","valence","tempo",
                 "duration_ms","time_signature"],
        value="popularity",
        clearable=False,
    ),
    dcc.Graph(id="graph"),
])

@app.callback(
    Output("graph", "figure"),
    Input("dropdown", "value"))

def update_bar_chart(element):
    grouped = df.groupby('genre')
    means = grouped[element].mean()
    means = means.sort_values()

    fig = px.bar(y=means.index, x=abs(means), color=means, color_continuous_scale="Rainbow", 
                title="Average " + element.capitalize() + " per Genre", template='plotly_dark', text=means.round(3))

    fig.update_layout(coloraxis_showscale=False, xaxis_title="Average " + element.capitalize(), yaxis_title="Genre",height=500)
    fig.update_traces(hovertemplate=None, hoverinfo='skip')
        
    return fig
    
app.run_server()

Dash is running on http://127.0.0.1:8050/

Dash app running on http://127.0.0.1:8050/


In [6]:
# df = pd.read_csv("zscore_normalised_trackdata.csv")
# df1 = df.iloc[:,3:].drop('id', axis = 1)

# df = pd.read_csv("outlier_removal/automated_zscore_normalised_trackdata.csv")
df = pd.read_csv("outlier_removal/automated_collinearity_removed.csv")
df1 = df.iloc[:,3:]

print(df1)
features = df1.iloc[:,:-1]

corrvals = features.corr().round(2)
import seaborn as sn
fig=px.imshow(corrvals,text_auto=True, aspect="Auto",title="Correlation Matrix", width=832, height=468, template='plotly_white',
              color_continuous_scale="tealgrn", labels=dict(color="Correlation"))
fig.update_layout(font=dict(size=18, family="Comic Sans"), coloraxis_showscale=False, autosize=False, margin=dict(l=0, r=10, t=50, b=0))
fig.update(layout_showlegend=False)
fig.show()

# fig.write_html("Graphs/CorrelationMatrix_new_zscore.html")

fig.write_html("Graphs/CorrelationMatrix_new_zscore_removed.html")

      key  mode  speechiness  acousticness  instrumentalness  liveness  \
0       5     1    -0.599011     -0.122394          0.222096 -0.412575   
1       2     1    -0.374430      1.679224          1.125460  0.920446   
2       8     1    -0.375706      1.976961          2.311764  0.509195   
3       8     1    -0.516069      1.864550          1.728839 -0.810354   
4      10     0    -0.444612      1.509088         -0.482323 -0.327488   
...   ...   ...          ...           ...               ...       ...   
6053    5     0    -0.551798      0.202687         -0.483551 -0.398393   
6054   11     0    -0.399951      1.876703          0.556170  0.126306   
6055    0     1    -0.387191     -0.216576         -0.483551  3.097241   
6056    0     1     3.086155     -0.885180          0.382315 -0.584165   
6057   11     0    -0.371878      1.952656          0.658438 -0.594801   

       valence     tempo  duration_ms  time_signature       genre  
0     1.708121  0.461228     1.519717      

In [8]:
# Read CSV file into pandas dataframe
df = pd.read_csv('outlier_removal/automated_dbscanned_trackdata.csv')

# Count the number of times each string appears in a column
count_df = df['genre'].value_counts().reset_index()

# Create a pie chart using Plotly
fig = px.pie(count_df, values='genre', names='index', width=832, height=468, template='plotly_white')
fig.update_layout(font=dict(size=18, family="Comic Sans"), title=dict(text="Number of Songs per Genre"),
                  autosize=False, margin=dict(l=0, r=0, t=50, b=0))
fig.update(layout_showlegend=False)
fig.update_traces(textposition='inside', textinfo='percent+label')
# Show the pie chart
fig.show()
fig.write_html("Graphs/piechart.html")