In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the combined Spotify dataset from the clean-data folder
# (the path is resolved relative to the current working directory).
combined_df = pd.read_csv(
    filepath_or_buffer="../cleandata/combined_spotify_data.csv"
)

In [4]:
import altair as alt
# Average track popularity for every playlist genre.
mean_popularity_by_genre = combined_df.groupby("playlist_genre")["track_popularity"].mean()

# Rank genres from most to least popular and keep the first ten.
top10_genres = mean_popularity_by_genre.sort_values(ascending=False).head(10)

# Keep only rows belonging to those genres, and only the two columns
# the density chart needs.
in_top10 = combined_df["playlist_genre"].isin(top10_genres.index)
genres_df = combined_df.loc[in_top10, ["playlist_genre", "track_popularity"]]

# Ten colours used as the range of the genre colour scale:
# nine hex codes followed by one named colour.
legend_color = [
    "#8dd3c7",
    "#ffffb3",
    "#bebada",
    "#fb8072",
    "#80b1d3",
    "#fdb462",
    "#b3de69",
    "#fccde5",
    "#664bdd",
    "black",
]

# Point selection keyed on the genre field; the chart's colour condition
# tests against it.
selection = alt.selection_point(
    fields=["playlist_genre"],
)

# Range slider (0 to 100 in steps of 5) bound to a parameter that starts at 100.
# The chart's filter transform compares track popularity against this parameter.
slider = alt.binding_range(
    name="Max Popularity: ",
    min=0,
    max=100,
    step=5,
)
var = alt.param(bind=slider, value=100)

# Sorted genre labels used to pin the colour scale's domain. Without an explicit
# domain the scale is inferred from whichever rows survive the slider filter, so
# a genre dropping out would shift the remaining genres onto different colours.
# Sorting is intended to mirror the default ascending ordering of a nominal
# domain, so the genre-to-colour mapping stays as before when nothing is filtered.
genre_domain = sorted(genres_df["playlist_genre"].unique().tolist())

# Colour encoding for genres: fixed domain, custom palette, readable legend title.
genre_color = alt.Color(
    "playlist_genre:N",
    title="Genre",
    scale=alt.Scale(domain=genre_domain, range=legend_color),
)

chart = (
    alt.Chart(genres_df)
    # Keep only tracks whose popularity is at or below the slider value.
    .transform_filter(alt.datum.track_popularity <= var)
    # Density estimate of the remaining popularity values, computed per genre.
    .transform_density(
        "track_popularity",
        as_=["track_popularity", "density"],
        groupby=["playlist_genre"],
    )
    .mark_line()
    .encode(
        # Explicit titles so the axes do not show raw column names.
        x=alt.X("track_popularity:Q", title="Track Popularity"),
        y=alt.Y("density:Q", title="Density"),
        # Genres matching the selection keep their palette colour;
        # all others are drawn in light gray.
        color=alt.condition(selection, genre_color, alt.value("lightgray")),
    )
    .add_params(selection, var)
    .properties(
        width=800,
        height=400,
        title="Distribution Of Track Popularity Across The Top 10 Spotify Genres",
    )
)

# Last expression of the cell: renders the chart as the cell output.
chart


In [5]:
# Write the interactive chart to an HTML file
# (the path is relative to the current working directory).
chart.save(fp="altair_1.html")