In [1]:
import json

import altair as alt
import numpy as np
import pandas as pd

from pathlib import Path
from pandas.tseries.offsets import MonthBegin

In [2]:
# Directories used by the notebook, both relative to the current working
# directory: `data_folder` for inputs, `figs_folder` for outputs.
data_folder, figs_folder = Path("data"), Path("figs")

In [3]:
# Load the Apple Music play-activity export; the three event timestamp
# columns are parsed into datetimes while reading.
df = pd.read_csv(
    data_folder / "Apple Music Play Activity.csv",
    parse_dates=[
        "Event End Timestamp",
        "Event Received Timestamp",
        "Event Start Timestamp",
    ],
)

# Rows without a start time cannot be bucketed by month. Copy so that the
# column assignments below write to an independent frame rather than a slice
# of the raw one (which triggers pandas' SettingWithCopyWarning).
df = df[df["Event Start Timestamp"].notna()].copy()

# NOTE(review): the `.dt` accessor needs a datetime64 column -- confirm that
# `parse_dates` does not fall back to object dtype for this export.
start_ts = df["Event Start Timestamp"]
start_year = start_ts.dt.year.astype("int64")
start_month = start_ts.dt.month.astype("int64")

# First day of the calendar month of each play, as a timezone-naive timestamp.
df["year_month"] = pd.to_datetime({"year": start_year, "month": start_month, "day": 1})
df["year"] = start_year
df["month"] = start_month

# NOTE(review): the comparison is strict and `year_month` is always the first
# of a month, so plays from January 2018 are dropped together with everything
# earlier -- confirm that this is intended.
df = df[df["year_month"] > pd.to_datetime("2018", format="%Y")].copy()

# "<artist> - <title>" label per play; missing values are rendered as "nan".
df["full_comp_name"] = [
    f"{artist} - {title}"
    for artist, title in zip(df["Artist Name"], df["Content Name"])
]

In [4]:
# Colour table read from CSV; the plain "white" and "black" rows are removed
# before the remaining names are decomposed.
color_df = pd.read_csv(data_folder / "material-colors.csv")
color_df = color_df[~color_df.color_name.isin(["white", "black"])].reset_index(drop=True)

# Names are expected to look like "<base>-<shade>"; split each one once and
# derive the three helper columns from the pieces.
_name_parts = [name.split("-") for name in color_df.color_name]
color_df["base_color"] = [parts[0] for parts in _name_parts]
# Numeric part of the shade, ignoring any letters in it.
color_df["intensity"] = [
    int("".join(ch for ch in parts[1] if ch.isdigit()))
    for parts in _name_parts
]
# True when the shade contains an upper-case "A".
color_df["a_letter"] = ["A" in parts[1] for parts in _name_parts]

In [5]:
# Number of rows in `df` for every (artist, title) pair.
_play_counts = (
    df.groupby(["Artist Name", "Content Name"])
    .size()
    .reset_index(name="comp_count")
)

# Attach that count to every row and order the rows chronologically.
_plays_in_order = df.merge(
    _play_counts,
    on=["Artist Name", "Content Name"],
    how="left",
).sort_values("Event Start Timestamp")

# One row per artist, built from the chronologically earliest values.
# Note that the "first" aggregation takes the first non-null value of each
# column separately, so the fields of one output row may come from
# different rows when the earliest row has gaps.
_first_columns = [
    "Content Name",
    "Feature Name",
    "Event Start Timestamp",
    "Genre",
    "comp_count",
]
first_time_df = (
    _plays_in_order
    .groupby("Artist Name")
    .agg(dict.fromkeys(_first_columns, "first"))
    .sort_values("Event Start Timestamp")
)

In [6]:
# Bucket every first play into the calendar month it happened in.
#
# The month start is computed as "midnight of that day minus (day - 1) days".
# The previous form, `.round("D") - MonthBegin(1)`, had two problems:
#   * `Series.round` documents its argument as a number of decimals, so
#     passing a frequency string relied on undocumented behaviour; frequency
#     rounding lives on the `.dt` accessor.
#   * a timestamp that rounds to the 1st of a month is already on a month
#     start, so subtracting `MonthBegin(1)` moved it a full month back, and
#     plays from the first half-day of a month landed in the previous month.
# NOTE(review): for timezone-aware data with DST transitions, subtracting
# whole days may shift the result by an hour -- fine for UTC timestamps.
_first_play = first_time_df["Event Start Timestamp"]
first_time_df["eventmonth"] = _first_play.dt.floor("D") - pd.to_timedelta(
    _first_play.dt.day - 1, unit="D"
)

In [7]:
# Number of `first_time_df` rows per `eventmonth`.
_rows_per_month = (
    first_time_df.groupby("eventmonth")
    .size()
    .reset_index(name="monthcount")
)

# Flatten the index into a column and attach the month total to every row.
genre_df = first_time_df.reset_index().merge(
    _rows_per_month,
    on="eventmonth",
    how="left",
)

In [8]:
_month_genre = ["eventmonth", "Genre"]

# Sorting by descending `comp_count` first means the "first" aggregation
# below picks, per (month, genre), the values from the highest-count rows.
_by_count_desc = genre_df.sort_values(by="comp_count", ascending=False)
genre_portion_df = _by_count_desc.groupby(_month_genre).agg(
    dict.fromkeys(
        ["monthcount", "comp_count", "Artist Name", "Content Name"],
        "first",
    )
)

# Share of the month's rows that belong to each genre; the division aligns
# on the shared (eventmonth, Genre) index.
_rows_per_month_genre = genre_df.groupby(_month_genre).size()
genre_portion_df["portion"] = _rows_per_month_genre / genre_portion_df["monthcount"]

genre_portion_df = genre_portion_df.reset_index()

In [9]:
# --- Colour palette -------------------------------------------------------
# Keep three shade levels of every base colour except the greys, then draw a
# reproducible random subset of them to use as the genre colour range.
np.random.seed(42)
cur_color_df = color_df[
    color_df.intensity.isin([300, 600, 900])
    & ~color_df.base_color.isin(["grey", "blueGrey"])
].reset_index(drop=True)
# Sampling without replacement raises if more items are requested than
# exist, so never ask for more colours than the filtered table holds.
palette_size = min(50, len(cur_color_df))
cur_color_indx = sorted(
    np.random.choice(range(0, len(cur_color_df)), size=palette_size, replace=False)
)
color_scheme = cur_color_df.color_hex.take(cur_color_indx).tolist()

# --- Chart ----------------------------------------------------------------
# Hovering a bar segment selects its genre; all other genres are dimmed.
genre_select = alt.selection_single(empty="all", fields=["Genre"], on="mouseover")

# Stacked bars: one bar per month, one segment per genre, segment height is
# the genre's `portion` of that month.
genre_chart = alt.Chart(
    genre_portion_df
).mark_bar(
    width=18
).encode(
    x=alt.X("eventmonth:T", title="Timeline"),
    y=alt.Y(
        "portion:Q",
        stack=True,
        axis=alt.Axis(format="%"),
        scale=alt.Scale(domain=(0.0, 1.0)),
        title="Percentage",
    ),
    color=alt.Color(
        "Genre:N",
        scale=alt.Scale(range=color_scheme),
        legend=alt.Legend(title="Genre", orient="bottom", columns=13, symbolLimit=200),
    ),
    opacity=alt.condition(genre_select, alt.value(1.0), alt.value(0.1)),
    tooltip=[
        # This field is the genre; it was previously mislabelled "artist".
        alt.Tooltip("Genre:N", title="genre"),
        alt.Tooltip("portion:Q", title="portion"),
        alt.Tooltip("eventmonth:T", title="date"),
        alt.Tooltip("Artist Name:N", title="top artist"),
        alt.Tooltip("Content Name:N", title="top song"),
    ],
).properties(
    width=1200, height=600,
).add_selection(
    genre_select
).configure_axis(
    gridOpacity=0.3
)

# --- Export ---------------------------------------------------------------
# Make sure the output directory exists so saving works on a fresh checkout.
figs_folder.mkdir(parents=True, exist_ok=True)
genre_chart_name = "genre-chart"
genre_chart.save(str(figs_folder / f"{genre_chart_name}.html"))
genre_chart.save(str(figs_folder / f"{genre_chart_name}.png"))
genre_chart