In [1]:
import math
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Global seed for the stochastic parts of this notebook (scikit-learn is not
# covered and needs an explicit `random_state` wherever it is used).
RND_SEED: int = 12345
# Seed NumPy's legacy global RNG (for numpy, scipy). pandas needs no separate
# seeding: when `random_state` is omitted it draws from this same NumPy global
# state. The previous `pd.core.common.random_state(RND_SEED)` call only built
# and discarded a RandomState object through a private pandas API, so it never
# seeded anything and has been removed.
np.random.seed(RND_SEED)

# Resolution (in pixels) for exported graph images
WIDTH: int = 1366
HEIGHT: int = 768

In [3]:
# Load the combined 2024 dataset; the first CSV column is used as the index.
df = pd.read_csv(
    "./../../data/Combined-2024.csv",
    encoding="utf-8",
    index_col=[0],
)

def to_normal_date(msg: str) -> str:
    """Swap the first two "/"-separated fields of a date string.

    Args:
        msg: Text made of exactly three parts separated by "/", treated as
            month/day/year.

    Returns:
        The same three parts re-ordered as day/month/year, joined with "/".

    Raises:
        ValueError: If `msg` does not split into exactly three parts.
    """
    month, day, year = msg.split("/")
    return "/".join((day, month, year))

# Re-order every release date from month/day/year to day/month/year.
df["Release Date"] = df["Release Date"].astype(str).apply(to_normal_date)

# Convert stream counts written like "1,234,567" into millions of streams.
# Any value that is not purely digits once the commas are stripped becomes NaN
# (and is removed by the dropna() below).
df["Spotify Streams"] = (
    df["Spotify Streams"]
    .astype(str)
    .str.replace(",", "", regex=False)
    .apply(lambda s: float(s) / 1e6 if s.isdigit() else np.nan)
)

# Build a compact label "<track> (<album>) By <artist>", each part truncated to
# 10 characters. This is vectorised on purpose: the former row loop addressed
# rows with df.loc[i, ...] for i in range(n), which only works when the index
# labels read from the CSV are exactly 0..n-1, and its trailing `del i` raised
# NameError on an empty frame.
df["Track"] = (
    df["Track"].astype(str).str[:10]
    + " ("
    + df["Album Name"].astype(str).str[:10]
    + ") By "
    + df["Artist"].astype(str).str[:10]
)

# Keep only the columns used downstream and drop rows with missing values.
df = df.loc[:, ["Track", "Spotify Streams", "Release Date"]].dropna()

In [5]:
# Bar chart of the 50 rows with the highest "Spotify Streams" value.
fig = px.bar(
    df.sort_values("Spotify Streams", ascending=False).head(50),
    x="Track",
    y="Spotify Streams",
)
fig.update_layout(
    title={
        "text": f"Top 50 Tracks with the Most Streams on Spotify 2024",
        "font": {"size": 24},
    },
    xaxis={"title_text": "Track Name"},
    yaxis={"title_text": "Streams (Million)"},
    font={"size": 18},
    margin={"l": 110, "r": 50, "t": 75, "b": 75},
)
fig.show()
# Also export the figure as a PNG at the configured resolution.
fig.write_image(
    "./../../images/2024/name-stream.png",
    width=WIDTH,
    height=HEIGHT,
    scale=1.0,
)