# Spotify Charts – Feature Exploration

This notebook loads the processed Spotify tracks dataset and uses the `charts` module
(PCA + clustering helpers) to visualise the audio features: per-feature distributions,
a 2D PCA scatter plot, and k-means clusters shown in PCA space.


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from charts import plot_pca_scatter, plot_clustered_pca

# Location of the processed per-track feature table (a relative path).
DATA_PATH = "data/processed/songs_with_features.csv"

# Switch seaborn/matplotlib to the "whitegrid" theme for subsequent figures.
sns.set_theme(style="whitegrid")

# Read the CSV into the DataFrame shared by the remaining cells.
songs = pd.read_csv(DATA_PATH)

# Show the first five rows as a quick look at what was loaded.
songs.head(n=5)


In [None]:
# Audio-feature columns to explore; the PCA and clustering cells reuse this list.
numeric_cols = [
    "danceability", "energy", "loudness",
    "speechiness", "acousticness", "instrumentalness",
    "liveness", "valence", "tempo",
]

# Keep only the features that actually exist in the loaded frame, in the order above.
available = [name for name in numeric_cols if name in songs.columns]

# One 30-bin histogram per available feature, all on a single 14x10 figure.
songs[available].hist(bins=30, figsize=(14, 10))

# Work on the current figure explicitly; this is the same figure that
# plt.suptitle / plt.tight_layout would act on.
hist_fig = plt.gcf()
hist_fig.suptitle("Distributions of core audio features", fontsize=14)
hist_fig.tight_layout()
plt.show()


In [None]:
# PCA scatter: 2D embedding of tracks based on audio features.
#
# Only the feature columns that are present in `songs` are passed on
# (`available`, built in the distribution cell), mirroring the guard already
# applied to `hue` below. Passing the raw `numeric_cols` list would reference
# columns the frame may not contain.
# NOTE(review): `sample=3000` presumably subsamples rows before plotting; if
# that sampling is random, confirm the helper is seeded so re-runs match.

fig, ax, pca_res = plot_pca_scatter(
    songs,
    feature_cols=available,
    # Colour points by popularity only when that column exists.
    hue="popularity" if "popularity" in songs.columns else None,
    n_components=2,
    standardize=True,
    sample=3000,
)
fig


In [None]:
# k-means clustering visualised in PCA space.
#
# Only the feature columns that are present in `songs` are passed on
# (`available`, built in the distribution cell), so this cell tolerates a
# dataset that lacks some of the features listed in `numeric_cols`, just as
# the histogram cell does.
# NOTE(review): k-means initialisation and `sample=3000` are presumably
# stochastic; confirm the helper fixes a seed so cluster labels are reproducible.

fig, ax, cluster_res = plot_clustered_pca(
    songs,
    feature_cols=available,
    n_clusters=6,
    n_components=2,
    standardize=True,
    sample=3000,
)
fig
