# 🎯 Playlist Analysis & Clustering

**Standalone analysis.** Run all cells from top to bottom. No other notebooks required.

- **Data:** Run the sync first (`python -m src.scripts.automation.sync`) so that the Parquet files are present in `data/`.
- **Views:** Genre profiles per playlist, playlist similarity.

## Setup & load

In [None]:
import sys
from pathlib import Path
# Put the helper module's location on sys.path. Two launch directories are
# recognised: one that contains src/notebooks/notebook_helpers.py, and one
# that contains notebook_helpers.py directly.
cwd = Path.cwd()
notebooks_dir = cwd / "src" / "notebooks"
if (notebooks_dir / "notebook_helpers.py").exists():
    # Prepend both entries at once; the notebooks directory ends up first,
    # followed by the current directory.
    sys.path[:0] = [str(notebooks_dir), str(cwd)]
elif (cwd / "notebook_helpers.py").exists():
    # The helper sits next to us, so prepend the directory two levels up.
    sys.path.insert(0, str(cwd.parent.parent))

from notebook_helpers import setup_standalone, analyze_library, build_playlist_genre_profiles
from notebook_helpers import analyze_playlist_similarity

# setup_standalone() hands back the project root and the data directory.
PROJECT_ROOT, DATA_DIR = setup_standalone()

# Analyse the library found under DATA_DIR with the liked-songs exclusion
# switched on and the monthly exclusion switched off; keep the analyzer
# object from the result for the cells below.
result = analyze_library(
    DATA_DIR,
    exclude_liked_songs=True,
    exclude_monthly=False,
)
analyzer = result['analyzer']

## Genre profiles per playlist

In [None]:
# Build one genre profile per playlist from the analyzer.
profiles = build_playlist_genre_profiles(analyzer)

# Map playlist_id -> name so profiles can be labelled readably.
names_by_id = analyzer.playlists_all.set_index('playlist_id')['name']
playlist_names = names_by_id.to_dict()

# Preview only the first five profiles.
preview = list(profiles.items())[:5]
for pid, counter in preview:
    # Fall back to the raw id when no name is known for it.
    name = playlist_names.get(pid, pid)
    # Keep the five most common entries of this playlist's profile.
    top_genres = counter.most_common(5)
    print(f"{name}: {dict(top_genres)}")

## Playlist similarity

In [None]:
# Build the similarity engine in 'followed_only' search mode.
similarity_engine = analyze_playlist_similarity(analyzer, search_mode='followed_only')

# Select the rows flagged as owned; the first one is the sample to query.
owned_mask = analyzer.playlists['is_owned'] == True
owned = analyzer.playlists[owned_mask]
if len(owned):
    sample_id = owned['playlist_id'].iloc[0]
    sample_name = owned['name'].iloc[0]
    similar = similarity_engine.find_similar(sample_id, top_n=5)
    print(f"Similar to '{sample_name}':")
    for match in similar:
        # Missing keys fall back to a zero score and an empty name.
        score = match.get('similarity', 0)
        title = match.get('name', '')
        print(f"  {score:.2f}  {title}")