In [1]:
# Spotify Top 50 (2019) - Figures Generator
# Day 2 Deliverable

import pandas as pd
import matplotlib.pyplot as plt
from graphviz import Digraph
import os

# -----------------------------
# 1. Setup
# -----------------------------
# Every generated figure is written below this folder; create it up front
# (exist_ok=True makes this a no-op when the folder is already there).
os.makedirs("figures", exist_ok=True)

# Load the dataset (change the path if your CSV is named differently).
df = pd.read_csv("data/top50.csv", encoding="latin-1")

# Normalise the headers so columns are easy to reference: trim surrounding
# whitespace, then map every "." and " " to "_".
_separator_map = str.maketrans({".": "_", " ": "_"})
df.columns = [header.strip().translate(_separator_map) for header in df.columns]

# Quick check of the first rows
df.head()
# Entity-relationship diagram: one node per table, whose label is the table
# name followed by its columns, one per line.
# NOTE: this needs the `graphviz` Python package to be importable -- the
# recorded notebook output ends with
# "ModuleNotFoundError: No module named 'graphviz'".
dot = Digraph(comment="Spotify ERD", format="png")

# Tables (dict order is the order in which the nodes are declared)
erd_tables = {
    "Artist": ["ArtistID (PK)", "Artist_Name"],
    "Genre": ["GenreID (PK)", "Genre"],
    "Track": [
        "TrackID (PK)",
        "Track_Name",
        "ArtistID (FK)",
        "GenreID (FK)",
        "Energy",
        "Length_seconds",
        "Popularity",
    ],
}
for table_name, column_names in erd_tables.items():
    dot.node(table_name, "\n".join([table_name, *column_names]))

# Relations: Artist and Genre each point at Track, labelled "1 - *"
for parent_table in ("Artist", "Genre"):
    dot.edge(parent_table, "Track", label="1 - *")

# Save ERD as PNG (view=False: do not open a viewer afterwards)
dot.render("figures/erd", view=False)
# Bar chart of how many tracks fall into each genre.
genre_counts = df["Genre"].value_counts()

fig, ax = plt.subplots(figsize=(8, 5))
genre_counts.plot(kind="bar", ax=ax, color="skyblue", edgecolor="black")
ax.set_title("Genre Distribution – Spotify Top 50 (2019)")
ax.set_xlabel("Genre")
ax.set_ylabel("Number of Tracks")
# Slant the genre names and right-align them so long labels stay readable.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
fig.tight_layout()
fig.savefig("figures/genre_distribution.png")
plt.show()
# Scatter plot: one point per track, Energy on x and Popularity on y.
energy_values = df["Energy"]
popularity_values = df["Popularity"]

fig, ax = plt.subplots(figsize=(7, 5))
ax.scatter(
    energy_values,
    popularity_values,
    alpha=0.7,
    c="green",
    edgecolors="black",
)
ax.set_title("Energy vs Popularity")
ax.set_xlabel("Energy")
ax.set_ylabel("Popularity")
# Light dashed grid to make individual points easier to read off.
ax.grid(True, linestyle="--", alpha=0.6)
fig.tight_layout()
fig.savefig("figures/energy_vs_popularity.png")
plt.show()
# Top 10 tracks by popularity.
# This cell used to be a verbatim copy of the Energy-vs-Popularity scatter
# above, so it only re-saved figures/energy_vs_popularity.png, while the
# figures/top10_tracks.png image embedded under "## 4. Top 10 Tracks" was
# never produced. It now generates that missing figure.
# NOTE(review): assumes the cleaned frame has a "Track_Name" column (the CSV
# header "Track.Name" after the "." -> "_" renaming in Setup) -- confirm
# against df.columns.
top10 = df.nlargest(10, "Popularity")

# Plot against integer positions and label the ticks afterwards, so two
# tracks that share a title still get their own bar.
bar_positions = list(range(len(top10)))

plt.figure(figsize=(8, 5))
plt.barh(bar_positions, top10["Popularity"], color="skyblue", edgecolor="black")
plt.yticks(bar_positions, top10["Track_Name"])
# barh draws position 0 at the bottom; flip the axis so the most popular
# track is the first bar from the top.
plt.gca().invert_yaxis()
plt.title("Top 10 Tracks by Popularity – Spotify Top 50 (2019)")
plt.xlabel("Popularity")
plt.ylabel("Track")
plt.tight_layout()
plt.savefig("figures/top10_tracks.png")
plt.show()
# 📊 Spotify Top 50 (2019) – Figures

## 1. ERD
![ERD](figures/erd.png)

## 2. Genre Distribution
![Genre Distribution](figures/genre_distribution.png)

## 3. Energy vs Popularity
![Energy vs Popularity](figures/energy_vs_popularity.png)

## 4. Top 10 Tracks
![Top 10 Tracks](figures/top10_tracks.png)


ModuleNotFoundError: No module named 'graphviz'