# Visualization
## Amazon Music Clustering Project

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
import pickle

### 1. Load Data

In [None]:
# Locations of the two processed CSV files, relative to the notebook.
clustered_csv = '../data/processed/clustered_data.csv'
scaled_csv = '../data/processed/scaled_features.csv'

# df is used by later cells for plotting (they read its 'cluster' column);
# X_scaled_df is the input to the PCA step.
df = pd.read_csv(clustered_csv)
X_scaled_df = pd.read_csv(scaled_csv)

### 2. PCA for 2D Visualization

In [None]:
# Two-component PCA, so each row can be placed on a 2D scatter plot.
pca = PCA(n_components=2)

# Exclude a 'cluster' column if one is present; errors='ignore' makes the
# drop a no-op when the column does not exist.
feature_matrix = X_scaled_df.drop(columns='cluster', errors='ignore')
pca_components = pca.fit_transform(feature_matrix)

# Store each projected coordinate on df under its own column name.
# NOTE(review): this assignment is positional, so df and X_scaled_df are
# assumed to hold the same rows in the same order - confirm upstream.
for axis_idx, column_name in enumerate(('pca1', 'pca2')):
    df[column_name] = pca_components[:, axis_idx]

# Persist the fitted PCA object to disk.
with open('../models/pca_model.pkl', mode='wb') as model_file:
    pickle.dump(pca, model_file)

### 3. Plot Clusters

In [None]:
# Scatter plot of the two PCA coordinates, coloured by the 'cluster' column.
plt.figure(figsize=(12, 8))
cluster_ax = sns.scatterplot(
    data=df,
    x='pca1',
    y='pca2',
    hue='cluster',
    palette='viridis',
    s=100,
)
cluster_ax.set_title('Clusters Visualized using PCA')
plt.show()

### 4. Feature Distribution by Cluster

In [None]:
# Columns of df to compare across clusters, one box plot per column.
features = [
    'danceability',
    'energy',
    'loudness',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
    'duration_ms',
]

# Each feature gets its own figure so the y-axis scale is independent.
for feature_name in features:
    plt.figure(figsize=(10, 6))
    box_ax = sns.boxplot(data=df, x='cluster', y=feature_name)
    box_ax.set_title(f'{feature_name} Distribution by Cluster')
    plt.show()