# VAE-based Audio Feature Extraction and Clustering

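This notebook builds a simplified, scikit-learn-based approximation of a Variational Autoencoder (VAE), uses its latent means as extracted features, and compares K-Means clustering on those features against a PCA + K-Means baseline.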

**⚠️ Prerequisite**: Run `data_preprocessing.ipynb` first to generate preprocessed data files.


## Cell 1: Imports and Load Dataset


In [None]:
# Install packages if needed
# !pip install scikit-learn matplotlib -q

import os
import pickle
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics import silhouette_score, calinski_harabasz_score
from sklearn.neural_network import MLPRegressor

# Seed NumPy's global RNG: SimpleVAE.reparameterize draws its noise from it.
np.random.seed(42)

# Load preprocessed data (generated by data_preprocessing.ipynb)
print("Loading preprocessed data...")
features_scaled = np.load('audio_features.npy')
labels = np.load('labels.npy')

# Later cells read features_scaled.shape[1] and colour one plotted point per
# label, so catch a malformed or stale pair of files here, where the cause is
# obvious, instead of deep inside PCA or matplotlib.
if features_scaled.ndim != 2:
    raise ValueError(
        f"audio_features.npy must be 2-D (samples x features), got shape {features_scaled.shape}"
    )
if len(features_scaled) != len(labels):
    raise ValueError(
        f"Feature/label mismatch: {len(features_scaled)} feature rows vs {len(labels)} labels. "
        "Re-run data_preprocessing.ipynb to regenerate both files together."
    )

# SECURITY: pickle.load can execute arbitrary code while deserialising.
# Only load a genre_mapping.pkl that was produced by your own preprocessing run.
with open('genre_mapping.pkl', 'rb') as f:
    mappings = pickle.load(f)
genre_to_label = mappings['genre_to_label']

print(f"✅ Dataset loaded: {len(labels)} samples, {len(genre_to_label)} genres")
print(f"   Features shape: {features_scaled.shape}")


## Cell 2: Define VAE Model


In [None]:
class SimpleVAE:
    """VAE-style feature extractor assembled from three scikit-learn MLP regressors.

    This is an approximation, not a jointly trained variational autoencoder:
    no reconstruction + KL objective is optimised. ``fit`` trains the parts
    one after another instead:

    * ``encoder_mu`` regresses the input onto its PCA projection,
    * ``encoder_logvar`` regresses the input onto a constant log-variance,
    * ``decoder`` regresses noisy latent samples back onto the input.

    Parameters
    ----------
    input_dim : int
        Number of input features; ``fit`` rejects data with a different width.
    latent_dim : int, default=32
        Size of the latent representation returned by ``extract_features``.
    hidden_dims : sequence of int, default=(128, 64)
        Hidden layer sizes of both encoders; the decoder uses them reversed.
    """

    # Constant log-variance used both as the regression target of
    # ``encoder_logvar`` and when sampling the decoder's training inputs.
    _TARGET_LOGVAR = -1.0

    def __init__(self, input_dim, latent_dim=32, hidden_dims=(128, 64)):
        # The default is an immutable tuple so it cannot be shared and
        # mutated across instances; lists passed by callers still work.
        hidden_dims = tuple(hidden_dims)
        self.input_dim = input_dim
        self.latent_dim = latent_dim
        self.encoder_mu = self._make_mlp(hidden_dims)
        self.encoder_logvar = self._make_mlp(hidden_dims)
        self.decoder = self._make_mlp(hidden_dims[::-1])
        self.is_fitted = False

    @staticmethod
    def _make_mlp(layer_sizes):
        """Build one sub-network; all three share the same hyper-parameters."""
        return MLPRegressor(hidden_layer_sizes=tuple(layer_sizes), activation='relu',
                            solver='adam', max_iter=200, random_state=42, warm_start=True)

    def reparameterize(self, mu, logvar):
        """Sample ``mu + eps * exp(0.5 * logvar)`` with ``eps ~ N(0, 1)``.

        The noise comes from NumPy's global RNG, so results are reproducible
        only if ``np.random.seed`` was called beforehand.
        """
        std = np.exp(0.5 * logvar)
        eps = np.random.randn(*mu.shape)
        return mu + eps * std

    def _predict_latent(self, X):
        """Run both encoders and return ``(mu, logvar)`` as 2-D arrays."""
        # MLPRegressor.predict flattens single-output predictions to 1-D;
        # restore the (n_samples, latent_dim) layout so latent_dim == 1 works.
        mu = np.asarray(self.encoder_mu.predict(X)).reshape(-1, self.latent_dim)
        logvar = np.asarray(self.encoder_logvar.predict(X)).reshape(-1, self.latent_dim)
        return mu, logvar

    def encode(self, X):
        """Return the predicted latent mean and log-variance for ``X``.

        Raises
        ------
        ValueError
            If the model has not been fitted yet.
        """
        if not self.is_fitted:
            raise ValueError("Model not fitted yet.")
        return self._predict_latent(X)

    def fit(self, X):
        """Train both encoders and the decoder on ``X``; return ``self``.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, its width differs from ``input_dim``, or
            ``latent_dim`` exceeds what PCA can produce for this data.
        """
        X = np.asarray(X)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (samples x features), got shape {X.shape}")
        n_samples, n_features = X.shape
        if n_features != self.input_dim:
            raise ValueError(
                f"X has {n_features} features but the model was built with input_dim={self.input_dim}"
            )
        # The PCA targets below cannot have more components than
        # min(n_samples, n_features); fail early with an actionable message.
        if self.latent_dim > min(n_samples, n_features):
            raise ValueError(
                f"latent_dim={self.latent_dim} exceeds min(n_samples, n_features)="
                f"{min(n_samples, n_features)}"
            )

        print("Training VAE...")
        pca_temp = PCA(n_components=self.latent_dim, random_state=42)
        mu_target = pca_temp.fit_transform(X)
        self.encoder_mu.fit(X, mu_target)
        target_logvar = np.full((n_samples, self.latent_dim), self._TARGET_LOGVAR)
        self.encoder_logvar.fit(X, target_logvar)

        # The decoder learns from samples drawn around the predicted means
        # using the constant target log-variance, not the predicted one.
        mu, _ = self._predict_latent(X)
        z = self.reparameterize(mu, np.full_like(mu, self._TARGET_LOGVAR))
        self.decoder.fit(z, X)

        # Only mark the model usable once every sub-network has been trained.
        self.is_fitted = True
        print("✅ VAE training complete!")
        return self

    def extract_features(self, X):
        """Return the latent means for ``X`` (deterministic, no sampling)."""
        mu, _ = self.encode(X)
        return mu

print("✅ VAE model defined!")


## Cell 3: Train VAE Model


In [None]:
# Sizes for this stage. latent_dim is reused by the PCA baseline in Cell 4.
latent_dim = 32
PRE_PCA_THRESHOLD = 500    # only pre-reduce inputs wider than this
PRE_PCA_COMPONENTS = 200   # width after the optional pre-reduction

n_samples, n_features = features_scaled.shape

# Reduce dimension if needed
if n_features > PRE_PCA_THRESHOLD:
    # PCA cannot return more components than min(n_samples, n_features);
    # n_features already exceeds the threshold here, so only n_samples can bind.
    pca_pre = PCA(n_components=min(PRE_PCA_COMPONENTS, n_samples), random_state=42)
    features_for_vae = pca_pre.fit_transform(features_scaled)
else:
    features_for_vae = features_scaled

# The VAE fits PCA targets of size latent_dim, so the same upper bound applies.
# Clamp instead of crashing on small datasets.
max_latent_dim = min(features_for_vae.shape)
if latent_dim > max_latent_dim:
    print(f"   latent_dim reduced from {latent_dim} to {max_latent_dim} to fit the data")
    latent_dim = max_latent_dim

# Train VAE
vae = SimpleVAE(input_dim=features_for_vae.shape[1], latent_dim=latent_dim)
vae.fit(features_for_vae)

# Extract latent features
vae_features = vae.extract_features(features_for_vae)
print(f"✅ VAE features extracted: {vae_features.shape}")


## Cell 4: Compare PCA + K-Means vs VAE + K-Means


In [None]:
# One cluster per distinct ground-truth label.
n_clusters = len(np.unique(labels))


def _cluster_and_score(embedding):
    """Fit K-Means on `embedding`; return (model, assignments, silhouette, calinski-harabasz)."""
    model = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    assignments = model.fit_predict(embedding)
    silhouette = silhouette_score(embedding, assignments)
    calinski = calinski_harabasz_score(embedding, assignments)
    return model, assignments, silhouette, calinski


def _show_scores(silhouette, calinski):
    """Print the two quality metrics for one clustering run."""
    print(f"  Silhouette Score: {silhouette:.4f}")
    print(f"  Calinski-Harabasz Index: {calinski:.4f}")


# VAE + K-Means
kmeans_vae, vae_clusters, vae_silhouette, vae_calinski = _cluster_and_score(vae_features)

# PCA + K-Means (Baseline): project the scaled features to the same width as the VAE latents.
pca = PCA(n_components=latent_dim, random_state=42)
pca_features = pca.fit_transform(features_scaled)
kmeans_pca, pca_clusters, pca_silhouette, pca_calinski = _cluster_and_score(pca_features)

# Results
banner = "=" * 60
print(banner)
print("CLUSTERING RESULTS")
print(banner)
print("\nVAE + K-Means:")
_show_scores(vae_silhouette, vae_calinski)
print("\nPCA + K-Means:")
_show_scores(pca_silhouette, pca_calinski)
print(banner)


## Cell 5: Visualize Clusters with t-SNE


In [None]:
def _tsne_embed(features, name):
    """Project `features` to 2-D with t-SNE; return (fitted TSNE, embedding)."""
    print(f"Computing t-SNE for {name} features...")
    # Perplexity is capped at n_samples - 1 so small datasets can still be embedded.
    tsne = TSNE(n_components=2, random_state=42, perplexity=min(30, len(features) - 1))
    return tsne, tsne.fit_transform(features)


def _draw_embedding(ax, points_2d, point_labels, name):
    """Scatter a 2-D embedding on `ax`, coloured by label, and return the artist."""
    scatter = ax.scatter(points_2d[:, 0], points_2d[:, 1], c=point_labels, cmap='tab10', alpha=0.6)
    ax.set_title(f'{name} Features - t-SNE Visualization')
    ax.set_xlabel('t-SNE Component 1')
    ax.set_ylabel('t-SNE Component 2')
    plt.colorbar(scatter, ax=ax)
    return scatter


# t-SNE for VAE features, then for PCA features
tsne_vae, vae_2d = _tsne_embed(vae_features, "VAE")
tsne_pca, pca_2d = _tsne_embed(pca_features, "PCA")

# Visualize both embeddings side by side, coloured by ground-truth label
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
scatter1 = _draw_embedding(ax1, vae_2d, labels, "VAE")
scatter2 = _draw_embedding(ax2, pca_2d, labels, "PCA")
fig.tight_layout()

# Save plot to PNG in results folder (os is imported with the other imports in Cell 1)
os.makedirs('results', exist_ok=True)
out_png = 'results/vae_clustering_easy_tsne.png'
fig.savefig(out_png, dpi=200, bbox_inches='tight')
print(f"✅ Saved plot: {out_png}")

plt.show()
print("✅ Visualizations complete!")
