# CAV Visualization

This notebook visualizes the generated CAV embeddings using dimensionality reduction techniques (UMAP/t-SNE) and clustering analysis.


In [None]:
import sys
import os
sys.path.insert(0, os.path.abspath('../'))

import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.manifold import TSNE
# UMAP is an optional dependency: record whether it could be imported so that
# later cells can skip the UMAP projection when the package is missing.
try:
    import umap
except ImportError:
    HAS_UMAP = False
    print("UMAP not installed. Install with: pip install umap-learn")
else:
    HAS_UMAP = True

# Plot appearance: apply the matplotlib style sheet first, then set the
# seaborn color palette on top of it.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")


## Load CAV Data


In [None]:
# Load data
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default encoding (which differs between operating systems).
with open('../data/edon_cav.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

print(f"Loaded {len(data)} CAV records")

# Extract embeddings and metadata as parallel sequences: position i in each
# one comes from record i of `data`.
embeddings = np.array([r['cav128'] for r in data])
activities = [r['activity'] for r in data]
hr_values = [r['bio']['hr'] for r in data]
temp_values = [r['env']['temp_c'] for r in data]

print(f"Embedding shape: {embeddings.shape}")

# Fail early with a readable message: the cells below index rows of a 2-D
# matrix, which an empty file or ragged 'cav128' vectors would not produce.
assert embeddings.ndim == 2 and len(embeddings) > 0, (
    "Expected a non-empty 2-D embedding matrix (records x dimensions); "
    f"got shape {embeddings.shape}"
)


## Dimensionality Reduction

Reduce the 128-D embeddings to 2-D for visualization using t-SNE and, when the `umap-learn` package is installed, UMAP.


In [None]:
# Sample for faster computation (optional): the projections below are run on
# at most 5000 records.
sample_size = min(5000, len(embeddings))

# Draw the subset from a seeded generator so that Restart & Run All selects
# the same records every time; an unseeded draw would change the plots on
# every run even though the projections themselves use a fixed random_state.
sample_rng = np.random.default_rng(42)
indices = sample_rng.choice(len(embeddings), sample_size, replace=False)

# Apply the same indices to every parallel sequence so rows stay aligned.
embeddings_sample = embeddings[indices]
activities_sample = [activities[i] for i in indices]
hr_sample = [hr_values[i] for i in indices]

print(f"Sampling {sample_size} records for visualization")


In [None]:
# t-SNE
# scikit-learn rejects a perplexity that is not strictly smaller than the
# number of samples, so shrink the usual value of 30 when the sample is small.
# For samples with more than 30 rows this is exactly perplexity=30.
tsne_perplexity = min(30, max(1, len(embeddings_sample) - 1))

print("Computing t-SNE...")
tsne = TSNE(n_components=2, random_state=42, perplexity=tsne_perplexity)
embeddings_2d_tsne = tsne.fit_transform(embeddings_sample)

# UMAP (if available); left as None otherwise so later cells can test for it.
if HAS_UMAP:
    print("Computing UMAP...")
    reducer = umap.UMAP(n_components=2, random_state=42)
    embeddings_2d_umap = reducer.fit_transform(embeddings_sample)
else:
    embeddings_2d_umap = None


## Visualize by Activity


In [None]:
# Map activities to numeric values for coloring
unique_activities = sorted(set(activities_sample))
activity_to_num = {act: i for i, act in enumerate(unique_activities)}
activity_nums = [activity_to_num[act] for act in activities_sample]

# Use exactly one color per activity. Resampling 'tab10' to n_activities
# entries and centering the color limits on the integer codes gives every
# activity its own colorbar band with the tick label in the middle of it
# (a continuous 10-band colorbar would show colors no point uses).
n_activities = len(unique_activities)
activity_cmap = plt.get_cmap('tab10', n_activities)

# Panels to draw: t-SNE always, UMAP only when it was computed.
projections = [('t-SNE', embeddings_2d_tsne)]
if HAS_UMAP:
    projections.append(('UMAP', embeddings_2d_umap))

fig, axes = plt.subplots(1, len(projections), figsize=(15, 6))
# plt.subplots returns a bare Axes for a single panel; normalize to a 1-D array.
axes = np.atleast_1d(axes)

for ax, (method, coords) in zip(axes, projections):
    scatter = ax.scatter(coords[:, 0], coords[:, 1],
                         c=activity_nums, cmap=activity_cmap,
                         vmin=-0.5, vmax=n_activities - 0.5,
                         alpha=0.6, s=10)
    ax.set_title(f'{method} Visualization by Activity')
    ax.set_xlabel(f'{method} 1')
    ax.set_ylabel(f'{method} 2')
    # Create custom colorbar with activity labels
    cbar = plt.colorbar(scatter, ax=ax)
    cbar.set_ticks(range(n_activities))
    cbar.set_ticklabels(unique_activities)
    cbar.set_label('Activity')

plt.tight_layout()
plt.show()


## Visualize by Heart Rate


In [None]:
# Reuse the t-SNE layout, this time coloring each point by its heart-rate value.
fig, ax = plt.subplots(1, 1, figsize=(10, 8))

# The two columns of the projection are the x and y coordinates.
scatter = ax.scatter(
    *embeddings_2d_tsne[:, :2].T,
    c=hr_sample,
    cmap='coolwarm',
    alpha=0.6,
    s=10,
)
ax.set(
    title='t-SNE Visualization by Heart Rate',
    xlabel='t-SNE 1',
    ylabel='t-SNE 2',
)
fig.colorbar(scatter, ax=ax, label='Heart Rate (BPM)')
fig.tight_layout()
plt.show()
