# 📈 Notebook 05 – Visualizações Avançadas
Este notebook contém visualizações avançadas para o projeto MelodIA:

- t-SNE e UMAP das embeddings do BirdNET
- Visualização de MFCCs (2D e 3D)
- Espectrograma 3D (surface plot)

**Observação:** instale as dependências necessárias: `scikit-learn`, `umap-learn`, `librosa`, `plotly` (opcional), `matplotlib`.

In [None]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.manifold import TSNE
import umap
import librosa
import librosa.display
import pickle
import os

# Plotly is optional: record whether it could be imported so that later
# cells can skip the interactive plot when it is missing.
try:
    import plotly.express as px
except Exception:
    # Any import-time failure (not only ImportError) disables Plotly output.
    PLOTLY = False
else:
    PLOTLY = True

print('Imports OK. Plotly available:', PLOTLY)

## 1) Carregar embeddings

Carregue o arquivo `data/embeddings.pkl`, que deve conter um DataFrame com colunas `embedding` (iterable) e `species`. Se você salvou em outro diretório, ajuste o caminho.

In [None]:
# Load the precomputed embeddings table.
# NOTE: pandas.read_pickle unpickles arbitrary Python objects; only load
# files you produced yourself or otherwise trust.
emb_path = 'data/embeddings.pkl'
if not os.path.exists(emb_path):
    raise FileNotFoundError(f'Arquivo de embeddings não encontrado em: {emb_path}')

df = pd.read_pickle(emb_path)

# Fail early with a clear message if the expected columns are absent.
missing_cols = {'embedding', 'species'} - set(df.columns)
if missing_cols:
    raise KeyError(f'Colunas ausentes em {emb_path}: {sorted(missing_cols)}')

# Stack the per-row embedding vectors into a 2D array (one row per sample).
X = np.vstack(df['embedding'].to_numpy())
y = df['species'].to_numpy()

print('Embeddings shape:', X.shape)
print('Número de classes:', len(np.unique(y)))

## 2) t-SNE (2D) das embeddings

Reduza para 2D com t-SNE e plote um scatter colorido por espécie (mostre apenas top-N espécies para melhor visualização).

In [None]:
from collections import Counter

# Keep only the N most frequent species so the scatter plot stays readable.
N = 8
top_species = [s for s, _ in Counter(y).most_common(N)]
mask = np.isin(y, top_species)
X_sel = X[mask]
y_sel = y[mask]

print('Visualizando espécies:', top_species)

# scikit-learn requires perplexity < n_samples. Cap it so that a small
# selection does not crash; the library default (30) is kept whenever
# there are enough samples.
perplexity = min(30, max(1, len(X_sel) - 1))
tsne = TSNE(
    n_components=2,
    random_state=42,
    init='pca',
    learning_rate='auto',
    perplexity=perplexity,
)
X_tsne = tsne.fit_transform(X_sel)

# One scatter call per species so each gets its own colour and legend entry.
plt.figure(figsize=(10, 8))
for sp in np.unique(y_sel):
    idx = y_sel == sp
    plt.scatter(X_tsne[idx, 0], X_tsne[idx, 1], label=sp, s=30)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.title(f't-SNE das embeddings (top {N} espécies)')
plt.xlabel('tsne-1')
plt.ylabel('tsne-2')
plt.tight_layout()
plt.show()

## 3) UMAP (2D e 3D)

Fazer redução com UMAP; UMAP costuma preservar melhor a estrutura global em comparação ao t-SNE e é mais rápido.

In [None]:
# Project the selected embeddings to 2D with UMAP (fixed random_state).
reducer = umap.UMAP(n_components=2, random_state=42)
X_umap2 = reducer.fit_transform(X_sel)

# One scatter call per species so each gets its own colour and legend entry.
plt.figure(figsize=(10, 8))
for sp in np.unique(y_sel):
    idx = y_sel == sp
    plt.scatter(X_umap2[idx, 0], X_umap2[idx, 1], label=sp, s=30)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.title(f'UMAP 2D das embeddings (top {N} espécies)')
# Axis labels mirror the ones used on the t-SNE plot.
plt.xlabel('umap-1')
plt.ylabel('umap-2')
plt.tight_layout()
plt.show()

In [None]:
# UMAP 3D: same selection as the 2D plots, projected to three components.
reducer3 = umap.UMAP(n_components=3, random_state=42)
X_umap3 = reducer3.fit_transform(X_sel)

fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')
# One scatter call per species so each gets its own colour and legend entry.
for sp in np.unique(y_sel):
    idx = y_sel == sp
    ax.scatter(X_umap3[idx, 0], X_umap3[idx, 1], X_umap3[idx, 2], label=sp, s=20)
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax.set_title(f'UMAP 3D (top {N} espécies)')
ax.set_xlabel('umap-1')
ax.set_ylabel('umap-2')
ax.set_zlabel('umap-3')
# The legend sits outside the axes; tight_layout keeps it inside the figure.
plt.tight_layout()
plt.show()

## 4) MFCCs — visualização 2D e 3D

Extraia MFCCs de alguns áudios de amostra e plote os 13-20 primeiros coeficientes; agregue por tempo (mean, std) e visualize em 3D (coeficiente vs tempo vs amplitude média).

In [None]:
# Example audio files (expected under samples/).
sample_files = ['samples/ave1.wav', 'samples/ave2.wav', 'samples/ave3.wav']

# Collect the paths that do not exist and warn about each one; the list is
# kept so later code can check it before trying to load a file.
missing_files = [path for path in sample_files if not os.path.exists(path)]
for path in missing_files:
    print('Aviso: arquivo de exemplo não encontrado:', path)
# Helper that extracts MFCCs from an audio file
def compute_mfcc(audio_path, sr=22050, n_mfcc=20):
    """Load an audio file and compute its MFCC matrix.

    Args:
        audio_path: Path of the audio file passed to ``librosa.load``.
        sr: Sample rate requested from ``librosa.load``.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        A ``(mfcc, sr)`` tuple: the array returned by
        ``librosa.feature.mfcc`` and the sample rate returned by
        ``librosa.load``.
    """
    waveform, sample_rate = librosa.load(audio_path, sr=sr)
    coefficients = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=n_mfcc)
    return coefficients, sample_rate

# 2D MFCC heatmap for the first sample file
mfcc, sr = compute_mfcc(sample_files[0])
plt.figure(figsize=(12, 5))
# Pass the sample rate actually used for loading: specshow otherwise assumes
# its own default (22050 Hz), which would mislabel the time axis if
# compute_mfcc were called with a different sr.
librosa.display.specshow(mfcc, sr=sr, x_axis='time')
plt.colorbar()
plt.title('MFCC ({} coeficientes) - {}'.format(mfcc.shape[0], os.path.basename(sample_files[0])))
plt.show()

In [None]:
# 3D surface of the MFCCs (coefficient x time frame x value)
from mpl_toolkits.mplot3d import Axes3D

mfcc, sr = compute_mfcc(sample_files[0], n_mfcc=20)
n_coefs, T = mfcc.shape
coefs = np.arange(1, n_coefs + 1)  # 1-based coefficient numbers

# Build the grid: rows follow time frames, columns follow coefficients,
# matching the transposed MFCC matrix below.
Coefs, Time = np.meshgrid(coefs, np.arange(T))
Z = mfcc.T  # shape (T, n_mfcc)

# Thin out the time axis so the surface has at most a few hundred rows.
step = max(1, T // 150)
rows = slice(None, None, step)

fig = plt.figure(figsize=(14, 6))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(Coefs[rows], Time[rows], Z[rows], cmap='viridis')
ax.set_title('Espectro MFCC 3D - {}'.format(os.path.basename(sample_files[0])))
ax.set_xlabel('Coeficiente MFCC')
ax.set_ylabel('Frame (tempo)')
ax.set_zlabel('Amplitude')
plt.show()

## 5) Espectrograma 3D (Surface plot)

Converter o Mel-spectrogram em um gráfico 3D de superfície: tempo (x), frequência (y), intensidade (z).

In [None]:
def plot_spectrogram_3d(audio_path, n_mels=128, hop_length=512):
    """Plot the mel spectrogram of an audio file as a 3D surface.

    The audio is loaded at its native sample rate (``sr=None``), converted
    to a mel power spectrogram expressed in dB relative to its maximum, and
    drawn with time on the x axis, frequency on the y axis and level on the
    z axis.

    Args:
        audio_path: Path of the audio file passed to ``librosa.load``.
        n_mels: Number of mel bands in the spectrogram.
        hop_length: Hop between analysis frames, in samples. It is used both
            for the spectrogram and for the time axis so the two agree.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    y, sr = librosa.load(audio_path, sr=None)
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, hop_length=hop_length)
    S_db = librosa.power_to_db(S, ref=np.max)

    # Time axis: one value per frame, derived from the hop length instead of
    # being approximated from the total signal duration.
    n_frames = S_db.shape[1]
    time = librosa.frames_to_time(np.arange(n_frames), sr=sr, hop_length=hop_length)

    # Frequency axis: mel bands are NOT evenly spaced in Hz, so a linear
    # ramp would mislabel the axis. librosa builds its n_mels triangular
    # filters from n_mels + 2 mel-spaced frequencies between 0 and sr/2;
    # the interior points are the band centres.
    freq = librosa.mel_frequencies(n_mels=n_mels + 2, fmin=0.0, fmax=sr / 2)[1:-1]

    # Both grids have shape (n_mels, n_frames), matching S_db.
    Time, Freq = np.meshgrid(time, freq)

    fig = plt.figure(figsize=(14, 7))
    ax = fig.add_subplot(111, projection='3d')
    surf = ax.plot_surface(Time, Freq, S_db, cmap='magma', linewidth=0, antialiased=False)
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Frequency (Hz)')
    ax.set_zlabel('Amplitude (dB)')
    ax.set_title('Spectrogram 3D - ' + os.path.basename(audio_path))
    fig.colorbar(surf, shrink=0.5, aspect=10)
    plt.show()

# Render the 3D spectrogram for the first entry of sample_files.
# NOTE(review): nothing here checks that the file exists; presumably
# librosa.load raises if it is missing — confirm before running.
plot_spectrogram_3d(sample_files[0])

## 6) Plot interativo (Plotly) — UMAP (se plotly estiver disponível)

Plotly oferece interação (zoom, hover) que é útil em apresentações; caso não esteja instalado, o notebook seguirá com matplotlib estático.

In [None]:
# Interactive version of the 2D UMAP scatter. It reuses X_umap2 and y_sel
# from the earlier cells and runs only when Plotly was imported successfully.
if PLOTLY:
    df_plot = pd.DataFrame(X_umap2, columns=['x', 'y'])
    df_plot['species'] = y_sel
    fig = px.scatter(df_plot, x='x', y='y', color='species', title='UMAP 2D interativo (Plotly)')
    fig.show()
else:
    print('Plotly não disponível. Instale com: pip install plotly')