# Notebook 02 — Feature Extraction (YAMNet embeddings)
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Ayush-1204/Speaker_Recognition_System/blob/main/notebooks/02_feature_extraction.ipynb)

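This notebook uses **YAMNet** (TensorFlow Hub) to extract one clip-level embedding — the mean of the model's per-frame embeddings — for every processed WAV under `data/processed/`. Each embedding is saved to `data/features/`, and a manifest listing every WAV, its embedding file, its label, and its speaker ID is written to `metadata/`.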

**Outputs**:
- Embeddings saved to `data/features/*.npy`
- Manifest saved to `metadata/features_manifest.json`


In [None]:
!pip install --quiet tensorflow tensorflow_hub soundfile librosa numpy


In [None]:
from pathlib import Path
import json
import numpy as np
import soundfile as sf
import librosa
import tensorflow as tf
import tensorflow_hub as hub

# Project layout, relative to the directory the notebook is run from.
ROOT = Path('.')
PROCESSED = ROOT.joinpath('data', 'processed')    # input: processed WAV files
FEATURES = ROOT.joinpath('data', 'features')      # output: one .npy embedding per WAV
METADATA_DIR = ROOT.joinpath('metadata')          # output: manifest directory
FEATURES_MANIFEST = METADATA_DIR.joinpath('features_manifest.json')

# Only the output directories are created here; PROCESSED is read, never made.
for p in (FEATURES, METADATA_DIR):
    p.mkdir(parents=True, exist_ok=True)
print('Processed dir:', PROCESSED)


In [None]:
def load_wav(path, target_sr=16000):
    """Read an audio file as a single-channel float32 waveform.

    Args:
        path: Location of the audio file; converted to ``str`` before being
            handed to ``soundfile``.
        target_sr: Sample rate (Hz) the returned waveform should have.

    Returns:
        A ``(waveform, target_sr)`` tuple. ``waveform`` is a float32 array;
        it is resampled with ``librosa`` only when the file's own rate
        differs from ``target_sr``.
    """
    samples, native_sr = sf.read(str(path))

    # More than one dimension means several channels: average over axis 1
    # to collapse them into a single channel.
    mono = samples.mean(axis=1) if samples.ndim > 1 else samples

    if native_sr == target_sr:
        return mono.astype('float32'), target_sr

    resampled = librosa.resample(
        mono.astype('float32'), orig_sr=native_sr, target_sr=target_sr
    )
    return resampled.astype('float32'), target_sr

# Recursively collect every .wav under PROCESSED. rglob yields entries in
# directory-scan order, which is not guaranteed to be stable, so the list is
# sorted to keep processing order (and the manifest built from it) reproducible.
wav_list = sorted(PROCESSED.rglob('*.wav'))
print('Found', len(wav_list), 'files')


In [None]:
# TensorFlow Hub handle identifying the YAMNet model to load.
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
# Load the model once at cell scope; extract_emb below calls this `yamnet`
# object for every waveform instead of reloading it.
yamnet = hub.load(yamnet_model_handle)
print('YAMNet loaded')

def extract_emb(waveform):
    """Return one clip-level embedding for ``waveform``.

    The waveform is converted to a float32 tensor and passed through the
    module-level ``yamnet`` model. The model's second output (the per-frame
    embeddings) is averaged over axis 0, giving a single vector per clip.

    Args:
        waveform: Audio samples convertible to a float32 tensor.
            NOTE(review): presumably mono 16 kHz audio as produced by
            ``load_wav`` — confirm against the YAMNet model card.

    Returns:
        A NumPy array: the mean of the frame embeddings along axis 0.
    """
    samples = tf.convert_to_tensor(waveform, dtype=tf.float32)
    # The model returns exactly three values; only the middle one is needed.
    _first, frame_embeddings, _third = yamnet(samples)
    return np.mean(frame_embeddings.numpy(), axis=0)


In [None]:
# Extract and save one embedding per processed WAV, and record each result in
# the manifest that is written to FEATURES_MANIFEST at the end.
manifest = []
for wav_path in wav_list:
    rel = wav_path.relative_to(PROCESSED)
    parts = rel.parts

    audio, sr = load_wav(wav_path)
    emb = extract_emb(audio)

    # Flatten the relative path into a single file name by joining its
    # components. Replacing '/' in str(rel) only works where '/' is the path
    # separator; elsewhere the name would still contain subdirectories that
    # do not exist under FEATURES and np.save would fail.
    feat_path = FEATURES / ('_'.join(parts) + '.npy')
    np.save(str(feat_path), emb)

    # Anything not under the top-level 'familiar' directory is a stranger.
    label = 'familiar' if parts[0] == 'familiar' else 'stranger'
    # The speaker ID is the directory directly below 'familiar'. A WAV placed
    # straight under 'familiar/' has no such directory, so parts[1] would be
    # the file name itself; record None rather than a bogus speaker ID.
    has_speaker_dir = label == 'familiar' and len(parts) > 2
    speaker_id = parts[1] if has_speaker_dir else None

    manifest.append({'wav': str(wav_path), 'emb': str(feat_path), 'label': label, 'speaker_id': speaker_id})

with open(FEATURES_MANIFEST, 'w', encoding='utf-8') as f:
    json.dump(manifest, f, indent=2)
print('✅ Done — Manifest saved:', FEATURES_MANIFEST)
