# Notebook 03 — Model Training & Evaluation

This notebook trains two classifiers on the YAMNet embeddings produced in Notebook 02:
1. A lightweight TensorFlow Keras DNN (easy to convert to TFLite)
2. An XGBoost classifier (strong baseline)

It computes Accuracy, Confusion Matrix, ROC & AUC, Precision/Recall, and saves the best models.

**Inputs required**:
- `metadata/features_manifest.json` (created by Notebook 02)
- `data/features/*.npy` (embeddings)

Run this in Google Colab (recommended) or locally.


In [None]:
# Install dependencies (run once in Colab)
!pip install --quiet tensorflow scikit-learn xgboost matplotlib seaborn


In [None]:
import json
from pathlib import Path
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import joblib

# Project paths (relative to the notebook's working directory).
ROOT = Path('.')
FEATURES_MANIFEST = ROOT / 'metadata' / 'features_manifest.json'
MODEL_DIR = ROOT / 'models'
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Seed every RNG the notebook touches so that weight initialisation and dropout
# in the Keras model are repeatable across "Restart & Run All".
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# JSON is UTF-8 by specification; do not rely on the platform default encoding.
with open(FEATURES_MANIFEST, 'r', encoding='utf-8') as f:
    manifest = json.load(f)

print('Loaded manifest entries:', len(manifest))


In [None]:
# Load features and labels
# Each manifest entry contributes one embedding (loaded from 'feature_path') and
# one binary label: 1 when the entry's 'label' is 'familiar', 0 for anything else.
X = []
y = []
speaker_ids = []
n_missing = 0
for item in manifest:
    emb_path = item['feature_path']
    if not Path(emb_path).exists():
        # Best effort: report and skip entries whose embedding file is gone.
        print('Missing emb:', emb_path)
        n_missing += 1
        continue
    emb = np.load(emb_path)
    X.append(emb)
    label = 1 if item['label']=='familiar' else 0
    y.append(label)
    # NOTE(review): speaker_ids is collected here but no later cell reads it.
    speaker_ids.append(item.get('speaker_id'))

if n_missing:
    print(f'Skipped {n_missing} of {len(manifest)} manifest entries (embedding file not found)')

# Fail with an actionable message instead of np.stack's generic error on [].
if not X:
    raise ValueError(
        f'No embeddings could be loaded from {FEATURES_MANIFEST}; '
        'run Notebook 02 first and check the feature_path entries.'
    )

X = np.stack(X)
y = np.array(y)
print('Features shape', X.shape, 'Labels shape', y.shape)
# Class balance matters for the stratified split and for reading accuracy later.
print('Class counts [other, familiar]:', np.bincount(y, minlength=2))


In [None]:
# Train/Val/Test split
# Step 1 holds out 30% of the samples; step 2 halves that hold-out into
# validation and test. Both steps stratify on the label and use a fixed seed.
# NOTE(review): the split is per sample, not per speaker, so one speaker's clips
# can end up in more than one subset. Confirm that this is intended.
first_split = train_test_split(
    X, y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)
X_train, X_tmp, y_train, y_tmp = first_split

second_split = train_test_split(
    X_tmp, y_tmp,
    test_size=0.5,
    stratify=y_tmp,
    random_state=42,
)
X_val, X_test, y_val, y_test = second_split

print('Train', X_train.shape, 'Val', X_val.shape, 'Test', X_test.shape)


## A) Keras DNN classifier
A lightweight architecture that is easy to convert to TFLite for Android deployment.

In [None]:
from tensorflow.keras import layers, models, optimizers, callbacks

def build_dnn(input_dim=1024, hidden_units=(256, 64), dropout_rates=(0.3, 0.2), learning_rate=1e-3):
    """Build and compile a small fully connected binary classifier.

    The network is a stack of Dense(relu) + Dropout pairs followed by a single
    sigmoid unit. It is compiled with Adam, binary cross-entropy loss and
    accuracy as the tracked metric. The defaults reproduce the original
    256 -> 64 -> 1 architecture.

    Args:
        input_dim: Length of each input feature vector.
        hidden_units: Width of each hidden Dense layer, in order.
        dropout_rates: Dropout rate applied after each hidden layer; must have
            the same length as ``hidden_units``.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        The compiled Keras model.

    Raises:
        ValueError: If ``hidden_units`` and ``dropout_rates`` differ in length.
    """
    if len(hidden_units) != len(dropout_rates):
        raise ValueError(
            'hidden_units and dropout_rates must have the same length, got '
            f'{len(hidden_units)} and {len(dropout_rates)}'
        )

    inp = layers.Input(shape=(input_dim,))
    x = inp
    for units, rate in zip(hidden_units, dropout_rates):
        x = layers.Dense(units, activation='relu')(x)
        x = layers.Dropout(rate)(x)
    out = layers.Dense(1, activation='sigmoid')(x)

    model = models.Model(inp, out)
    model.compile(optimizer=optimizers.Adam(learning_rate), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Size the input layer from the data rather than relying on the default.
dnn = build_dnn(X_train.shape[1])
dnn.summary()


In [None]:
# Early stopping watches val_loss with a patience of 8 epochs and is asked to
# restore the best weights it saw.
es = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True,
)

# The checkpoint callback writes the model to disk only when val_loss improves.
best_checkpoint_path = str(MODEL_DIR / 'dnn_best.h5')
mc = callbacks.ModelCheckpoint(
    best_checkpoint_path,
    monitor='val_loss',
    save_best_only=True,
)

# Up to 100 epochs; the validation split drives both callbacks above.
history = dnn.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=32,
    callbacks=[es, mc],
)


In [None]:
import matplotlib.pyplot as plt
# Training vs. validation loss per epoch, drawn on an explicit Axes so the
# figure carries its own axis labels and does not depend on pyplot's global state.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='train_loss')
ax.plot(history.history['val_loss'], label='val_loss')
ax.set(title='DNN Loss', xlabel='Epoch', ylabel='Binary cross-entropy loss')
ax.legend()
plt.show()


In [None]:
# Evaluate DNN on test set
# The sigmoid output is the score for label 1 ('familiar'); 0.5 is the decision threshold.
dnn_preds = dnn.predict(X_test).ravel()
dnn_pred_labels = (dnn_preds >= 0.5).astype(int)
dnn_auc = roc_auc_score(y_test, dnn_preds)
print('DNN Accuracy:', accuracy_score(y_test, dnn_pred_labels))
print('DNN ROC AUC:', dnn_auc)
print(classification_report(y_test, dnn_pred_labels))

# labels=[0, 1] keeps the matrix 2x2 even if one class is never predicted.
cm = confusion_matrix(y_test, dnn_pred_labels, labels=[0, 1])
dnn_fpr, dnn_tpr, dnn_thresholds = roc_curve(y_test, dnn_preds)

fig, (ax_cm, ax_roc) = plt.subplots(1, 2, figsize=(11, 4))

# Confusion matrix with named classes (0 = other, 1 = familiar).
sns.heatmap(cm, annot=True, fmt='d', ax=ax_cm,
            xticklabels=['other', 'familiar'], yticklabels=['other', 'familiar'])
ax_cm.set(title='DNN Confusion Matrix', xlabel='Predicted label', ylabel='True label')

# ROC curve, with the diagonal as the chance-level reference.
ax_roc.plot(dnn_fpr, dnn_tpr, label=f'DNN (AUC = {dnn_auc:.3f})')
ax_roc.plot([0, 1], [0, 1], linestyle='--', color='grey', label='chance')
ax_roc.set(title='DNN ROC Curve', xlabel='False positive rate', ylabel='True positive rate')
ax_roc.legend(loc='lower right')

fig.tight_layout()
plt.show()


## B) XGBoost classifier
A strong tree-based baseline. We'll train and compare.


In [None]:
import xgboost as xgb
from sklearn.model_selection import GridSearchCV

# Train XGBoost with default hyperparameters on the same training split.
# `use_label_encoder` is deprecated and ignored by current XGBoost releases,
# so it is no longer passed; labels are already 0/1 integers.
xgb_clf = xgb.XGBClassifier(eval_metric='logloss')
xgb_clf.fit(X_train, y_train)

# Column 1 of predict_proba is the score for label 1 ('familiar').
xgb_preds = xgb_clf.predict_proba(X_test)[:,1]
xgb_labels = (xgb_preds >= 0.5).astype(int)
xgb_auc = roc_auc_score(y_test, xgb_preds)
print('XGBoost Accuracy:', accuracy_score(y_test, xgb_labels))
print('XGBoost ROC AUC:', xgb_auc)
print(classification_report(y_test, xgb_labels))

# labels=[0, 1] keeps the matrix 2x2 even if one class is never predicted.
cm2 = confusion_matrix(y_test, xgb_labels, labels=[0, 1])
xgb_fpr, xgb_tpr, xgb_thresholds = roc_curve(y_test, xgb_preds)

fig, (ax_cm, ax_roc) = plt.subplots(1, 2, figsize=(11, 4))

# Confusion matrix with named classes (0 = other, 1 = familiar).
sns.heatmap(cm2, annot=True, fmt='d', ax=ax_cm,
            xticklabels=['other', 'familiar'], yticklabels=['other', 'familiar'])
ax_cm.set(title='XGBoost Confusion Matrix', xlabel='Predicted label', ylabel='True label')

# ROC curve, with the diagonal as the chance-level reference.
ax_roc.plot(xgb_fpr, xgb_tpr, label=f'XGBoost (AUC = {xgb_auc:.3f})')
ax_roc.plot([0, 1], [0, 1], linestyle='--', color='grey', label='chance')
ax_roc.set(title='XGBoost ROC Curve', xlabel='False positive rate', ylabel='True positive rate')
ax_roc.legend(loc='lower right')

fig.tight_layout()
plt.show()


In [None]:
# Save models
# The DNN is written as a SavedModel directory because the TFLite export cell
# reads that directory back with `from_saved_model`.
dnn_saved_model_dir = str(MODEL_DIR / 'dnn_final')
try:
    dnn.save(dnn_saved_model_dir)
except ValueError:
    # Keras 3 refuses extension-less paths in save() and directs SavedModel
    # output to export(); older Keras versions never reach this branch.
    dnn.export(dnn_saved_model_dir)

joblib.dump(xgb_clf, str(MODEL_DIR / 'xgb_final.joblib'))
print('Saved DNN and XGBoost models to', MODEL_DIR)


In [None]:
# Optional: export DNN to TensorFlow Lite for Android
# Converts the SavedModel directory written by the save cell, with the
# converter's default optimizations enabled.
converter = tf.lite.TFLiteConverter.from_saved_model(str(MODEL_DIR / 'dnn_final'))
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# write_bytes opens, writes and closes the file, so no handle is left open.
tflite_path = MODEL_DIR / 'dnn.tflite'
tflite_path.write_bytes(tflite_model)
print('Saved TFLite model:', tflite_path)


## Next steps
- Tune thresholds, collect more in-domain (Indian accent) data, and consider metric learning (AM-Softmax, triplet loss) for improved embeddings.
- Integrate the model into the backend for real-time verification, and into the Android client using TFLite.
