# Doorbell Detection Model Training

This notebook trains a machine learning model to detect doorbell sounds from audio recordings.

## 1. Setup and Imports

Import all required libraries, including the project-local audio processing utilities.

In [None]:
# Import required libraries
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

# Import our custom audio processing utilities
from doorbell_detection_utils import process_dataset, extract_features


## 2. Configure Directories and Parameters

Set up paths and audio processing parameters.

In [2]:
# Define parameters for audio processing and feature extraction.
# Every value below is forwarded to process_dataset() in the data-loading cell;
# what each one does in detail is decided by doorbell_detection_utils.
sample_rate = 16000       # presumably in Hz — confirm against doorbell_detection_utils
segment_duration = 1.0    # in seconds
hop_duration = 0.5        # in seconds
n_mfcc = 20               # Number of MFCC coefficients
n_mels = 40               # Number of Mel bands
n_fft = 1024              # FFT window size
hop_length = 512          # Hop length for FFT
augment = True            # Whether to apply data augmentation
n_max_background_samples = 2000  # Maximum number of negative examples
# Multiplied by n_max_background_samples to obtain the max_mixed_samples argument
n_max_mixed_samples_coef = 0.3   # Coefficient to calculate the maximum number of mixed examples

## 3. Load and Process Audio Data

Now we'll process the audio files and extract features for our model.

In [None]:
# Set paths for audio files
audio_samples_path = './audio_samples'
os.makedirs(audio_samples_path, exist_ok=True)
print(f"Audio samples directory: {audio_samples_path}")

# Locate the doorbell recording; the MP3 is preferred when both formats exist
doorbell_path_mp3 = os.path.join(audio_samples_path, 'doorbell.mp3')
doorbell_path_wav = os.path.join(audio_samples_path, 'doorbell.wav')
doorbell_path = next(
    (path for path in (doorbell_path_mp3, doorbell_path_wav) if os.path.exists(path)),
    None,
)

if doorbell_path is None:
    # Fail fast with an actionable message: every later cell needs `features`
    # and `labels`, so continuing would only surface as a confusing NameError.
    raise FileNotFoundError(
        "doorbell.mp3 or doorbell.wav not found! "
        "Please add your doorbell recording as 'doorbell.mp3' or 'doorbell.wav' "
        f"in the audio_samples folder ({audio_samples_path}). "
        "The model cannot be trained without a doorbell sample."
    )

print(f"Doorbell sample found: {doorbell_path}")

# The coefficient is a float, but the limit is a sample count, so pass an integer
n_max_mixed_samples = int(round(n_max_background_samples * n_max_mixed_samples_coef))

# Process audio data and extract features
features, labels = process_dataset(
    doorbell_path=doorbell_path,
    audio_dirs=[audio_samples_path],
    sample_rate=sample_rate,
    segment_duration=segment_duration,
    hop_duration=hop_duration,
    n_mfcc=n_mfcc,
    n_mels=n_mels,
    n_fft=n_fft,
    hop_length=hop_length,
    augment=augment,
    mix_background=True,
    mix_ratio_range=(0.1, 0.5),                       # Proportion mix of background and doorbell
    max_background_samples=n_max_background_samples,  # Limit of negative examples
    max_mixed_samples=n_max_mixed_samples,            # Mixed examples to generate
)

## 4. Explore Processed Data

Let's examine the extracted features and check the balance of our dataset.

In [None]:
# Report dataset size and feature width
print(f"Total samples: {len(features)}")
print(f"Number of features per sample: {features.shape[1]}")

# Per-class counts: the sum of the labels is used as the doorbell count
doorbell_count = np.sum(labels)
background_count = len(labels) - doorbell_count
print(f"Doorbell samples: {doorbell_count}")
print(f"Background noise samples: {background_count}")

# Share of each class in the full dataset
doorbell_share = doorbell_count / len(labels)
background_share = 1 - doorbell_share
print(f"Class balance: {doorbell_share:.2%} doorbell vs {background_share:.2%} background")

## 5. Split Data into Training and Validation Sets

We'll use stratified sampling to maintain the class distribution.

In [None]:
# Split the dataset: 80% training / 20% validation, stratified on the labels so
# both subsets keep the same class ratio; random_state makes the split repeatable
X_train, X_val, y_train, y_val = train_test_split(
    features, labels, test_size=0.2, random_state=42, stratify=labels
)

print(f"Training set: {X_train.shape[0]} samples")
print(f"Validation set: {X_val.shape[0]} samples")

# Report the class counts of each subset with one shared message format
for split_name, split_labels in (("Training", y_train), ("Validation", y_val)):
    doorbell_count = np.sum(split_labels)
    background_count = len(split_labels) - doorbell_count
    print(f"{split_name} set distribution: {doorbell_count} doorbell, {background_count} background sounds")


## 6. Define the Model Architecture

We'll create a simple neural network with dropout for regularization.

In [None]:
# Seed TensorFlow so its random operations (e.g. weight initialisation) are
# repeatable between runs; 42 matches the seed used for the train/validation split
tf.random.set_seed(42)

# Define the model: two hidden ReLU layers with dropout, and a single sigmoid
# output unit giving the predicted doorbell probability
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Compile the model.
# The metrics are named explicitly: unnamed instances get an auto-incremented
# suffix ('precision_1', ...) when this cell is re-run in the same kernel, which
# would break the history['precision'] / history['recall'] lookups used when
# plotting the training curves.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
    ]
)

# Print model summary
model.summary()

## 7. Train the Model

We'll use early stopping to prevent overfitting and learning rate reduction to improve convergence.

In [None]:
# Stop training when the validation loss has not improved for 10 epochs,
# restoring the best weights seen
early_stopping = EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True, verbose=1
)

# Halve the learning rate after 5 epochs without validation-loss improvement,
# with a floor of 1e-4
lr_reducer = ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=5, min_lr=0.0001, verbose=1
)

callbacks = [early_stopping, lr_reducer]

# Fit on the training split and evaluate on the validation split every epoch
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=100,
    callbacks=callbacks,
    verbose=1,
)

## 8. Evaluate the Model

Let's check how well our model performs by analyzing metrics and visualizing results.

In [None]:
# Probability above which a sample is classified as a doorbell
decision_threshold = 0.5

# Make predictions on validation set
y_pred_prob = model.predict(X_val)
y_pred = (y_pred_prob > decision_threshold).astype("int32")

# Calculate evaluation metrics.
# labels=[0, 1] forces a 2x2 matrix even if one class is missing from the
# validation data; without it the matrix collapses to 1x1 and the per-cell
# lookups below raise IndexError. Assumes labels are encoded as 0 = background,
# 1 = doorbell — confirm against process_dataset.
conf_matrix = confusion_matrix(y_val, y_pred, labels=[0, 1])
tn, fp, fn, tp = conf_matrix.ravel()
# zero_division=0 keeps the score at 0 (without a warning) when nothing is predicted positive
precision = precision_score(y_val, y_pred, zero_division=0)
recall = recall_score(y_val, y_pred, zero_division=0)
f1 = f1_score(y_val, y_pred, zero_division=0)

# Print metrics
print("\nModel Evaluation:")
print("Confusion Matrix:")
print(conf_matrix)
print(f"True Positives: {tp}")
print(f"False Positives: {fp}")
print(f"True Negatives: {tn}")
print(f"False Negatives: {fn}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

# Plot training history: accuracy, loss, and precision/recall per epoch
fig, (ax_acc, ax_loss, ax_pr) = plt.subplots(1, 3, figsize=(15, 5))

ax_acc.plot(history.history['accuracy'])
ax_acc.plot(history.history['val_accuracy'])
ax_acc.set(title='Model Accuracy', xlabel='Epoch', ylabel='Accuracy')
ax_acc.legend(['Train', 'Validation'], loc='lower right')

ax_loss.plot(history.history['loss'])
ax_loss.plot(history.history['val_loss'])
ax_loss.set(title='Model Loss', xlabel='Epoch', ylabel='Loss')
ax_loss.legend(['Train', 'Validation'], loc='upper right')

# Plot order must match the legend entries below
for metric_key in ('precision', 'val_precision', 'recall', 'val_recall'):
    ax_pr.plot(history.history[metric_key])
ax_pr.set(title='Precision & Recall', xlabel='Epoch')
ax_pr.legend(['Precision', 'Val Precision', 'Recall', 'Val Recall'], loc='lower right')

fig.tight_layout()
plt.show()

# Visualize predictions: one point per validation sample, coloured by its true
# class, with the decision threshold drawn as a horizontal line
fig, ax = plt.subplots(figsize=(10, 6))
points = ax.scatter(range(len(y_val)), y_pred_prob, c=y_val, cmap='coolwarm', alpha=0.6)
ax.axhline(y=decision_threshold, color='r', linestyle='-')
ax.set(title='Model Predictions', xlabel='Sample Index', ylabel='Prediction Probability')
fig.colorbar(points, ax=ax, label='Actual Class')
plt.show()

## 9. Save the Model

Let's save our trained model for deployment.

In [None]:
# Create the models directory if it does not exist yet, and derive both output paths
models_dir = '../models'
os.makedirs(models_dir, exist_ok=True)
model_path = os.path.join(models_dir, 'doorbell_detector.h5')
tflite_model_path = os.path.join(models_dir, 'doorbell_detector.tflite')

# Save in H5 format (TensorFlow/Keras)
model.save(model_path)
print(f"Model saved to {model_path}")

# Convert to TFLite format (embedded devices) and write the result to disk
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
with open(tflite_model_path, 'wb') as tflite_file:
    tflite_file.write(tflite_model)
print(f"TFLite model saved to {tflite_model_path}")