In [1]:
import pyaudio
import numpy as np
import tensorflow as tf
import soundfile as sf
import numpy as np

# -----------------------
# Constants (match training)
# -----------------------
SAMPLE_RATE = 16000        # audio sample rate in Hz
DURATION_SECONDS = 1.25    # clip length in seconds
# Samples per clip, derived instead of hard-coded so it cannot drift from the
# sample rate / duration (16000 * 1.25 = 20000).
NUM_SAMPLES = int(SAMPLE_RATE * DURATION_SECONDS)
NUM_MELS = 40              # number of mel bands in the spectrogram
FRAME_LENGTH = 512         # STFT window length in samples
FRAME_STEP = 160           # STFT hop length in samples
FFT_LENGTH = 512           # FFT size in samples
FMIN = 80.0                # lower edge of the mel filterbank in Hz
FMAX = 7600.0              # upper edge of the mel filterbank in Hz

# -----------------------
# Preprocessing: waveform → log-mel
# -----------------------
def waveform_to_log_mel(waveform):
    """Turn a raw waveform into a normalised log-mel spectrogram tensor.

    Steps: pre-emphasis (coefficient 0.97), Hann-windowed STFT magnitude,
    projection onto NUM_MELS mel bands between FMIN and FMAX, natural log,
    then normalisation by the mean and standard deviation of the whole clip.

    Args:
        waveform: Audio samples convertible to a float32 tensor.
            NOTE(review): the pre-emphasis slices along axis 0 while the STFT
            frames the last axis, so this presumably expects a 1-D mono
            signal - confirm against callers.

    Returns:
        float32 tensor with a trailing singleton channel axis, i.e.
        (time, mels, 1) for a 1-D input. No batch axis is added here.
    """
    samples = tf.convert_to_tensor(waveform, dtype=tf.float32)

    # Pre-emphasis: y[0] = x[0], y[n] = x[n] - 0.97 * x[n - 1]
    first_sample = samples[:1]
    differenced = samples[1:] - 0.97 * samples[:-1]
    emphasized = tf.concat([first_sample, differenced], 0)

    # Magnitude spectrogram
    magnitude = tf.abs(
        tf.signal.stft(
            emphasized,
            frame_length=FRAME_LENGTH,
            frame_step=FRAME_STEP,
            fft_length=FFT_LENGTH,
            window_fn=tf.signal.hann_window,
        )
    )

    # Project the linear-frequency bins onto the mel scale
    num_spectrogram_bins = magnitude.shape[-1]
    mel_filterbank = tf.signal.linear_to_mel_weight_matrix(
        NUM_MELS, num_spectrogram_bins, SAMPLE_RATE, FMIN, FMAX
    )
    mel_spectrogram = tf.matmul(magnitude, mel_filterbank)

    # Log compression; the small offset avoids log(0) on silent frames
    log_mel = tf.math.log(mel_spectrogram + 1e-6)

    # Normalise with statistics taken over the whole clip
    centered = log_mel - tf.math.reduce_mean(log_mel)
    spread = tf.math.reduce_std(log_mel) + 1e-6
    normalized = centered / spread

    # Append the channel axis: (time, mels) -> (time, mels, 1)
    return tf.expand_dims(normalized, -1)






In [2]:
# Check hardware for a mic: list every audio device that exposes at least one
# input channel, printed as "[index] name". The index is what PyAudio expects
# as `input_device_index` when opening a stream.
p = pyaudio.PyAudio()
try:
    for i in range(p.get_device_count()):
        info = p.get_device_info_by_index(i)
        if info["maxInputChannels"] > 0:
            print(f"[{i}] {info['name']}")
finally:
    # Release the PyAudio handle even if a device query raises.
    p.terminate()


[0] MacBook Pro Microphone


In [19]:
# -----------------------
# Load trained Keras model
# -----------------------
# NOTE(review): safe_mode=False turns off Keras' safe deserialisation, so
# loading the file can execute code embedded in it. Only load model files
# you created or otherwise trust.
MODEL_PATH = "/Users/sethwright/Documents/audio-model/output/saved_model.keras"
model = tf.keras.models.load_model(
    MODEL_PATH,
    compile=False,
    safe_mode=False,
)
print("✅ Model loaded:", MODEL_PATH)


✅ Model loaded: /Users/sethwright/Documents/audio-model/output/saved_model.keras


In [None]:
# Load a WAV file and conform it to the model input: mono, SAMPLE_RATE (16 kHz), exactly NUM_SAMPLES samples
def load_test_clip(filepath):
    """Load an audio file and conform it to the model's expected input.

    The clip is down-mixed to mono, resampled to SAMPLE_RATE when its native
    rate differs, then truncated or zero-padded at the end to exactly
    NUM_SAMPLES samples.

    Args:
        filepath: Path to an audio file readable by ``soundfile``.

    Returns:
        1-D float32 NumPy array of length NUM_SAMPLES.

    Raises:
        ImportError: If the file needs resampling and SciPy is not installed.
    """
    waveform, sr = sf.read(filepath, dtype="float32")

    # Mono first: multi-channel files come back as (frames, channels), and
    # averaging before resampling keeps the resampler on a 1-D signal.
    if waveform.ndim > 1:
        waveform = np.mean(waveform, axis=1)

    # Resample if needed. `tf.signal` has no `resample` function, so use
    # SciPy's polyphase resampler (imported here so the common 16 kHz path
    # does not require SciPy at all).
    if sr != SAMPLE_RATE and len(waveform) > 0:
        from math import gcd
        from scipy.signal import resample_poly

        print(f"Resampling from {sr} → {SAMPLE_RATE}")
        divisor = gcd(int(sr), SAMPLE_RATE)
        waveform = resample_poly(
            waveform, SAMPLE_RATE // divisor, int(sr) // divisor
        ).astype(np.float32)

    # Pad or trim to expected length
    waveform = waveform[:NUM_SAMPLES]
    shortfall = NUM_SAMPLES - len(waveform)
    if shortfall > 0:
        waveform = np.pad(waveform, (0, shortfall))

    return waveform

In [2]:
# Running index used to number recorded clips: it is passed to record_clip()
# and incremented after each recording. Use 0 for a fresh session, or a higher
# value to continue an earlier numbering.
y = 500

In [196]:


# Recording settings. SAMPLE_RATE and NUM_SAMPLES are also assigned in the
# first cell; re-assigning them here overrides those values for every cell run
# afterwards, so keep the two places in sync.
SAMPLE_RATE = 16000  # Hz
DURATION_SECONDS = 1.25  # length of each recorded clip
NUM_SAMPLES = int(SAMPLE_RATE * DURATION_SECONDS)  # samples per clip
DEVICE_INDEX = 0  # change to your mic index

def record_clip(x, label="positive"):
    """Record one fixed-length clip from the microphone and save it as a WAV.

    Audio is captured as 16-bit mono at SAMPLE_RATE from the input device
    DEVICE_INDEX, converted to float32, fixed to exactly NUM_SAMPLES samples
    and written to ``test_recording<x>_<label>.wav`` in the working directory.

    Args:
        x: Index embedded in the output filename.
        label: Filename suffix describing the clip. Defaults to
            ``"positive"``, which reproduces the original filenames.

    Returns:
        1-D float32 NumPy array of NUM_SAMPLES samples scaled to [-1.0, 1.0).
    """
    save_path = "test_recording" + str(x) + "_" + str(label) + ".wav"
    frames_per_buffer = 1024  # smaller buffer = more reliable

    # Round the chunk count UP so at least NUM_SAMPLES real samples are read;
    # truncating it left the end of every clip as zero padding.
    num_chunks = int(np.ceil(NUM_SAMPLES / frames_per_buffer))

    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=SAMPLE_RATE,
                        input=True,
                        input_device_index=DEVICE_INDEX,
                        frames_per_buffer=frames_per_buffer)
        try:
            print("🎙️  Recording 1.25 s sample...")
            frames = [
                stream.read(frames_per_buffer, exception_on_overflow=False)
                for _ in range(num_chunks)
            ]
        finally:
            # Always release the stream, even if a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # Join frames & convert int16 PCM to a float32 waveform
    audio_bytes = b"".join(frames)
    waveform = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0

    # Fix length to NUM_SAMPLES (trims the surplus from the last chunk)
    waveform = waveform[:NUM_SAMPLES]
    if len(waveform) < NUM_SAMPLES:
        waveform = np.pad(waveform, (0, NUM_SAMPLES - len(waveform)))

    # Save to WAV file
    sf.write(save_path, waveform, SAMPLE_RATE)
    print(f"💾 Saved recording to: {save_path}")

    return waveform
# Record NUM_CLIPS_TO_RECORD clips back to back, numbering each file with the
# running index `y`. Leave it at 1 to record and save a single clip per run;
# raise it to collect a batch (e.g. background-noise samples) in one go.
NUM_CLIPS_TO_RECORD = 1
for _clip in range(NUM_CLIPS_TO_RECORD):
    record_clip(y)
    y += 1


🎙️  Recording 1.25 s sample...
💾 Saved recording to: test_recording693_positive.wav


In [33]:
# -----------------------
# Run inference
# -----------------------

# Load the clip at the model's sample rate and length.
waveform = load_test_clip("/Users/sethwright/Documents/audio-model/test_recording.wav")

# Features come back without a batch axis; prepend one for predict().
spec = tf.expand_dims(waveform_to_log_mel(waveform), 0)

# [0][0] picks the first output value of the first (only) batch item.
pred = model.predict(spec)[0][0]

print(f"\nModel raw output: {pred:.4f}")
if pred > 0.5:
    print("Wake word detected!")
else:
    print("No wake word.")



Model raw output: 0.0016
No wake word.
