In [14]:
import tensorflow as tf
import numpy as np

# Load the saved Keras model from disk. The path is relative, so it is
# resolved against the notebook's current working directory.
model_path = 'my_model.keras'  # Use the extension you saved the model with
loaded_model = tf.keras.models.load_model(model_path)

# Command words used to turn a predicted class index into readable text.
# NOTE(review): the order presumably has to match the label order the model
# was trained with — confirm against the training notebook.
label_names = np.array(['down', 'go', 'left', 'no', 'right', 'stop', 'up', 'yes'])


In [21]:

# Preprocess audio
def load_audio_file(file_path):
    """Read a WAV file and return its samples with the channel axis removed.

    Args:
        file_path: Path of the WAV file to read (anything accepted by
            ``tf.io.read_file``).

    Returns:
        A tensor holding the decoded waveform, one value per sample.
    """
    audio_binary = tf.io.read_file(file_path)
    # Ask the decoder for exactly one channel. Without this, a stereo file
    # decodes to a trailing axis of size 2 and the squeeze below raises,
    # because only size-1 axes can be squeezed. Extra channels are dropped.
    audio, _ = tf.audio.decode_wav(audio_binary, desired_channels=1)
    return tf.squeeze(audio, axis=-1)

def get_spectrogram(waveform):
    """Turn a waveform into a magnitude spectrogram with a trailing size-1 axis.

    Args:
        waveform: Audio samples to transform.

    Returns:
        The absolute value of the short-time Fourier transform of
        ``waveform`` (frame length 255, frame step 128), with one extra
        axis appended at the end.
    """
    stft = tf.signal.stft(waveform, frame_length=255, frame_step=128)
    # The STFT is complex-valued; keep only the magnitude.
    magnitude = tf.abs(stft)
    # Append a new last axis of size 1.
    # NOTE(review): presumably the channel axis expected by the model's
    # convolutional input — confirm against the model definition.
    return magnitude[..., tf.newaxis]

def preprocess_audio(audio):
    """Convert raw audio samples into the spectrogram produced by ``get_spectrogram``.

    Args:
        audio: Audio samples to convert.

    Returns:
        Whatever ``get_spectrogram`` returns for ``audio``.
    """
    return get_spectrogram(audio)


In [22]:
# Functions to make and interpret predictions
def predict(model, audio):
    """Run the model on one audio clip and return the index of its top class.

    Args:
        model: Callable model that is applied to a batch of spectrograms.
        audio: Audio samples for a single clip.

    Returns:
        The argmax over the last axis of the model output for the first
        (and only) batch entry.
    """
    # Wrap the single spectrogram in a leading batch axis of size 1.
    batch = tf.expand_dims(preprocess_audio(audio), 0)
    scores = model(batch)
    best_per_example = tf.argmax(scores, axis=-1)
    # Only one clip was submitted, so unwrap the batch.
    return best_per_example.numpy()[0]

def decode_prediction(predicted_label, label_names):
    """Look up the entry of ``label_names`` at position ``predicted_label``.

    Args:
        predicted_label: Index of the predicted class.
        label_names: Indexable collection of class names.

    Returns:
        The element of ``label_names`` stored at ``predicted_label``.
    """
    command = label_names[predicted_label]
    return command


In [48]:
# Record audio and make predictions
import sounddevice as sd
import scipy.io.wavfile as wav

def record_audio(duration=1, fs=16000):
    """Record a single-channel clip and return it without the channel axis.

    Args:
        duration: Length of the recording in seconds.
        fs: Sampling rate in samples per second.

    Returns:
        The recorded float32 samples with the trailing channel axis removed.
    """
    print("Recording...")
    frame_count = int(duration * fs)
    recording = sd.rec(frame_count, samplerate=fs, channels=1, dtype='float32')
    # sd.rec returns immediately; block here until the capture is complete.
    sd.wait()
    print("Recording finished.")
    # One channel was requested, so the last axis has size 1 and can be dropped.
    return np.squeeze(recording, axis=-1)

# End-to-end demo: record a clip, classify it, and print the command word.
# Relies on `loaded_model` and `label_names` defined in an earlier cell.
audio = record_audio(duration=1, fs=16000)  # Record 1 second of audio
# `predict` returns an integer class index for the recorded clip.
predicted_label = predict(loaded_model, audio)
# Translate that index into its command word via `label_names`.
predicted_command = decode_prediction(predicted_label, label_names)
print(f"Predicted command: {predicted_command}")

Recording...
Recording finished.
Predicted command: right
