🔹 Step 1: Recording the Dataset

In [2]:
import sounddevice as sd
from scipy.io.wavfile import write
import os
import glob

def get_next_index(label, folder="dataset"):
    """Return the next free sample index for ``label`` inside ``folder``.

    Recordings are expected to be named ``<label>_<index>.wav``. Files that
    match the glob pattern but do not end in a purely numeric index (for
    example ``on_backup.wav`` or ``on_extra_3.wav``) are ignored instead of
    raising ``ValueError`` or being counted towards the wrong label.

    Args:
        label: Command label used as the file-name prefix (e.g. ``"on"``).
        folder: Directory that holds the recordings.

    Returns:
        ``0`` when no numbered recording exists for ``label``; otherwise the
        highest existing index plus one.
    """
    # Escape glob metacharacters so a label/folder containing "[", "*" or "?"
    # is matched literally rather than interpreted as a pattern.
    pattern = f"{glob.escape(folder)}/{glob.escape(label)}_*.wav"
    prefix = f"{label}_"

    indices = []
    for path in glob.glob(pattern):
        stem = os.path.splitext(os.path.basename(path))[0]
        suffix = stem[len(prefix):]
        # Only "<label>_<digits>" counts; anything else is not one of ours.
        if suffix.isdecimal():
            indices.append(int(suffix))

    return max(indices) + 1 if indices else 0

def record_samples(label, count=5, duration=2, fs=16000, folder="dataset"):
    """Record ``count`` single-channel int16 clips and save them as WAV files.

    Clips are written to ``<folder>/<label>_<index>.wav``; numbering starts at
    the value returned by ``get_next_index(label, folder)``.

    Args:
        label: Command label used as the file-name prefix.
        count: Number of clips to record.
        duration: Length of each clip in seconds.
        fs: Sampling rate in Hz.
        folder: Output directory; created if it does not exist.
    """
    os.makedirs(folder, exist_ok=True)
    first = get_next_index(label, folder)
    n_frames = int(duration * fs)

    for sample_no in range(first, first + count):
        wav_path = f"{folder}/{label}_{sample_no}.wav"
        print(f"Recording {label} sample {sample_no}...")
        clip = sd.rec(n_frames, samplerate=fs, channels=1, dtype='int16')
        sd.wait()  # wait for the recording to finish before saving it
        write(wav_path, fs, clip)
        print(f"Saved {wav_path}")

# Example: record 5 new clips for each command, "on" first and then "off"
for command in ("on", "off"):
    record_samples(command, count=5)


Recording on sample 15...
Saved dataset/on_15.wav
Recording on sample 16...
Saved dataset/on_16.wav
Recording on sample 17...
Saved dataset/on_17.wav
Recording on sample 18...
Saved dataset/on_18.wav
Recording on sample 19...
Saved dataset/on_19.wav
Recording off sample 15...
Saved dataset/off_15.wav
Recording off sample 16...
Saved dataset/off_16.wav
Recording off sample 17...
Saved dataset/off_17.wav
Recording off sample 18...
Saved dataset/off_18.wav
Recording off sample 19...
Saved dataset/off_19.wav


🔹 Step 2: Feature Extraction

In [3]:
import glob
import numpy as np
from scipy.io import wavfile
from python_speech_features import mfcc

import os


def label_from_filename(path):
    """Return 1 for an "on" recording and 0 for anything else.

    The label is taken from the file name only (``<label>_<index>.wav``), not
    from the whole path, so a directory name that happens to contain "on"
    (e.g. ``station/off_3.wav``) no longer turns an "off" clip into an "on".
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    return 1 if stem.rsplit("_", 1)[0] == "on" else 0


X, y = [], []

# sorted(): glob returns files in filesystem order, which would make the
# seeded train/test split downstream differ from machine to machine.
for file in sorted(glob.glob("dataset/*.wav")):
    label = label_from_filename(file)
    sr, audio = wavfile.read(file)
    features = mfcc(audio, sr, numcep=13)
    # Average over the first axis so every clip yields one 13-value vector.
    mfcc_mean = np.mean(features, axis=0)
    X.append(mfcc_mean)
    y.append(label)

X = np.array(X)
y = np.array(y)

print("Features shape:", X.shape)


Features shape: (40, 13)


🔹 Step 3: Train a Simple Model

In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import joblib

# Hold out 20% of the clips for evaluation. stratify=y keeps the on/off ratio
# the same in both parts, so the small test set cannot end up dominated by a
# single class. Stratifying needs at least two clips of each class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Accuracy is measured on the held-out clips only.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# Persist the fitted classifier for the detection cells below.
joblib.dump(model, "voice_model.pkl")
print("Model saved as voice_model.pkl")


Accuracy: 1.0
Model saved as voice_model.pkl


🔹 Step 4: Real-time Voice Command Detection

In [5]:
import joblib
import sounddevice as sd
import numpy as np
from python_speech_features import mfcc
import serial

# Load the classifier stored in "voice_model.pkl".
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code - only load model files you produced yourself.
model = joblib.load("voice_model.pkl")
def listen_and_predict(duration=2, fs=16000):
    """Record one clip from the microphone, classify it and print the command.

    Args:
        duration: Recording length in seconds.
        fs: Sampling rate in Hz.

    Returns:
        The label predicted by the module-level ``model``; 1 is reported as
        ON, any other value as OFF.
    """
    print("Speak now...")
    n_frames = int(duration * fs)
    recording = sd.rec(n_frames, samplerate=fs, channels=1, dtype='int16')
    sd.wait()
    samples = recording.flatten()

    # One averaged 13-coefficient MFCC vector, shaped as a single-row batch.
    coeffs = mfcc(samples, fs, numcep=13)
    feature_row = np.mean(coeffs, axis=0).reshape(1, -1)

    pred = model.predict(feature_row)[0]
    is_on = pred == 1
    # Serial hook (currently disabled): ser.write(b"ON\n") / ser.write(b"OFF\n")
    print("🟢 Command: ON" if is_on else "🔴 Command: OFF")
    return pred
# Connect to STM32
## ser = serial.Serial("COM3", 9600, timeout=1)

# Keep listening until a clip is classified as OFF (label 0).
result = listen_and_predict()
while result != 0:
    result = listen_and_predict()
print("Stopping listener... (OFF command detected)")


Speak now...
🟢 Command: ON
Speak now...
🟢 Command: ON
Speak now...
🟢 Command: ON
Speak now...
🟢 Command: ON
Speak now...
🟢 Command: ON
Speak now...
🟢 Command: ON
Speak now...
🟢 Command: ON
Speak now...
🔴 Command: OFF
Stopping listener... (OFF command detected)


In [7]:
import tkinter as tk
import joblib
import sounddevice as sd
import numpy as np
from python_speech_features import mfcc
## import serial

# Load the trained model from "voice_model.pkl".
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code - only load model files you produced yourself.
model = joblib.load("voice_model.pkl")

# Serial to STM32
## ser = serial.Serial("COM3", 9600, timeout=1)

def listen_and_predict(duration=2, fs=16000):
    """Record one clip from the microphone and return the model's prediction.

    Args:
        duration: Recording length in seconds.
        fs: Sampling rate in Hz.

    Returns:
        The first (only) label predicted by the module-level ``model`` for
        the clip's averaged 13-coefficient MFCC vector.
    """
    recording = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')
    sd.wait()
    coeffs = mfcc(recording.flatten(), fs, numcep=13)
    feature_row = np.mean(coeffs, axis=0).reshape(1, -1)
    prediction = model.predict(feature_row)
    return prediction[0]

def voice_control():
    """Button callback: classify one spoken command and show it in the GUI.

    Updates ``status_var`` with the ON message when the prediction is 1 and
    with the OFF message otherwise.
    """
    # Serial hook (currently disabled): ser.write(b"ON\n") / ser.write(b"OFF\n")
    heard_on = listen_and_predict() == 1
    status_var.set(
        "🟢 System turned ON (voice)" if heard_on else "🔴 System turned OFF (voice)"
    )

# Tkinter GUI: a status line plus one button that triggers voice_control.
root = tk.Tk()
root.title("Ground Station")
status_var = tk.StringVar(value="System Idle")

status_label = tk.Label(root, textvariable=status_var, font=("Arial", 16))
status_label.pack(pady=20)

voice_button = tk.Button(root, text="Voice Control", command=voice_control)
voice_button.pack(pady=20)

# Blocks here until the window is closed.
root.mainloop()
