# Importing the necessary libraries


In [1]:
import os
import numpy as np
import librosa
import soundfile as sf
import pyaudio
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout
from scipy.signal import butter, lfilter
import wave
import tensorflow as tf


# Record 30 seconds of audio

In [3]:
# Directory for storing recorded audio.
# exist_ok=True makes re-running this cell a no-op when the directory already exists.
os.makedirs("recordings", exist_ok=True)

def record_audio(filename, duration=30, rate=16000, channels=1):
    """Record audio from the input device and save it as a 16-bit PCM .wav file.

    Args:
        filename: Path of the .wav file to write.
        duration: Length of the recording in seconds.
        rate: Sampling rate in Hz.
        channels: Number of input channels.

    The audio stream and the PyAudio instance are always released, even when
    opening the stream or reading from it raises. Nothing is written to
    ``filename`` in that case because the exception propagates to the caller.
    """
    chunk = 1024
    sample_format = pyaudio.paInt16
    # Exact number of frames to capture; reading in whole chunks only would
    # silently drop the trailing partial chunk and shorten the recording.
    total_frames = int(rate * duration)

    p = pyaudio.PyAudio()
    stream = None
    frames = []
    try:
        # Query the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(sample_format)
        stream = p.open(format=sample_format,
                        channels=channels,
                        rate=rate,
                        input=True,
                        frames_per_buffer=chunk)
        print(f"Recording {duration} seconds of audio...")
        remaining = total_frames
        while remaining > 0:
            to_read = min(chunk, remaining)
            frames.append(stream.read(to_read))
            remaining -= to_read
        print("Recording complete.")
    finally:
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()

    # Save audio as a .wav file using wave module
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))
    print(f"Audio saved to {filename}")

# Record 30 seconds of audio and save it to recordings/user_voice.wav.
record_audio("recordings/user_voice.wav", duration=30)

Recording 30 seconds of audio...
Recording complete.
Audio saved to recordings/user_voice.wav


# Preprocess Audio: Feature Extraction

In [2]:
def extract_features(audio_path, sr=16000, n_mfcc=13):
    """Return a fixed-size MFCC feature vector for an audio file.

    Args:
        audio_path: Path of the audio file to load.
        sr: Sampling rate in Hz that the audio is resampled to on load.
        n_mfcc: Number of MFCC coefficients to compute. The model in this
            notebook is built for 13 coefficients, so changing this value
            requires building the model with a matching input shape.

    Returns:
        1-D array of length ``n_mfcc``: each coefficient averaged over all
        time frames of the recording.
    """
    y, sr = librosa.load(audio_path, sr=sr)  # Load (and resample) the audio file
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)  # Shape: (n_mfcc, frames)
    # Average over time to get a fixed-size feature vector
    return np.mean(mfccs.T, axis=0)

# Time-averaged MFCC feature vector of the recorded user voice.
user_features = extract_features("recordings/user_voice.wav")

# Train a Simple Neural Network for Voice Recognition

In [None]:
def build_model(input_shape):
    """Build and compile the CNN + LSTM binary classifier.

    Args:
        input_shape: Shape of one input sample, forwarded to the first
            Conv1D layer.

    Returns:
        A compiled Sequential model with a single sigmoid output, trained
        with binary cross-entropy and reporting accuracy.
    """
    layer_stack = [
        # Convolutional front end: two conv/pool stages for feature extraction
        Conv1D(64, 3, activation='relu', input_shape=input_shape),
        MaxPooling1D(2),
        Conv1D(128, 3, activation='relu'),
        MaxPooling1D(2),
        # Recurrent stage: the first LSTM keeps the sequence for the second one
        LSTM(128, return_sequences=True),
        LSTM(64),
        # Classification head; dropout limits overfitting
        Dense(64, activation='relu'),
        Dropout(0.5),
        # Single sigmoid unit: binary voice-vs-background-noise decision
        Dense(1, activation='sigmoid'),
    ]
    network = Sequential(layer_stack)

    adam = tf.keras.optimizers.Adam(learning_rate=0.001)
    network.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])
    return network

# Prepare training data (simplified example, replace with a proper dataset).
# Row 0 is the user's voice, row 1 is random noise standing in for "not user".
# The trailing axis is the single channel that Conv1D expects:
# (samples, 13, 1) matches input_shape=(13, 1) below.
X_train = np.array([user_features, np.random.rand(13)])[..., np.newaxis]
# Labels in the same row order as X_train: user (1), not user (0).
# The real-time loop treats a prediction above 0.5 as "user's voice", so the
# user sample must carry the positive label.
y_train = np.array([1, 0])

model = build_model(input_shape=(13, 1))
model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1)

# Save the model
model.save("voice_recognition_model.h5")

In [3]:
# Load the trained model back from the HDF5 file written by model.save() above.
# real_time_processing() reads this module-level `model`.
model = tf.keras.models.load_model("voice_recognition_model.h5")



# Noise Suppression: Simple Band-Pass Filtering

In [12]:
def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a Butterworth band-pass filter.

    Args:
        lowcut: Lower cutoff frequency, in the same unit as ``fs``.
        highcut: Upper cutoff frequency, in the same unit as ``fs``.
        fs: Sampling rate of the signal the filter will be applied to.
        order: Filter order passed to ``scipy.signal.butter``.

    Returns:
        The ``(b, a)`` numerator and denominator coefficient arrays.
    """
    # butter() expects the band edges as fractions of the Nyquist frequency.
    half_rate = 0.5 * fs
    normalized_band = [lowcut / half_rate, highcut / half_rate]
    numerator, denominator = butter(order, normalized_band, btype='band')
    return numerator, denominator

def bandpass_filter(data, lowcut=300, highcut=3400, fs=16000, order=5):
    """Band-pass filter a signal with a Butterworth filter.

    Args:
        data: Audio samples to filter.
        lowcut: Lower cutoff frequency in Hz.
        highcut: Upper cutoff frequency in Hz.
        fs: Sampling rate of ``data`` in Hz.
        order: Order of the Butterworth design.

    Returns:
        The filtered samples, as produced by ``scipy.signal.lfilter``.
    """
    numerator, denominator = butter_bandpass(lowcut, highcut, fs, order=order)
    filtered = lfilter(numerator, denominator, data)
    return filtered

def noise_suppression(audio_path, output_path="recordings/filtered_audio.wav"):
    """Band-pass filter an audio file and write the result to a new file.

    Args:
        audio_path: Path of the audio file to clean up.
        output_path: Path of the filtered file to write. Its parent directory
            is created if it does not exist.

    The file is loaded at its native sampling rate, and that same rate is used
    both to design the filter and to write the output, so the pass band is
    correct for files that are not sampled at 16 kHz.
    """
    audio, sr = librosa.load(audio_path, sr=None)  # sr=None keeps the native rate
    # Design the filter for the file's actual sampling rate instead of relying
    # on bandpass_filter's 16 kHz default.
    filtered_audio = bandpass_filter(audio, fs=sr)

    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    sf.write(output_path, filtered_audio, sr)
    print("Noise suppression complete.")



# Filter the recorded user voice; the result is written under recordings/.
noise_suppression("recordings/user_voice.wav")


Noise suppression complete.


# Real-Time Noise Suppression and Voice Recognition

In [None]:

def real_time_processing():
    """Stream microphone audio and report when the model detects the user's voice.

    Reads 1024-sample chunks of 16 kHz mono audio, band-pass filters each
    chunk, computes the time-averaged 13-coefficient MFCC vector and scores it
    with the module-level ``model``. A message is printed whenever the score
    exceeds 0.5. Runs until interrupted with KeyboardInterrupt; the audio
    stream and the PyAudio instance are released on exit, including when
    opening the stream fails.
    """
    chunk = 1024
    rate = 16000

    # Design the filter once instead of on every chunk, and carry the filter
    # state from one chunk to the next so each chunk continues where the
    # previous one ended rather than restarting from silence at every boundary.
    # Cutoffs and order mirror the defaults of bandpass_filter.
    b, a = butter_bandpass(300, 3400, rate, order=5)
    filter_state = np.zeros(max(len(a), len(b)) - 1)

    p = pyaudio.PyAudio()
    stream = None
    try:
        stream = p.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=rate,
                        input=True,
                        frames_per_buffer=chunk)
        print("Listening for user's voice...")
        while True:
            data = stream.read(chunk, exception_on_overflow=False)  # Avoid overflow errors
            # Convert 16-bit PCM to floats in [-1, 1).
            audio = np.frombuffer(data, dtype=np.int16).astype(np.float32) / 32768.0
            filtered_audio, filter_state = lfilter(b, a, audio, zi=filter_state)

            # Use trained model to detect the user's voice.
            # NOTE(review): a 1024-sample chunk is presumably shorter than
            # librosa's default analysis window, so these features may differ
            # from those computed on the long training recording - consider
            # buffering several chunks before scoring.
            mfccs = librosa.feature.mfcc(y=filtered_audio, sr=rate, n_mfcc=13)
            mfccs_mean = np.mean(mfccs.T, axis=0)
            # Shape (1, 13, 1): one sample, 13 steps, one channel, matching
            # the (13, 1) input shape the model was built with.
            features = mfccs_mean[np.newaxis, :, np.newaxis]
            prediction = model.predict(features, verbose=0)  # verbose=0: no progress bar per chunk
            score = float(np.ravel(prediction)[0])

            if score > 0.5:
                print("\n User's voice detected.")
    except KeyboardInterrupt:
        print("Stopping...")
    finally:
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()

# Start listening; runs until interrupted (KeyboardInterrupt).
real_time_processing()