In [None]:
import pyaudio
import wave
import numpy as np
from scipy.signal import butter, lfilter, stft, istft
from scipy.ndimage import median_filter

# Function for high-pass filter
def butter_highpass(cutoff, fs, order=5):
    """Design a digital Butterworth high-pass filter.

    Args:
        cutoff: Cutoff frequency, in the same units as ``fs``.
        fs: Sampling rate of the signal the filter will be applied to.
        order: Filter order passed to ``scipy.signal.butter``.

    Returns:
        The ``(b, a)`` numerator/denominator coefficient arrays of the filter.
    """
    # butter() takes the critical frequency as a fraction of the Nyquist
    # frequency (half the sampling rate) when no fs argument is given.
    half_rate = 0.5 * fs
    relative_cutoff = cutoff / half_rate
    numerator, denominator = butter(
        order, relative_cutoff, btype='high', analog=False
    )
    return numerator, denominator

def highpass_filter(data, cutoff, fs, order=5):
    """Apply a Butterworth high-pass filter to ``data``.

    Args:
        data: Input samples to filter.
        cutoff: Cutoff frequency, in the same units as ``fs``.
        fs: Sampling rate of ``data``.
        order: Order of the Butterworth design.

    Returns:
        The filtered samples as returned by ``scipy.signal.lfilter``.
    """
    numerator, denominator = butter_highpass(cutoff, fs, order=order)
    # No initial filter state is supplied, so every call filters its input
    # independently of any previous call.
    filtered = lfilter(numerator, denominator, data)
    return filtered

# Function for spectral subtraction with Wiener filter
def noise_reduction_spectral(audio_chunk, noise_estimation, alpha=1.5, fs=44100, nperseg=512):
    """Denoise one audio chunk with spectral subtraction and a Wiener-style gain.

    The chunk is transformed with an STFT, an estimate of the noise magnitude
    is subtracted from every time/frequency bin, the result is weighted by a
    Wiener-style gain, and the signal is rebuilt with the original phase.

    Args:
        audio_chunk: 1-D sequence of samples on the int16 scale.
        noise_estimation: Noise reference. A 1-D array is treated as a
            time-domain noise recording (at least ``nperseg`` samples) and is
            converted to an RMS magnitude per frequency bin. Any other shape
            (scalar, ``(n_freqs, 1)``, ``(n_freqs, n_frames)``) is treated as
            a noise magnitude spectrum (complex values are reduced with
            ``abs``) and must broadcast against the chunk's STFT.
        alpha: Over-subtraction factor applied to the noise magnitude.
        fs: Sampling rate handed to the STFT/ISTFT.
        nperseg: STFT segment length.

    Returns:
        An ``int16`` array with the same number of samples as ``audio_chunk``.

    Raises:
        ValueError: If a time-domain ``noise_estimation`` is shorter than the
            STFT segment length.
    """
    samples = np.asarray(audio_chunk, dtype=np.float64)
    if samples.size == 0:
        return np.zeros(0, dtype=np.int16)

    # scipy shrinks nperseg (with a warning) when the input is shorter than
    # one segment; do it explicitly so the noise spectrum and the inverse
    # transform use the very same segment length.
    seg_len = min(int(nperseg), samples.size)

    _, _, Zxx = stft(samples, fs=fs, nperseg=seg_len)
    magnitude = np.abs(Zxx)
    phase = np.angle(Zxx)

    noise_magnitude = _noise_magnitude_spectrum(noise_estimation, fs, seg_len)
    noise_power = noise_magnitude ** 2

    # Spectral subtraction: subtract magnitude from magnitude (not power from
    # magnitude) and floor at zero so no bin becomes negative.
    reduced_magnitude = np.maximum(magnitude - alpha * noise_magnitude, 0.0)

    # Wiener-style gain. The power difference is floored at zero so the
    # square root never yields NaN, and bins where both signal and noise are
    # zero get a gain of zero instead of 0/0.
    signal_magnitude = np.sqrt(np.maximum(magnitude ** 2 - noise_power, 0.0))
    denominator = signal_magnitude + noise_magnitude
    wiener_filter = np.divide(
        signal_magnitude,
        denominator,
        out=np.zeros_like(denominator),
        where=denominator > 0,
    )

    # Apply the gain and rebuild the complex spectrum with the original phase.
    enhanced_magnitude = reduced_magnitude * wiener_filter
    Zxx_reduced = enhanced_magnitude * np.exp(1j * phase)
    _, audio_reconstructed = istft(Zxx_reduced, fs=fs, nperseg=seg_len)

    # The forward STFT zero-pads the chunk to a whole number of hops; drop
    # that padding so consecutive chunks can be concatenated without gaps.
    audio_reconstructed = audio_reconstructed[:samples.size]

    # Keep the input's scale. Rescaling every chunk to its own peak would
    # change the loudness from chunk to chunk and divides by zero on silence;
    # clipping to the int16 range is enough to prevent wrap-around.
    audio_reconstructed = np.clip(
        np.rint(audio_reconstructed), -32768, 32767
    ).astype(np.int16)

    # Apply median filter to smooth out isolated sample spikes.
    audio_reconstructed = median_filter(audio_reconstructed, size=3)

    return audio_reconstructed


def _noise_magnitude_spectrum(noise_estimation, fs, nperseg):
    """Return a noise magnitude spectrum that broadcasts against an STFT."""
    noise = np.asarray(noise_estimation)
    if noise.ndim != 1:
        # Already a spectral estimate (scalar or 2-D): use its magnitude as is.
        return np.abs(noise).astype(np.float64)

    if noise.size < nperseg:
        raise ValueError(
            "time-domain noise_estimation must contain at least "
            f"{nperseg} samples, got {noise.size}"
        )

    _, _, noise_stft = stft(noise.astype(np.float64), fs=fs, nperseg=nperseg)
    # RMS magnitude per frequency bin, kept as a column vector so it
    # broadcasts across all time frames of the chunk's STFT.
    return np.sqrt(np.mean(np.abs(noise_stft) ** 2, axis=1, keepdims=True))

# Parameters
FORMAT = pyaudio.paInt16  # 16-bit samples; buffers below are decoded as np.int16 to match
CHANNELS = 1
RATE = 44100
CHUNK = 8820  # ~200 ms chunks
RECORD_SECONDS = 10
NOISE_PROFILE_CHUNKS = 5  # chunks captured up front as the baseline noise profile
OUTPUT_FILE_SINGLE = "single_speaker_output.wav"
OUTPUT_FILE_MULTI = "multi_speaker_output.wav"

# Initialize PyAudio
p = pyaudio.PyAudio()

frames_single = []
frames_multi = []

try:
    # Query the sample width while the PyAudio instance is still alive; it is
    # needed later for the WAV headers.
    sample_width = p.get_sample_size(FORMAT)

    # Open input stream
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    try:
        print("Recording and processing in real-time...")

        # Noise estimation (collecting a few chunks for baseline noise profile)
        print("Collecting noise profile...")
        noise_profile_chunks = []
        for _ in range(NOISE_PROFILE_CHUNKS):
            noise_data = stream.read(CHUNK)
            noise_audio = np.frombuffer(noise_data, dtype=np.int16)
            noise_profile_chunks.append(noise_audio)
        # Keep the raw noise samples instead of averaging the waveforms:
        # averaging uncorrelated noise sample-by-sample attenuates it and
        # would underestimate the noise level. Converting to float also avoids
        # int16 overflow in later arithmetic.
        noise_profile = np.concatenate(noise_profile_chunks).astype(np.float64)

        # Recording and processing audio (integer arithmetic avoids float
        # rounding in the chunk count).
        for _ in range(RATE * RECORD_SECONDS // CHUNK):
            data = stream.read(CHUNK)
            audio_data = np.frombuffer(data, dtype=np.int16)

            # Single speaker scenario
            # NOTE: highpass_filter() carries no filter state between calls,
            # so each chunk is filtered independently.
            filtered_audio = highpass_filter(audio_data, cutoff=150, fs=RATE)  # Lowered cutoff for voice preservation
            single_speaker_output = noise_reduction_spectral(filtered_audio, noise_profile)
            frames_single.append(single_speaker_output.astype(np.int16).tobytes())

            # Multi-speaker scenario
            multi_speaker_output = noise_reduction_spectral(audio_data, noise_profile)
            frames_multi.append(multi_speaker_output.astype(np.int16).tobytes())

        print("Recording completed.")
    finally:
        # Stop and close stream even if capture or processing failed
        stream.stop_stream()
        stream.close()
finally:
    p.terminate()

# Save single speaker output
with wave.open(OUTPUT_FILE_SINGLE, 'wb') as wf_single:
    wf_single.setnchannels(CHANNELS)
    wf_single.setsampwidth(sample_width)
    wf_single.setframerate(RATE)
    wf_single.writeframes(b''.join(frames_single))

# Save multi-speaker output
with wave.open(OUTPUT_FILE_MULTI, 'wb') as wf_multi:
    wf_multi.setnchannels(CHANNELS)
    wf_multi.setsampwidth(sample_width)
    wf_multi.setframerate(RATE)
    wf_multi.writeframes(b''.join(frames_multi))

print(f"Processed audio saved as {OUTPUT_FILE_SINGLE} and {OUTPUT_FILE_MULTI}.")