Recording and playing my audio

In [1]:
import pyaudio
import numpy as np
from IPython.display import Audio, display  

In [2]:
# Capture settings shared by the recording and playback cells
RATE = 16000              # samples per second
CHANNELS = 1              # single (mono) channel
FORMAT = pyaudio.paInt16  # PyAudio sample-format constant; chunks are later decoded as np.int16
CHUNK = 4000              # frames requested per read (4000 / 16000 = 0.25 s of audio)
frames = list()           # raw byte chunks, filled in by the recording cell

In [3]:
# Initialize PyAudio: `p` is the handle used by the cells below to open the
# input (recording) and output (playback) streams; it is released later with
# p.terminate()
p = pyaudio.PyAudio()

In [4]:
# Record microphone input into `frames` until the kernel is interrupted.
stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)

# Start every take from an empty buffer so re-running this cell does not append
# a new recording to the previous one. clear() empties the list in place, so
# `frames` stays the same object created in the configuration cell.
frames.clear()
print("🎤 Recording audio... Press Ctrl+C to stop.")

try:
    while True:
        # exception_on_overflow=False: if the input buffer overflows (the loop
        # fell behind the device) keep recording instead of raising and
        # aborting the whole take.
        data = stream.read(CHUNK, exception_on_overflow=False)
        frames.append(data)
except KeyboardInterrupt:
    # Interrupting the kernel from the notebook UI raises KeyboardInterrupt here.
    print("⏹ Recording stopped.")
finally:
    # Always release the input stream, whether recording stopped normally or failed.
    stream.stop_stream()
    stream.close()
    print("✅ Recording finished.")

🎤 Recording audio... Press Ctrl+C to stop.
⏹ Recording stopped.
✅ Recording finished.


In [5]:
# Play the recording back; nothing happens when no chunks were captured.
if frames:
    print("🔊 Playing back...")
    stream_out = p.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        output=True
    )

    try:
        # Concatenate all chunks and play them in a single write
        audio_data = b''.join(frames)
        stream_out.write(audio_data)
    finally:
        # Release the output stream even if write() raises, so the audio
        # device is not left open by a failed playback.
        stream_out.stop_stream()
        stream_out.close()
    print("✅ Playback finished.")

🔊 Playing back...
✅ Playback finished.


In [6]:
# Shut down the PyAudio instance; no further streams can be opened from `p`
# after this point
p.terminate()
print("✅ PyAudio terminated.", "✅ All done!", sep="\n")

✅ PyAudio terminated.
✅ All done!


In [7]:
# Sample rate that travels with the decoded samples
sr = RATE
# Join the recorded byte chunks and reinterpret them as 16-bit integer samples
audio_np = np.frombuffer(
    b"".join(frames),
    dtype=np.int16,
)

Converting the Recorded Audio to a Tensor

In [8]:
import torch

In [9]:
# Sample rate passed along with the audio tensor (repeats the assignment made
# in the previous cell, so this cell can be run on its own after the config cell)
sr=RATE

In [10]:
# Convert raw audio chunks to a NumPy array
audio_np = np.frombuffer(b''.join(frames), dtype=np.int16)  # Shape: (N_samples,)
# Normalize to [-1, 1] (common for audio models)
audio_np = audio_np.astype(np.float32) / 32768.0  # 32768 = max(int16)
# Convert to PyTorch tensor and add batch dimension (B, T)
audio_tensor = torch.from_numpy(audio_np).unsqueeze(0)  # Shape: (1, N_samples)
print("✅ Audio tensor shape:", audio_tensor.shape)

✅ Audio tensor shape: torch.Size([1, 68000])


In [11]:
# Sanity check of the two values handed to the preprocessing pipeline:
# the tensor's shape/dtype and the sample rate with its Python type
print(f"Input tensor shape: {audio_tensor.shape}, type: {audio_tensor.dtype}")
print(f"Sample rate: {sr}, type: {type(sr)}")

Input tensor shape: torch.Size([1, 68000]), type: torch.float32
Sample rate: 16000, type: <class 'int'>


In [12]:
# Ensure tensor is float32 and has right shape
if audio_tensor.dtype != torch.float32:
    audio_tensor = audio_tensor.float()

In [13]:
# Ensure sample rate is valid
if sr is None:
    sr = 16000  # default to your recording rate

Performing Audio Preprocessing

In [14]:
import sys
import os
# Make the sibling `scripts` directory (one level above the working directory)
# importable so `audio_pipeline` can be found.
script_path = os.path.abspath(os.path.join(os.getcwd(), '..', 'scripts'))
# Guarded so that re-running this cell does not pile up duplicate sys.path entries.
if script_path not in sys.path:
    sys.path.append(script_path)

In [15]:
# Import the project's audio_pipeline module (looked up via the `scripts`
# directory added to sys.path above).
try:
    import audio_pipeline
except ImportError as e:
    print("Error importing audio_pipeline:", e)
    # Re-raise: every later cell needs the module, and swallowing the error
    # here would only resurface as a confusing NameError further down.
    raise

  from .autonotebook import tqdm as notebook_tqdm


In [16]:
import importlib
# Re-execute the already-imported audio_pipeline module so that edits made to
# the script after the first import take effect without restarting the kernel.
# The reloaded module object is the cell's displayed result.
importlib.reload(audio_pipeline)

<module 'audio_pipeline' from 'd:\\PERSONAL\\GitHub\\College\\MOTIVE\\backend\\scripts\\audio_pipeline.py'>

In [17]:
# Run the project pipeline on the recording; it returns the processed audio
# and a sample rate, which is rebound to `sr`.
# NOTE(review): if process_audio resamples, `sr` no longer describes
# `audio_tensor` after this line, and re-running the cell would pass the new
# rate back in — confirm against audio_pipeline.process_audio.
enhanced_audio,sr=audio_pipeline.process_audio(audio_tensor,sr)

Play audio

In [23]:
# Play the original (unprocessed) recording.
audio_numpy = audio_tensor.numpy()
# The recording was captured at RATE. `sr` has been rebound to whatever
# process_audio returned, so using it here would play the original at the
# wrong speed if the pipeline changed the sample rate.
Audio(audio_numpy, rate=RATE)

In [24]:
# Play the pipeline output at the sample rate the pipeline returned.
# detach().cpu() makes the conversion work even if the pipeline hands back a
# tensor that tracks gradients or lives on another device; for a plain CPU
# tensor it changes nothing.
# NOTE(review): assumes enhanced_audio is a torch.Tensor — confirm against
# audio_pipeline.process_audio.
enhanced_audio_numpy = enhanced_audio.detach().cpu().numpy()
Audio(enhanced_audio_numpy, rate=sr)

Saving the Audio for Comparison

In [18]:
import torchaudio
from pathlib import Path

In [19]:
# Absolute path of the `data` directory that sits next to the working directory
data_path = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir, 'data')
)

In [20]:
# Folder that holds the saved recordings, and the path of the raw recording.
# os.path.join picks the platform's separator; the previous hard-coded "\"
# only formed a real sub-directory on Windows (elsewhere it became part of a
# single odd file name).
audio_folder = os.path.join(data_path, "audio")
recorded_audio_path = os.path.join(audio_folder, "recorded_audio.wav")

In [21]:
# Create the audio folder (and any missing ancestors); a no-op if it already exists
parent_dir = Path(audio_folder)
os.makedirs(parent_dir, exist_ok=True)


In [22]:
# Destination files for the raw recording and the pipeline output, side by
# side in the audio folder so the two can be compared
recorded_audio_path = f"{audio_folder}/recorded_audio.wav"
enhanced_audio_path = f"{audio_folder}/enhanced_audio.wav"

In [23]:
# Save the original recording with the rate it was captured at (RATE).
# `sr` was rebound by process_audio, so it would write a wrong sample rate
# into the file header if the pipeline changed the rate.
torchaudio.save(recorded_audio_path, audio_tensor, RATE)

In [24]:
# Save the pipeline output using the sample rate returned by process_audio.
# NOTE(review): assumes enhanced_audio is a CPU tensor in a layout
# torchaudio.save accepts — confirm against audio_pipeline.process_audio.
torchaudio.save(enhanced_audio_path, enhanced_audio, sr)