# Audio I/O Module Implementation

This notebook implements the Audio I/O Module for PolyLingua, including the `AudioFormat` type and `AudioProcessor` class with all required methods.

In [1]:
# Install required packages
%pip install pydub soundfile librosa numpy scipy

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Import necessary libraries
from dataclasses import dataclass
from typing import Optional
import io
import numpy as np
from pydub import AudioSegment
import soundfile as sf



In [5]:
# Define AudioFormat type
@dataclass
class AudioFormat:
    """Describes an audio stream: sample rate, channel count and encoding.

    Attributes:
        sample_rate: Sample rate as an integer.
        channels: Number of audio channels.
        encoding: Container/codec name such as 'wav' or 'mp3'.
    """

    sample_rate: int
    channels: int
    encoding: str

In [6]:
# Define AudioProcessor class
class AudioProcessor:
    """Hold one decoded audio clip and apply preprocessing steps to it.

    Attributes:
        audio: The currently loaded pydub ``AudioSegment``, or ``None`` when
            nothing has been loaded yet.
        format: An ``AudioFormat`` describing ``audio``, or ``None`` when
            nothing has been loaded yet. It is owned by this processor and is
            updated by ``resample`` and ``convert_to_mono``.

    All "is something loaded?" checks compare against ``None`` explicitly.
    An ``AudioSegment`` reports its duration through ``len()``, so a loaded
    but zero-length clip is falsy and must not be mistaken for "no audio".
    """

    def __init__(self):
        self.audio: Optional[AudioSegment] = None
        self.format: Optional[AudioFormat] = None

    def load_audio_from_bytes(self, audio_bytes: bytes, input_format: AudioFormat) -> None:
        """Decode ``audio_bytes`` with pydub and make it the current clip.

        Args:
            audio_bytes: Encoded audio data.
            input_format: Description of the input; its ``encoding`` is passed
                to pydub as the ``format`` argument.

        Raises:
            Whatever ``AudioSegment.from_file`` raises for undecodable input;
            in that case the previously loaded clip is left untouched.
        """
        # Decode into a local first so a failure cannot leave audio/format
        # out of step with each other.
        segment = AudioSegment.from_file(io.BytesIO(audio_bytes), format=input_format.encoding)
        self.audio = segment
        # Build a fresh AudioFormat instead of keeping the caller's object:
        # resample()/convert_to_mono() mutate self.format, which must not leak
        # back into the caller's instance. Rate and channel count are taken
        # from the decoded segment so the metadata matches the actual audio.
        self.format = AudioFormat(
            sample_rate=segment.frame_rate,
            channels=segment.channels,
            encoding=input_format.encoding,
        )

    def resample(self, target_sample_rate: int) -> None:
        """Resample the loaded clip to ``target_sample_rate``.

        Does nothing when no clip is loaded.
        """
        if self.audio is None:
            return
        self.audio = self.audio.set_frame_rate(target_sample_rate)
        if self.format is not None:
            self.format.sample_rate = target_sample_rate

    def convert_to_mono(self) -> None:
        """Down-mix the loaded clip to a single channel.

        Does nothing when no clip is loaded or the clip is already mono.
        """
        if self.audio is None or self.audio.channels <= 1:
            return
        self.audio = self.audio.set_channels(1)
        if self.format is not None:
            self.format.channels = 1

    def normalize_volume(self, target_dbfs: float = -20.0) -> None:
        """Apply gain so the clip's average loudness equals ``target_dbfs``.

        Does nothing when no clip is loaded. Silent audio is left unchanged:
        its dBFS is negative infinity, so the required gain would be infinite.
        """
        if self.audio is None:
            return
        current_dbfs = self.audio.dBFS
        if current_dbfs == float("-inf"):
            return
        self.audio = self.audio.apply_gain(target_dbfs - current_dbfs)

    def export_to_wav_bytes(self) -> bytes:
        """Return the loaded clip encoded as WAV bytes.

        Returns:
            The WAV data, or ``b""`` when no clip is loaded.
        """
        if self.audio is None:
            return b""
        buffer = io.BytesIO()
        self.audio.export(buffer, format="wav")
        return buffer.getvalue()

    def validate_audio_duration(self, max_seconds: float = 60.0) -> bool:
        """Check that a non-empty clip is loaded and is at most ``max_seconds`` long.

        Returns:
            ``False`` when no clip is loaded, when the clip has zero length,
            or when it is longer than ``max_seconds``; ``True`` otherwise.
        """
        if self.audio is None:
            return False
        duration_seconds = len(self.audio) / 1000.0  # pydub duration in ms
        return 0 < duration_seconds <= max_seconds

In [7]:
# Test the AudioProcessor class
processor = AudioProcessor()

# No audio is loaded at this point. To exercise the commented-out method
# calls in the cells below, first load a clip from bytes, for example:
#   audio_bytes = b'...your audio data...'
#   input_format = AudioFormat(sample_rate=44100, channels=2, encoding='wav')
#   processor.load_audio_from_bytes(audio_bytes, input_format)

# Confirms only that the class definition and constructor ran without error.
print("AudioProcessor class defined successfully.")

AudioProcessor class defined successfully.


In [None]:
# Test resample method
# processor.resample(16000)
# print(f"Resampled to {processor.format.sample_rate} Hz")

In [None]:
# Test convert_to_mono
# processor.convert_to_mono()
# print(f"Converted to {processor.format.channels} channel(s)")

In [None]:
# Test normalize_volume
# processor.normalize_volume()
# print(f"Normalized to {processor.audio.dBFS} dBFS")

In [None]:
# Test export_to_wav_bytes
# wav_bytes = processor.export_to_wav_bytes()
# print(f"Exported {len(wav_bytes)} bytes of WAV data")

In [None]:
# Test validate_audio_duration
# is_valid = processor.validate_audio_duration()
# print(f"Audio duration valid: {is_valid}")

In [1]:
# Import necessary libraries
from dataclasses import dataclass
from typing import Optional
import io
import numpy as np
from pydub import AudioSegment
import soundfile as sf

ModuleNotFoundError: No module named 'pydub'

In [2]:
# Install required packages
%pip install pydub soundfile librosa numpy scipy

Collecting soundfile
  Downloading soundfile-0.13.1-py2.py3-none-win_amd64.whl.metadata (16 kB)
Collecting librosa
  Downloading librosa-0.11.0-py3-none-any.whl.metadata (8.7 kB)
Collecting audioread>=2.1.9 (from librosa)
  Downloading audioread-3.1.0-py3-none-any.whl.metadata (9.0 kB)
Collecting pooch>=1.1 (from librosa)
  Downloading pooch-1.8.2-py3-none-any.whl.metadata (10 kB)
Collecting soxr>=0.3.2 (from librosa)
  Downloading soxr-1.0.0-cp311-cp311-win_amd64.whl.metadata (5.6 kB)
Collecting lazy_loader>=0.1 (from librosa)
  Downloading lazy_loader-0.4-py3-none-any.whl.metadata (7.6 kB)
Collecting msgpack>=1.0 (from librosa)
  Downloading msgpack-1.1.2-cp311-cp311-win_amd64.whl.metadata (8.4 kB)
Downloading soundfile-0.13.1-py2.py3-none-win_amd64.whl (1.0 MB)
   ---------------------------------------- 0.0/1.0 MB ? eta -:--:--
   ---------------------------------------- 1.0/1.0 MB 6.9 MB/s  0:00:00
Downloading librosa-0.11.0-py3-none-any.whl (260 kB)
Downloading audioread-3.1.0-py

In [None]:
# Define AudioFormat type
@dataclass
class AudioFormat:
    """Describes an audio stream: sample rate, channel count and encoding.

    Attributes:
        sample_rate: Sample rate as an integer.
        channels: Number of audio channels.
        encoding: Container/codec name such as 'wav' or 'mp3'.
    """

    sample_rate: int
    channels: int
    encoding: str

In [None]:
# Define AudioProcessor class
class AudioProcessor:
    """Hold one decoded audio clip and apply preprocessing steps to it.

    Attributes:
        audio: The currently loaded pydub ``AudioSegment``, or ``None`` when
            nothing has been loaded yet.
        format: An ``AudioFormat`` describing ``audio``, or ``None`` when
            nothing has been loaded yet. It is owned by this processor and is
            updated by ``resample`` and ``convert_to_mono``.

    All "is something loaded?" checks compare against ``None`` explicitly.
    An ``AudioSegment`` reports its duration through ``len()``, so a loaded
    but zero-length clip is falsy and must not be mistaken for "no audio".
    """

    def __init__(self):
        self.audio: Optional[AudioSegment] = None
        self.format: Optional[AudioFormat] = None

    def load_audio_from_bytes(self, audio_bytes: bytes, input_format: AudioFormat) -> None:
        """Decode ``audio_bytes`` with pydub and make it the current clip.

        Args:
            audio_bytes: Encoded audio data.
            input_format: Description of the input; its ``encoding`` is passed
                to pydub as the ``format`` argument.

        Raises:
            Whatever ``AudioSegment.from_file`` raises for undecodable input;
            in that case the previously loaded clip is left untouched.
        """
        # Decode into a local first so a failure cannot leave audio/format
        # out of step with each other.
        segment = AudioSegment.from_file(io.BytesIO(audio_bytes), format=input_format.encoding)
        self.audio = segment
        # Build a fresh AudioFormat instead of keeping the caller's object:
        # resample()/convert_to_mono() mutate self.format, which must not leak
        # back into the caller's instance. Rate and channel count are taken
        # from the decoded segment so the metadata matches the actual audio.
        self.format = AudioFormat(
            sample_rate=segment.frame_rate,
            channels=segment.channels,
            encoding=input_format.encoding,
        )

    def resample(self, target_sample_rate: int) -> None:
        """Resample the loaded clip to ``target_sample_rate``.

        Does nothing when no clip is loaded.
        """
        if self.audio is None:
            return
        self.audio = self.audio.set_frame_rate(target_sample_rate)
        if self.format is not None:
            self.format.sample_rate = target_sample_rate

    def convert_to_mono(self) -> None:
        """Down-mix the loaded clip to a single channel.

        Does nothing when no clip is loaded or the clip is already mono.
        """
        if self.audio is None or self.audio.channels <= 1:
            return
        self.audio = self.audio.set_channels(1)
        if self.format is not None:
            self.format.channels = 1

    def normalize_volume(self, target_dbfs: float = -20.0) -> None:
        """Apply gain so the clip's average loudness equals ``target_dbfs``.

        Does nothing when no clip is loaded. Silent audio is left unchanged:
        its dBFS is negative infinity, so the required gain would be infinite.
        """
        if self.audio is None:
            return
        current_dbfs = self.audio.dBFS
        if current_dbfs == float("-inf"):
            return
        self.audio = self.audio.apply_gain(target_dbfs - current_dbfs)

    def export_to_wav_bytes(self) -> bytes:
        """Return the loaded clip encoded as WAV bytes.

        Returns:
            The WAV data, or ``b""`` when no clip is loaded.
        """
        if self.audio is None:
            return b""
        buffer = io.BytesIO()
        self.audio.export(buffer, format="wav")
        return buffer.getvalue()

    def validate_audio_duration(self, max_seconds: float = 60.0) -> bool:
        """Check that a non-empty clip is loaded and is at most ``max_seconds`` long.

        Returns:
            ``False`` when no clip is loaded, when the clip has zero length,
            or when it is longer than ``max_seconds``; ``True`` otherwise.
        """
        if self.audio is None:
            return False
        duration_seconds = len(self.audio) / 1000.0  # pydub duration in ms
        return 0 < duration_seconds <= max_seconds

In [None]:
# Test the AudioProcessor class
processor = AudioProcessor()

# No audio is loaded at this point. To exercise the commented-out method
# calls in the cells below, first load a clip from bytes, for example:
#   audio_bytes = b'...your audio data...'
#   input_format = AudioFormat(sample_rate=44100, channels=2, encoding='wav')
#   processor.load_audio_from_bytes(audio_bytes, input_format)

# Confirms only that the class definition and constructor ran without error.
print("AudioProcessor class defined successfully.")

In [None]:
# Test resample method
# processor.resample(16000)
# print(f"Resampled to {processor.format.sample_rate} Hz")

In [None]:
# Test convert_to_mono
# processor.convert_to_mono()
# print(f"Converted to {processor.format.channels} channel(s)")

In [None]:
# Test normalize_volume
# processor.normalize_volume()
# print(f"Normalized to {processor.audio.dBFS} dBFS")

In [None]:
# Test export_to_wav_bytes
# wav_bytes = processor.export_to_wav_bytes()
# print(f"Exported {len(wav_bytes)} bytes of WAV data")

In [None]:
# Test validate_audio_duration
# is_valid = processor.validate_audio_duration()
# print(f"Audio duration valid: {is_valid}")