# Preprocess Audio Files

### Libraries:
- pydub (`AudioSegment`)
- os

In [1]:
from pydub import AudioSegment
import os

### Preprocessing Function:

In [2]:
def preprocess_audio(file_path, output_path, sr=16000, length=30*1000):
    """Resample, peak-normalize and fix the duration of one audio file.

    Args:
        file_path: Path of the audio file to load.
        output_path: Path the processed audio is exported to, as WAV.
        sr: Target frame rate in Hz.
        length: Target duration in milliseconds (pydub measures segment
            length in ms). Longer audio is truncated; shorter audio is
            padded with trailing silence.
    """
    # Load audio file
    audio = AudioSegment.from_file(file_path)

    # Resample to desired sampling rate
    audio = audio.set_frame_rate(sr)

    # Normalize so the loudest sample sits at 0 dBFS. A completely silent
    # (or empty) clip reports a peak of -inf dBFS, which would turn into an
    # infinite gain, so such clips are left as they are.
    peak_dbfs = audio.max_dBFS
    if peak_dbfs != float("-inf"):
        audio = audio.apply_gain(-peak_dbfs)

    # Trim or pad to the desired length
    missing = length - len(audio)
    if missing < 0:
        audio = audio[:length]
    elif missing > 0:
        # Build the silence at the clip's own frame rate. The default for
        # AudioSegment.silent is 11025 Hz, and concatenation syncs both
        # segments to the higher rate, so a lower `sr` would otherwise be
        # silently overridden in the output.
        padding = AudioSegment.silent(duration=missing,
                                      frame_rate=audio.frame_rate)
        audio = audio + padding

    # Export the preprocessed audio
    audio.export(output_path, format="wav")

### Retrieving and Saving Paths

In [3]:
# Source folder holding the downloaded audio segments
input_dir = "audio_segments"

# Destination folder for the preprocessed segments; created up front,
# with no error if it is already there
output_dir = "preprocessed_audio_segments"
os.makedirs(output_dir, exist_ok=True)

### Preprocessing each audio file

In [4]:
# Walk the input directory in sorted order so runs are reproducible;
# os.listdir itself returns entries in arbitrary order.
for file_name in sorted(os.listdir(input_dir)):
    # Compare the extension case-insensitively so that e.g. "clip.WAV"
    # is not silently skipped.
    if not file_name.lower().endswith('.wav'):
        continue
    file_path = os.path.join(input_dir, file_name)
    # Ignore directories (or other non-file entries) whose name merely
    # ends in ".wav"; they cannot be decoded as audio.
    if not os.path.isfile(file_path):
        continue
    # The output keeps the original file name inside output_dir.
    output_path = os.path.join(output_dir, file_name)
    preprocess_audio(file_path, output_path)