In [None]:
!pip install librosa soundfile pydub numpy tqdm


In [None]:
import os
import librosa
import soundfile as sf
from tqdm import tqdm

def standardize_audio(input_folder, output_folder, target_sr=16000):
    """Convert every WAV/MP3 file in ``input_folder`` to a mono WAV at ``target_sr``.

    Args:
        input_folder: Directory whose direct entries are scanned for files
            with a ``.wav`` or ``.mp3`` extension (case-insensitive).
        output_folder: Directory that receives the converted files; created
            if it does not already exist.
        target_sr: Sample rate, in Hz, the audio is resampled to on load and
            written with.

    Note:
        Each output is named ``<stem>.wav``, so two inputs sharing a stem
        (for example ``a.mp3`` and ``a.wav``) write to the same output path.
    """
    os.makedirs(output_folder, exist_ok=True)

    for file in tqdm(os.listdir(input_folder)):
        stem, ext = os.path.splitext(file)
        if ext.lower() not in (".wav", ".mp3"):
            continue

        path = os.path.join(input_folder, file)
        y, _ = librosa.load(path, sr=target_sr, mono=True)

        # Build the name from the stem so that "A.MP3" or "X.WAV" also end up
        # with a lowercase ".wav" suffix, and so that a ".mp3" appearing in
        # the middle of a file name is left alone.
        out_path = os.path.join(output_folder, stem + ".wav")

        sf.write(out_path, y, target_sr)


In [None]:
# Stage 1: convert the files in raw_audio/ using the default target_sr.
standardize_audio(input_folder="raw_audio", output_folder="clean_step1_sr")


In [None]:
import numpy as np

def trim_silence(input_folder, output_folder, top_db=25):
    """Run ``librosa.effects.trim`` over every WAV file in ``input_folder``.

    Args:
        input_folder: Directory whose direct entries are scanned for files
            ending in ``.wav`` (case-insensitive).
        output_folder: Directory that receives the trimmed files under their
            original names; created if it does not already exist.
        top_db: Forwarded unchanged to ``librosa.effects.trim`` as its
            ``top_db`` threshold.
    """
    os.makedirs(output_folder, exist_ok=True)

    for file in tqdm(os.listdir(input_folder)):
        # Case-insensitive so that names such as "CLIP.WAV" are not silently
        # skipped by this stage.
        if not file.lower().endswith(".wav"):
            continue

        audio_path = os.path.join(input_folder, file)
        # sr=None: load at the file's own sample rate instead of resampling.
        y, sr = librosa.load(audio_path, sr=None)

        # Trim silence; the second return value (the kept interval) is unused.
        trimmed, _ = librosa.effects.trim(y, top_db=top_db)

        sf.write(os.path.join(output_folder, file), trimmed, sr)


In [None]:
# Stage 2: trim the stage-1 output using the default top_db threshold.
trim_silence(input_folder="clean_step1_sr", output_folder="clean_step2_trimmed")


In [None]:
def normalize_audio(input_folder, output_folder):
    """Peak-normalize every WAV file in ``input_folder``.

    Each clip is divided by its largest absolute sample value, so the loudest
    sample ends up at magnitude 1. Clips with no non-zero peak (all-zero or
    empty audio) are written through unchanged, because dividing by a zero
    peak would fill the output with NaNs.

    Args:
        input_folder: Directory whose direct entries are scanned for files
            ending in ``.wav`` (case-insensitive).
        output_folder: Directory that receives the normalized files under
            their original names; created if it does not already exist.
    """
    os.makedirs(output_folder, exist_ok=True)

    for file in tqdm(os.listdir(input_folder)):
        if not file.lower().endswith(".wav"):
            continue

        path = os.path.join(input_folder, file)
        # sr=None: load at the file's own sample rate instead of resampling.
        y, sr = librosa.load(path, sr=None)

        # np.max raises on an empty array, so treat an empty clip as peak 0.
        peak = np.max(np.abs(y)) if y.size else 0.0
        if peak > 0:
            y = y / peak  # normalize to -1..1

        sf.write(os.path.join(output_folder, file), y, sr)


In [None]:
# Stage 3: peak-normalize the trimmed clips.
normalize_audio(input_folder="clean_step2_trimmed", output_folder="clean_step3_normalized")


In [None]:
def filter_duration(input_folder, output_folder, min_sec=3, max_sec=10):
    """Write to ``output_folder`` only the WAV files of an accepted length.

    A clip is kept when ``min_sec <= duration <= max_sec`` (both bounds
    inclusive), where duration is the number of loaded samples divided by the
    sample rate. Clips outside that range are simply not written.

    Args:
        input_folder: Directory whose direct entries are scanned for files
            ending in ``.wav`` (case-insensitive).
        output_folder: Directory that receives the kept files under their
            original names; created if it does not already exist.
        min_sec: Shortest accepted duration, in seconds.
        max_sec: Longest accepted duration, in seconds.
    """
    os.makedirs(output_folder, exist_ok=True)

    for file in tqdm(os.listdir(input_folder)):
        # Case-insensitive so that names such as "CLIP.WAV" are not silently
        # skipped by this stage.
        if not file.lower().endswith(".wav"):
            continue

        path = os.path.join(input_folder, file)
        # sr=None: load at the file's own sample rate instead of resampling.
        y, sr = librosa.load(path, sr=None)

        dur = len(y) / sr

        if min_sec <= dur <= max_sec:
            sf.write(os.path.join(output_folder, file), y, sr)


In [None]:
# Stage 4: keep only clips within the default min_sec..max_sec range.
filter_duration(input_folder="clean_step3_normalized", output_folder="clean_final_audio")
