In [1]:
# Step 1: Install system dependencies (ffmpeg for audio processing & MP3)
!apt-get update -qq && apt-get install -y -qq ffmpeg

# Step 2: Install latest audiomentations + extras (includes pyroomacoustics for RoomSimulator)
!pip install --upgrade audiomentations[extras] pydub librosa soundfile

# Step 3: Imports (correct paths - no .core.transforms)
import os
import shutil
import numpy as np
import librosa
import soundfile as sf
from audiomentations import (
    Compose,
    AddGaussianNoise,
    TimeStretch,
    PitchShift,
    RoomSimulator,           # Now available after upgrade + extras
    AddBackgroundNoise,      # Good proxy for wind/crowd if you have noise files
    ClippingDistortion,
    LowPassFilter,
    HighPassFilter
)
from pydub import AudioSegment

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Collecting audiomentations[extras]
  Downloading audiomentations-0.43.1-py3-none-any.whl.metadata (11 kB)
Collecting numpy-minmax<1,>=0.3.0 (from audiomentations[extras])
  Downloading numpy_minmax-0.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)
Collecting numpy-rms<1,>=0.4.2 (from audiomentations[extras])
  Downloading numpy_rms-0.6.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.5 kB)
Collecting python-stretch<1,>=0.3.1 (from audiomentations[extras])
  Downloading python_stretch-0.3.1-cp312-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.7 kB)
Collecting soxr<1.0.0,>=0.3.2 (from audiomentations[extras])
  Downloading soxr-0.5.0.post1-cp312-abi3-ma

  m = re.match('([su]([0-9]{1,2})p?) \(([0-9]{1,2}) bit\)$', token)
  m2 = re.match('([su]([0-9]{1,2})p?)( \(default\))?$', token)
  elif re.match('(flt)p?( \(default\))?$', token):
  elif re.match('(dbl)p?( \(default\))?$', token):


In [22]:
# ────────────────────────────────────────────────
# Paths - set `original_file` to YOUR file name; the copy and the log line follow it
# ────────────────────────────────────────────────
original_file = 'WhatsApp Audio 2026-02-15 at 9.10.07 PM.mpeg'          # ← your 2-second file
aug_dir = 'augmented_audios'
os.makedirs(aug_dir, exist_ok=True)

# Copy original (unchanged). The destination name is derived from `original_file`
# so that changing the input above is the only edit needed.
original_name = os.path.basename(original_file)
shutil.copy(original_file, os.path.join(aug_dir, original_name))
print(f"Original copied → {aug_dir}/{original_name}")

# Load once; sr=None asks librosa to keep the file's own sample rate.
audio, sr = librosa.load(original_file, sr=None)
print(f"Audio loaded: {len(audio)/sr:.2f} seconds @ {sr} Hz")

# Helper to save
def save_augmented(y, name, subdir=None):
    """Write `y` to `aug_<name>.wav` under the augmentation output directory.

    Args:
        y: Audio samples, handed to `sf.write` as-is.
        name: Variant label; the file is named ``aug_{name}.wav``.
        subdir: Optional sub-folder of `aug_dir`; created if missing.

    Uses the notebook-level `aug_dir` and `sr` (the sample rate of the loaded
    clip), and prints the path that was written.
    """
    target_dir = aug_dir
    if subdir:
        target_dir = os.path.join(aug_dir, subdir)
        os.makedirs(target_dir, exist_ok=True)
    path = os.path.join(target_dir, f'aug_{name}.wav')
    sf.write(path, y, sr)
    print(f"Saved: {path}")

Original copied → augmented_audios/WhatsApp Audio 2026-02-15 at 9.10.07 PM.mpeg
Audio loaded: 2.64 seconds @ 48000 Hz


In [3]:
# ────────────────────────────────────────────────
# 1. Cross-Device Simulation (downsample to telephony-like)
# ────────────────────────────────────────────────
def aug_cross_device(y, sr, target_sr=8000):
    """Band-limit `y` by resampling down to `target_sr` and back up to `sr`.

    Args:
        y: Audio samples as a NumPy array (time on the last axis).
        sr: Sample rate of `y` in Hz.
        target_sr: Intermediate low rate in Hz; defaults to 8000 as before.

    Returns:
        An array with the same number of samples as `y`, at rate `sr`.
    """
    if sr <= target_sr:
        # Already at or below the intermediate rate: there is nothing to remove,
        # so skip the pointless up/down round trip.
        return np.array(y, copy=True)

    narrowband = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
    restored = librosa.resample(narrowband, orig_sr=target_sr, target_sr=sr)
    # Each resample rounds its output length, so the round trip can come back a
    # few samples off; pad/trim so the result lines up with the input.
    return librosa.util.fix_length(restored, size=np.shape(y)[-1])

save_augmented(aug_cross_device(audio, sr), 'cross_device')

Saved: augmented_audios/aug_cross_device.wav


In [4]:
# ────────────────────────────────────────────────
# 2. MP3 / Compression artifacts (using pydub - reliable)
# ────────────────────────────────────────────────
def aug_compression(y, sr, bitrate="64k"):
    """Round-trip `y` through MP3 encoding to introduce compression artifacts.

    The samples are written to a WAV file, encoded to MP3 with pydub, decoded
    again, averaged down to one channel if needed, and resampled back to `sr`.

    Args:
        y: Audio samples to compress.
        sr: Sample rate of `y` in Hz.
        bitrate: MP3 bitrate string passed to pydub; defaults to "64k" as before.

    Returns:
        Float32 mono samples at rate `sr`.
    """
    import tempfile  # local import so this cell does not depend on another cell

    # A private temporary directory avoids clobbering 'temp_in.wav'/'temp.mp3' in
    # the working directory and is removed even if encoding or decoding fails.
    with tempfile.TemporaryDirectory() as work_dir:
        wav_path = os.path.join(work_dir, 'in.wav')
        mp3_path = os.path.join(work_dir, 'out.mp3')
        sf.write(wav_path, y, sr)
        # export() returns the open output file; close it so no handle is left
        # on the MP3 when the directory is cleaned up.
        AudioSegment.from_wav(wav_path).export(mp3_path, format="mp3", bitrate=bitrate).close()
        seg_comp = AudioSegment.from_mp3(mp3_path)

    # Scale by the decoded sample width instead of assuming 16-bit (32768).
    full_scale = float(1 << (8 * seg_comp.sample_width - 1))
    y_comp = np.array(seg_comp.get_array_of_samples(), dtype=np.float32) / full_scale
    if seg_comp.channels > 1:
        # Samples are interleaved per frame; average the channels down to mono.
        y_comp = y_comp.reshape(-1, seg_comp.channels).mean(axis=1)
    return librosa.resample(y_comp, orig_sr=seg_comp.frame_rate, target_sr=sr)

save_augmented(aug_compression(audio, sr), 'compression')

Saved: augmented_audios/aug_compression.wav


In [5]:
# ────────────────────────────────────────────────
# 3. Reverberation (RoomSimulator - needs pyroomacoustics)
# ────────────────────────────────────────────────
# Single always-applied (p=1.0) RoomSimulator; the room size is drawn between the
# min/max bounds given per axis, and the output keeps the input length.
aug_reverb = Compose([
    RoomSimulator(
        p=1.0,
        leave_length_unchanged=True,
        min_size_x=2.0, min_size_y=2.0, min_size_z=2.0,
        max_size_x=10.0, max_size_y=10.0, max_size_z=5.0,
    ),
])

save_augmented(
    y=aug_reverb(audio, sample_rate=sr),
    name='reverberation',
)

Saved: augmented_audios/aug_reverberation.wav


In [6]:
# ────────────────────────────────────────────────
# 4. Background overlap / crowd proxy (Gaussian or upload real noise files later)
# ────────────────────────────────────────────────
# Always-applied Gaussian noise, amplitude drawn between 0.015 and 0.04, used as a
# stand-in for background/crowd noise.
aug_bg = Compose([
    AddGaussianNoise(
        p=1.0,
        min_amplitude=0.015,
        max_amplitude=0.04,
    ),
])

save_augmented(
    y=aug_bg(audio, sample_rate=sr),
    name='bg_overlap_proxy',
)

Saved: augmented_audios/aug_bg_overlap_proxy.wav


In [7]:
# ────────────────────────────────────────────────
# 5. Wind / environmental (low-freq emphasis + noise)
# ────────────────────────────────────────────────
# Low-pass the clip (cutoff drawn between 80 and 400 Hz), then add Gaussian noise.
# NOTE(review): the filter acts on the recording itself, not on the added noise —
# confirm that this is the intended "wind" proxy.
aug_wind = Compose([
    # p=1.0 (was 0.8): this pipeline runs exactly once to produce one named file,
    # so a probabilistic skip would make roughly 1 run in 5 save an unfiltered
    # clip under the 'wind_env' name. Every other cell here already uses p=1.0.
    LowPassFilter(min_cutoff_freq=80, max_cutoff_freq=400, p=1.0),
    AddGaussianNoise(min_amplitude=0.008, max_amplitude=0.025, p=1.0)
])

save_augmented(aug_wind(audio, sample_rate=sr), 'wind_env')

Saved: augmented_audios/aug_wind_env.wav


In [8]:
# ────────────────────────────────────────────────
# 6. Clipping / distortion
# ────────────────────────────────────────────────
# Always-applied clipping distortion with the percentile threshold drawn
# between 15 and 35.
aug_clip = Compose([
    ClippingDistortion(
        p=1.0,
        min_percentile_threshold=15,
        max_percentile_threshold=35,
    ),
])

save_augmented(
    y=aug_clip(audio, sample_rate=sr),
    name='clipping',
)

Saved: augmented_audios/aug_clipping.wav


In [9]:
# ────────────────────────────────────────────────
# 7. Time stretching
# ────────────────────────────────────────────────
# Always-applied time stretch with the rate drawn between 0.85 and 1.15.
aug_stretch = Compose([
    TimeStretch(
        p=1.0,
        min_rate=0.85,
        max_rate=1.15,
    ),
])

save_augmented(
    y=aug_stretch(audio, sample_rate=sr),
    name='time_stretch',
)

Saved: augmented_audios/aug_time_stretch.wav


In [10]:
# ────────────────────────────────────────────────
# 8. Pitch shift
# ────────────────────────────────────────────────
# Always-applied pitch shift with the amount drawn between -5 and +5 semitones.
aug_pitch = Compose([
    PitchShift(
        p=1.0,
        min_semitones=-5,
        max_semitones=5,
    ),
])

save_augmented(
    y=aug_pitch(audio, sample_rate=sr),
    name='pitch_shift',
)

Saved: augmented_audios/aug_pitch_shift.wav


In [11]:
# ────────────────────────────────────────────────
# 9. Channel imbalance simulation (mono → gain-imbalanced pseudo-stereo)
# ────────────────────────────────────────────────
def aug_imbalance(y, left_gain=0.6, right_gain=1.4, fold_to_mono=True):
    """Build a two-channel copy of mono `y` with a different gain per channel.

    Args:
        y: Mono samples, shape (n,).
        left_gain: Gain applied to the left channel.
        right_gain: Gain applied to the right channel.
        fold_to_mono: If True (default, the previous behaviour) average the two
            channels and return shape (n,). If False return the stereo array
            of shape (n, 2) with columns [left, right].

    Returns:
        The folded mono signal or the (n, 2) stereo signal.

    Note:
        Folding to mono yields ``y * (left_gain + right_gain) / 2``. With the
        default gains that factor is exactly 1.0, so the mono result is just
        `y` again — the imbalance only survives in the stereo form.
    """
    y = np.asarray(y)
    stereo = np.stack([y * left_gain, y * right_gain], axis=1)
    if fold_to_mono:
        return stereo.mean(axis=1)
    return stereo

# Save the stereo form: averaging the 0.6/1.4 channels back to mono reproduces the
# input sample-for-sample, which would make this file a copy of the original.
save_augmented(aug_imbalance(audio, fold_to_mono=False), 'channel_imbalance')

Saved: augmented_audios/aug_channel_imbalance.wav


In [12]:
# ────────────────────────────────────────────────
# 10. Packet loss simulation
# ────────────────────────────────────────────────
def aug_packet_loss(y, sr, drop_ms=20, num_drops=4, seed=None):
    """Zero out randomly placed short segments of `y` to mimic lost packets.

    Args:
        y: Audio samples; the input is not modified.
        sr: Sample rate of `y` in Hz.
        drop_ms: Length of each dropped segment in milliseconds.
        num_drops: Number of segments to drop (segments may overlap).
        seed: Optional seed for reproducible placement. When None the global
            NumPy random state is used, as before.

    Returns:
        A copy of `y` with the dropped segments set to 0.
    """
    y = np.array(y, copy=True)
    total = len(y)
    # A drop can never be longer than the clip itself; without this clamp a short
    # clip made the random start range empty and raised ValueError.
    drop_len = min(int(drop_ms / 1000 * sr), total)
    if drop_len <= 0 or num_drops <= 0:
        return y

    rng = np.random if seed is None else np.random.RandomState(seed)
    last_start = total - drop_len
    for _ in range(num_drops):
        # randint's upper bound is exclusive, so +1 makes the final window
        # (the one ending on the last sample) selectable too.
        start = rng.randint(0, last_start + 1)
        y[start:start + drop_len] = 0
    return y

# Write the packet-loss variant of the loaded clip as aug_packet_loss.wav.
save_augmented(
    y=aug_packet_loss(audio, sr),
    name='packet_loss',
)


Saved: augmented_audios/aug_packet_loss.wav


In [14]:
# ────────────────────────────────────────────────
# NEW: 4. Adversarial Frequency Injection (add low-amp band-limited noise in 4-8kHz)
# ────────────────────────────────────────────────
from scipy import signal
def aug_adversarial_freq(y, sr, low_hz=4000.0, high_hz=8000.0, amplitude=0.02, seed=None):
    """Add low-amplitude noise confined to a frequency band (4–8 kHz by default).

    Args:
        y: Audio samples.
        sr: Sample rate of `y` in Hz.
        low_hz: Lower band edge in Hz.
        high_hz: Upper band edge in Hz; pulled just below Nyquist if it reaches it.
        amplitude: Scale applied to the band-passed unit-variance noise.
        seed: Optional seed for reproducible noise. When None the global NumPy
            random state is used, as before.

    Returns:
        `y` plus the band-limited noise, same length as `y`.

    Raises:
        ValueError: If the band does not fit below the Nyquist frequency.
    """
    y = np.asarray(y)
    if len(y) == 0:
        return y.copy()

    nyquist = sr / 2
    if not 0 < low_hz < nyquist:
        raise ValueError(
            f"low_hz={low_hz} must lie strictly between 0 and the Nyquist frequency ({nyquist} Hz)"
        )
    # butter() needs normalised edges strictly inside (0, 1). At sr <= 16 kHz the
    # default 8 kHz edge sits on or above Nyquist, which used to raise.
    high_hz = min(high_hz, 0.99 * nyquist)
    if high_hz <= low_hz:
        raise ValueError(f"band [{low_hz}, {high_hz}] Hz is empty at sr={sr}")

    # Second-order sections are numerically better behaved than b/a coefficients
    # for a band-pass of this order.
    sos = signal.butter(5, [low_hz / nyquist, high_hz / nyquist], btype='band', output='sos')

    # Generate white noise, filter to band, scale to low amplitude
    rng = np.random if seed is None else np.random.RandomState(seed)
    noise = rng.normal(0, 1, len(y))
    band_noise = signal.sosfilt(sos, noise) * amplitude
    return y + band_noise

# Write the band-noise variant of the loaded clip as aug_adversarial_freq.wav.
save_augmented(
    y=aug_adversarial_freq(audio, sr),
    name='adversarial_freq',
)

Saved: augmented_audios/aug_adversarial_freq.wav


In [15]:
# ────────────────────────────────────────────────
# NEW: 5. Overlapping Speaker (proxy: add delayed self-audio as "speaker")
# ────────────────────────────────────────────────
def aug_overlapping_speaker(y, sr):
    """Mix `y` with a copy of itself delayed by 0.5 s and scaled by 0.7.

    The delayed copy is zero for its first half second and is cut to the length
    of `y`, so the result has the same number of samples as the input.
    """
    n_samples = len(y)
    offset = int(0.5 * sr)

    # Build the delayed voice: silence up to `offset`, then the start of `y`.
    echo = np.zeros_like(y)
    if offset < n_samples:
        echo[offset:] = y[:n_samples - offset]

    return y + echo * 0.7

# Write the self-overlap variant of the loaded clip as aug_overlapping_speaker.wav.
save_augmented(
    y=aug_overlapping_speaker(audio, sr),
    name='overlapping_speaker',
)

Saved: augmented_audios/aug_overlapping_speaker.wav


In [16]:
# ────────────────────────────────────────────────
# NEW: 6. Partial Deepfake (first half normal, second half altered e.g. pitch shift proxy)
# ────────────────────────────────────────────────
def aug_partial_deepfake(y, sr):
    """Keep the first half of `y` untouched and pitch-shift the second half.

    The second half goes through a PitchShift whose minimum and maximum are
    both 3 semitones, and is then joined back onto the unmodified first half.
    """
    midpoint = len(y) // 2
    untouched_head, tail = y[:midpoint], y[midpoint:]
    # Local pipeline; named so that it does not shadow the notebook-level `aug_pitch`.
    shift_tail = Compose([PitchShift(min_semitones=3, max_semitones=3, p=1.0)])
    return np.concatenate([untouched_head, shift_tail(tail, sample_rate=sr)])

save_augmented(
    y=aug_partial_deepfake(audio, sr),
    name='partial_deepfake',
)

Saved: augmented_audios/aug_partial_deepfake.wav


In [17]:
# ────────────────────────────────────────────────
# NEW: 7. Time-Stretch + Pitch-Shift Manipulation
# ────────────────────────────────────────────────
# Two always-applied steps in sequence: a mild time stretch (rate 0.95–1.05)
# followed by a pitch shift of up to ±2 semitones.
aug_time_pitch = Compose([
    TimeStretch(
        p=1.0,
        min_rate=0.95,
        max_rate=1.05,
    ),
    PitchShift(
        p=1.0,
        min_semitones=-2,
        max_semitones=2,
    ),
])
save_augmented(
    y=aug_time_pitch(audio, sample_rate=sr),
    name='time_pitch_manip',
)

Saved: augmented_audios/aug_time_pitch_manip.wav


In [18]:
# ────────────────────────────────────────────────
# NEW: 8. Background TV/Music (proxy: low-pass noise for "muffled" content)
# ────────────────────────────────────────────────
# Low-pass the clip (cutoff drawn between 200 and 1000 Hz), then add Gaussian noise.
# NOTE(review): as written the low-pass is applied to the recording itself and the
# added noise is unfiltered — confirm this matches the "muffled background" intent.
aug_bg_tv = Compose([
    LowPassFilter(
        p=1.0,
        min_cutoff_freq=200,
        max_cutoff_freq=1000,
    ),
    AddGaussianNoise(
        p=1.0,
        min_amplitude=0.01,
        max_amplitude=0.03,
    ),
])
save_augmented(
    y=aug_bg_tv(audio, sample_rate=sr),
    name='bg_tv_music',
)

Saved: augmented_audios/aug_bg_tv_music.wav


In [19]:
# ────────────────────────────────────────────────
# NEW: 9. Low SNR Stress (multiple files at different SNR)
# ────────────────────────────────────────────────
def aug_low_snr(y, sr, snr_db):
    """Add zero-mean Gaussian noise to `y` at the requested signal-to-noise ratio.

    Args:
        y: Audio samples as a NumPy array.
        sr: Sample rate; accepted for a uniform signature but not used here.
        snr_db: Target SNR in decibels (signal power over noise power).

    Returns:
        `y` plus noise whose variance is the mean power of `y` divided by
        ``10 ** (snr_db / 10)``.
    """
    snr_linear = 10 ** (snr_db / 10)
    noise_std = np.sqrt(np.mean(y ** 2) / snr_linear)
    return y + np.random.normal(0, noise_std, len(y))

# One output file per target SNR, each named low_snr_<snr>db.
for snr in (20, 10, 5, 0):
    save_augmented(
        y=aug_low_snr(audio, sr, snr_db=snr),
        name=f'low_snr_{snr}db',
    )

Saved: augmented_audios/aug_low_snr_20db.wav
Saved: augmented_audios/aug_low_snr_10db.wav
Saved: augmented_audios/aug_low_snr_5db.wav
Saved: augmented_audios/aug_low_snr_0db.wav


In [23]:
# ────────────────────────────────────────────────
# NEW: 10. Real-Time Streaming Chunks (save 1s and 0.5s chunks as separate files)
# ────────────────────────────────────────────────
def aug_streaming_chunks(y, sr, chunk_sec):
    """Split `y` into consecutive chunks of `chunk_sec` seconds and save each one.

    Chunks are saved through `save_augmented` as
    ``stream_chunk_{chunk_sec}s_{index}`` inside the ``chunks_{chunk_sec}s``
    sub-folder. The final chunk holds whatever samples remain and may therefore
    be shorter than `chunk_sec`.

    Args:
        y: Audio samples.
        sr: Sample rate of `y` in Hz.
        chunk_sec: Chunk duration in seconds.

    Raises:
        ValueError: If `chunk_sec` and `sr` give less than one sample per chunk.
    """
    chunk_samples = int(chunk_sec * sr)
    if chunk_samples <= 0:
        # Previously a zero step surfaced as an opaque range() error and a
        # negative one silently produced no files.
        raise ValueError(
            f"chunk_sec={chunk_sec!r} at sr={sr!r} gives {chunk_samples} samples per chunk; need at least 1"
        )

    # Every start offset is below len(y), so each slice is non-empty and no
    # emptiness check is needed.
    for index, start in enumerate(range(0, len(y), chunk_samples)):
        save_augmented(
            y[start:start + chunk_samples],
            f'stream_chunk_{chunk_sec}s_{index}',
            subdir=f'chunks_{chunk_sec}s',
        )

# Export the loaded clip twice: once as 1-second chunks, once as half-second chunks.
aug_streaming_chunks(audio, sr, chunk_sec=1.0)
aug_streaming_chunks(audio, sr, chunk_sec=0.5)

Saved: augmented_audios/chunks_1.0s/aug_stream_chunk_1.0s_0.wav
Saved: augmented_audios/chunks_1.0s/aug_stream_chunk_1.0s_1.wav
Saved: augmented_audios/chunks_1.0s/aug_stream_chunk_1.0s_2.wav
Saved: augmented_audios/chunks_0.5s/aug_stream_chunk_0.5s_0.wav
Saved: augmented_audios/chunks_0.5s/aug_stream_chunk_0.5s_1.wav
Saved: augmented_audios/chunks_0.5s/aug_stream_chunk_0.5s_2.wav
Saved: augmented_audios/chunks_0.5s/aug_stream_chunk_0.5s_3.wav
Saved: augmented_audios/chunks_0.5s/aug_stream_chunk_0.5s_4.wav
Saved: augmented_audios/chunks_0.5s/aug_stream_chunk_0.5s_5.wav
