In [110]:
import torchaudio
import torch
from IPython.display import Audio

In [111]:
# Sample utterance that the augmentation demo cells in this notebook operate on.
# NOTE(review): absolute, machine-specific path — consider a configurable data dir.
sample_path = '/home/ubuntu/asr_project_template/test_data/audio/84-121550-0000.flac'
wf, sr = torchaudio.load(sample_path)

In [112]:
# Play back the unmodified recording as a baseline for the augmented versions.
Audio(data=wf, rate=sr)

# Let's test some augmentations

In [113]:
def random_loudness(wf, min_top, max_top):
    """Peak-normalize a waveform and rescale it to a random peak level.

    The target peak is drawn uniformly from ``[min_top, max_top)`` and the
    drawn value is printed for inspection.

    Args:
        wf: Waveform tensor (any shape).
        min_top: Lower bound of the target peak amplitude.
        max_top: Upper bound of the target peak amplitude.

    Returns:
        A new tensor shaped like ``wf`` whose largest absolute sample equals
        the drawn scale. An empty or all-zero input is returned as an
        unchanged copy, since there is no peak to normalize by.
    """
    scale = torch.rand(1).item() * (max_top - min_top) + min_top
    print('scale:', scale)

    # Normalize by the largest magnitude, not the largest signed value: a
    # waveform whose negative excursion dominates would otherwise come out
    # louder than the requested peak (or sign-flipped if it is all-negative).
    peak = wf.abs().max() if wf.numel() > 0 else None
    if peak is None or peak == 0:
        # Dividing by a zero peak would fill the result with NaN.
        return wf.clone()
    return wf / peak * scale

# normalize=False keeps the player from rescaling the clip, so the random gain stays audible.
loud_wf = random_loudness(wf, 0.5, 1)
Audio(loud_wf, rate=sr, normalize=False)

scale: 0.8269729614257812


In [114]:
def calc_energy_db(wav):
    """Return the mean power of ``wav`` in decibels.

    Computed as ``10 * log10(mean(wav ** 2))`` over all elements; the result
    is a zero-dimensional tensor.
    """
    mean_power = torch.mean(torch.square(wav))
    return 10 * torch.log10(mean_power)

def add_gausian_noise(wf, snr_min, snr_max):
    """Add white Gaussian noise to a waveform at a random signal-to-noise ratio.

    The SNR (in dB) is drawn uniformly from ``[snr_min, snr_max)`` and printed.
    The noise is then scaled so that
    ``calc_energy_db(wf) - calc_energy_db(scaled_noise)`` equals that SNR.

    Args:
        wf: Floating-point waveform tensor.
        snr_min: Lower bound of the SNR range, in dB.
        snr_max: Upper bound of the SNR range, in dB.

    Returns:
        A new tensor shaped like ``wf`` containing the signal plus noise.
    """
    snr = torch.rand(1).item() * (snr_max - snr_min) + snr_min
    print("snr:", snr)
    # Draw the noise with wf's own dtype and device so that the sum below also
    # works for non-default inputs (e.g. CUDA tensors). The 0.01 starting
    # level is arbitrary because the noise is rescaled by `gain`.
    noise = torch.randn_like(wf) * 0.01
    # dB by which the noise must be shifted to hit the requested SNR.
    db_change = calc_energy_db(wf) - calc_energy_db(noise) - snr
    # 10 ** (dB / 10) is a power ratio; its square root is the amplitude gain.
    gain = (10 ** (db_change / 10)) ** 0.5

    return wf + noise * gain

# Listen to the first 32000 samples of the noisy signal, without player-side rescaling.
noisy_wf = add_gausian_noise(wf, 10, 25)
Audio(noisy_wf[:, :32000], rate=sr, normalize=False)

snr: 23.362032175064087


In [115]:
def random_low_pass(wf, freq_min, freq_max, sample_rate=16000, Q=10):
    """Apply a low-pass biquad filter with a randomly chosen cutoff frequency.

    The cutoff is sampled log-uniformly between ``freq_min`` and ``freq_max``
    (uniform in log-frequency, then exponentiated) and printed.

    Args:
        wf: Waveform tensor to filter.
        freq_min: Lower bound of the cutoff frequency; must be positive
            because its logarithm is taken.
        freq_max: Upper bound of the cutoff frequency; must be positive.
        sample_rate: Sampling rate of ``wf``. Defaults to 16000, the value
            that used to be hard-coded; pass the rate returned by the loader
            for audio recorded at a different rate.
        Q: Quality factor forwarded to the filter. Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        The filtered waveform returned by
        ``torchaudio.functional.lowpass_biquad``.
    """
    # Cast to float so the logarithm is taken on a floating-point tensor even
    # when the bounds are given as Python ints.
    log_lo = torch.log(torch.tensor(float(freq_min)))
    log_hi = torch.log(torch.tensor(float(freq_max)))
    freq = torch.exp(torch.rand(1).item() * (log_hi - log_lo) + log_lo)
    print("freq:", freq)
    return torchaudio.functional.lowpass_biquad(wf, sample_rate, freq, Q)

# Listen to the first 32000 samples of the low-passed signal, without player-side rescaling.
filtered_wf = random_low_pass(wf, 1000, 8000)
Audio(filtered_wf[:, :32000], rate=sr, normalize=False)

freq: tensor(1305.0615)


# Let's check that the implementation works

In [5]:
from hw_asr.tests.utils import clear_log_folder_after_use
from hw_asr.utils.parse_config import ConfigParser
from hw_asr.utils.object_loading import get_dataloaders

if __name__ == "__main__":
    # Waveform augmentation entries to plug into the test config. HighPass is
    # applied unconditionally here; an earlier variant wrapped it in a
    # "RandomApply" entry with p=0.5.
    noise_aug = {"type": "AddNoise", "args": {"snr_min": 10, "snr_max": 25}}
    high_pass_aug = {"type": "HighPass", "args": {"freq_min": 10, "freq_max": 500}}
    low_pass_aug = {"type": "LowPass", "args": {"freq_min": 1000, "freq_max": 8000}}
    loudness_aug = {"type": "ChangeLoudness", "args": {"min_top": 0.5, "max_top": 1}}
    my_wave_augs = [noise_aug, high_pass_aug, low_pass_aug, loudness_aug]

    # Override the wave augmentations of the test config, then build the
    # dataloaders from it inside the log-folder cleanup context.
    # NOTE(review): presumably building the dataloaders is what instantiates
    # the augmentations — confirm against hw_asr.
    config_parser = ConfigParser.get_test_configs()
    config_parser._config['augmentations']['wave'] = my_wave_augs
    with clear_log_folder_after_use(config_parser):
        text_encoder = config_parser.get_text_encoder()
        dataloaders = get_dataloaders(config_parser, text_encoder)

ModuleNotFoundError: No module named 'hw_asr'