# Adding Speed Perturbation
Speed perturbation acts on the audio signal itself, so it cannot (or at least should not) be applied once the audio signal is in `torch.Tensor` form. Currently we load audio with:

```python
audio, rate = torchaudio.load(path)
```
This uses the loading capabilities of the selected backend. In all practical cases (i.e. everywhere except on Windows) the backend used will be sox.

Note that in the mlperf repo we use ``librosa`` to perform speed perturbation, but this would involve adding another dependency and massively complicating the control flow of the dataset transforms, which I am not willing to do.

NOTE: it will be necessary to create a ``_Dataset`` parent class so that CommonVoice and LibriSpeech can share the same ``__getitem__``.

In [None]:
import torch,torchaudio
import librosa
import numpy as np

In [None]:
# Path to the local audio file used as input by the cells below.
fp = '/home/julian/Music/BTB2.wav'

In [None]:
# Load the waveform and its sample rate from the input file.
audio, rate = torchaudio.load(fp)

# Report the offending file through `fp`, the path variable defined at
# notebook scope, so a rate mismatch surfaces as a readable AssertionError.
assert rate == 16000, f"{fp} sample rate == {rate} != 16000"
audio.shape  # channels, length

In [None]:
# class MyDataset(Dataset):
#     def __init__(self, audiodir_path):
#         self.data = [os.path.join(audiodir_path, fn) for fn in os.listdir(audiodir_path)]
#         self.E = torchaudio.sox_effects.SoxEffectsChain()
#         self.E.append_effect_to_chain("rate", [16000])  # resample to 16000hz
#         self.E.append_effect_to_chain("channels", ["1"])  # mono signal
#     def __getitem__(self, index):
#         fn = self.data[index]
#         self.E.set_input_file(fn)
#         x, sr = self.E.sox_build_flow_effects()
#         return x, sr
#     def __len__(self):
#         return len(self.data)
# torchaudio.initialize_sox()
# ds = MyDataset(path_to_audio_files)
# for sig, sr in ds:
#   [do something here]
# torchaudio.shutdown_sox()

In [None]:
# Initialise sox; the effects-chain cells that follow run between this call
# and the torchaudio.shutdown_sox() call further down.
torchaudio.initialize_sox()

In [None]:
# do this with each speed
def speed_perturb(speed, fp, sample_rate=16000):
    """Read an audio file through a sox effects chain that changes its speed.

    The chain applies sox's ``speed`` effect and then its ``rate`` effect, so
    the output is resampled to ``sample_rate`` after the speed change.

    Args:
        speed: Factor handed to sox's ``speed`` effect.
        fp: Path of the audio file to read.
        sample_rate: Value handed to sox's ``rate`` effect. Defaults to 16000,
            the rate this notebook asserts on when loading audio.

    Returns:
        The result of ``SoxEffectsChain.sox_build_flow_effects()``; the
        notebook unpacks it as a two-element ``(audio, rate)`` pair.
    """
    chain = torchaudio.sox_effects.SoxEffectsChain()
    # Effect arguments are passed as lists for both effects for consistency.
    chain.append_effect_to_chain('speed', [speed])
    chain.append_effect_to_chain('rate', [sample_rate])
    chain.set_input_file(fp)
    return chain.sox_build_flow_effects()

In [None]:
# Build the effects chain at notebook scope: the `chain` created inside
# speed_perturb() is local to that function, so this cell needs its own.
# NOTE(review): speed 0.85 mirrors the value used in the cells below — confirm.
chain = torchaudio.sox_effects.SoxEffectsChain()
chain.append_effect_to_chain('speed', [0.85])
chain.append_effect_to_chain('rate', [16000])
chain.set_input_file(fp)
x, sr = chain.sox_build_flow_effects()

In [None]:
# Display the shape of the effects-chain output `x`.
x.shape

In [None]:
# Run the same perturbation (speed 0.85, same file) twice so the two outputs
# can be compared with each other; the returned rates are discarded.
x1, _ = speed_perturb(0.85, fp)
x2, _ = speed_perturb(0.85, fp)
x1.shape, x2.shape

In [None]:
# Two runs with identical settings should give numerically matching output.
assert torch.allclose(x1, x2)

In [None]:
# Shut sox down again; counterpart of the torchaudio.initialize_sox() call above.
torchaudio.shutdown_sox()

In [None]:
def fn1(path):
    """Profiling baseline: plain ``torchaudio.load`` with no effects applied."""
    loaded = torchaudio.load(path)
    return loaded
def fn2(path):
    """Profiling candidate: read ``path`` through a sox effects chain.

    The chain applies the ``speed`` effect with 1.15 followed by the ``rate``
    effect with 16000, and the result of running the chain is returned.
    """
    effects = torchaudio.sox_effects.SoxEffectsChain()
    # Effects are appended in this order: speed first, then rate.
    for effect_name, effect_args in (('speed', [1.15]), ('rate', 16000)):
        effects.append_effect_to_chain(effect_name, effect_args)
    effects.set_input_file(path)
    return effects.sox_build_flow_effects()
def fn3(path):
    """Profiling candidate: load with torchaudio, then time-stretch with librosa.

    Only the first channel of the loaded audio is passed to librosa; the
    sample rate returned by ``torchaudio.load`` is discarded.

    Args:
        path: Path of the audio file to read.

    Returns:
        The output of ``librosa.effects.time_stretch`` for the first channel.
    """
    audio, _ = torchaudio.load(path)  # sample rate is not needed here
    first_channel = np.asfortranarray(audio.numpy()[0])
    # `rate` is passed by keyword: recent librosa releases make it
    # keyword-only, and the keyword form also works on older releases.
    # NOTE(review): rate=1 requests no change in speed, whereas fn2 uses
    # 1.15 — confirm this is the intended comparison.
    return librosa.effects.time_stretch(first_channel, rate=1)

In [None]:
import cProfile

In [None]:
# Profile the plain torchaudio.load path (fn1).
cProfile.run('fn1(fp)')

In [None]:
# Profile the sox effects-chain path (fn2).
cProfile.run('fn2(fp)')

In [None]:
# Profile the torchaudio.load + librosa time_stretch path (fn3).
cProfile.run('fn3(fp)')