In [1]:
import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np
import os

In [2]:
def create_spectrogram(file_path):
    audio_array, sample_rate= librosa.load(file_path)
    spec = librosa.feature.melspectrogram(y=audio_array,
                                    sr=sample_rate, 
                                        n_fft=2048, 
                                        hop_length=512, 
                                        win_length=None, 
                                        window='hann', 
                                        center=True, 
                                        pad_mode='reflect', 
                                        power=2.0,
                                    n_mels=128)
    log_spec = librosa.power_to_db(spec, ref=np.max)
    return spec,sample_rate

# Probando con un archivo limpio y uno con overlay de ruido

In [3]:
clean_file=r'wavs\clean\clnsp0.wav'
noise_file=r'wavs\noisy\output1.wav'
test_clean_spec,test_clean_sr =create_spectrogram(clean_file)
test_noisy_spec,test_noisy_sr=create_spectrogram(noise_file)

In [4]:
test_clean_spec.shape,test_noisy_spec.shape

((128, 475), (128, 475))

In [23]:
round(test_clean_spec.shape[1]/128)

4

In [21]:
np.hsplit(test_clean_spec,5)[0].shape

(128, 95)

In [5]:
test_clean_sr

22050

In [6]:
test_noisy_sr

22050

# Generador de archivos en masa usando batcheador

In [27]:
from math import ceil
def create_spec_from_dir(dir_path,top_x=200):
    #dir_path directorio o folder donde estan los wavs
    # top_x opcional cuantos archivos maximo desea usar, dejar vacio para usarlos todos
    dir = os.listdir(dir_path)
    spec_list=[]
    s_rates=[]
    for i, file in enumerate(dir):
        try:
            if i<=top_x:
                input_file = os.path.join(dir_path, file)
                ms,sr=create_spectrogram(input_file)
                num_batches=ceil(ms.shape[1]/128) if ms.shape[1]>128 else 1
                ms=np.resize(ms,(ms.shape[0],128*num_batches))
                batches=np.hsplit(ms,num_batches)
                for batch in batches:
                    spec_list.append(batch)
                    s_rates.append(sr)
        except:
            print(file," file skipped")
    
    return spec_list,s_rates

In [39]:
clean_specs,clean_s_rates=create_spec_from_dir(r'wavs\clean',24175)
noisy_specs,noisy_s_rates=create_spec_from_dir(r'wavs\noisy',24175)

In [43]:
len(clean_specs),len(noisy_specs)

(37537, 37533)

In [14]:
clean_specs[0].shape

(128, 128)

In [15]:
min(clean_s_rates),max(clean_s_rates)

(22050, 22050)

In [16]:
min(noisy_s_rates),max(noisy_s_rates)

(22050, 22050)

In [17]:
def standardize_specs(clean_specs,noisy_specs):
    
    #getting max lenght of all audios
    max_y=0
    
    for i,j in zip(clean_specs,noisy_specs):
        if i.shape[1]>max_y: max_y=i.shape[1]
        if j.shape[1]>max_y: max_y=j.shape[1]
    print(max_y)
    # reshapping all spectrogram

    for index,s in enumerate(clean_specs):
        try:
            clean_specs[index]=np.resize(s,(s.shape[0],max_y))
        except Exception as e:
            print(f"skipping clean {index} {e}")
    
    clean_specs=np.array(clean_specs)
    clean_specs=clean_specs.reshape(-1,s.shape[0],max_y,1)

    for index,s in enumerate(noisy_specs):
        try:
            noisy_specs[index]=np.resize(s,(s.shape[0],max_y))
        except Exception as e:
            print(f"skipping noise {index} {e}")
            
    noisy_specs=np.array(noisy_specs)
    noisy_specs=noisy_specs.reshape(-1,s.shape[0],max_y,1)

    return clean_specs,noisy_specs

In [18]:
s_clean_specs,s_noisy_specs=standardize_specs(clean_specs,noisy_specs)

128


In [31]:
type(s_clean_specs)

numpy.ndarray

In [42]:
samples=min(s_clean_specs.shape[0],s_noisy_specs.shape[0])
s_clean_specs=s_clean_specs[:samples]
s_noisy_specs=s_noisy_specs[:samples]

In [44]:
from tensorflow.keras.utils import Sequence
import numpy as np   

class DataGenerator(Sequence):
    def __init__(self, x_set, y_set, batch_size):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
        return batch_x, batch_y

train_gen = DataGenerator(s_noisy_specs, s_clean_specs, 32)
#test_gen = DataGenerator(X_test, y_test, 32)

# Autoencoder Naive

In [45]:
import os, shutil
import tensorflow as tf
from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator
from keras import layers
from keras import models
from keras.layers import Dense,Flatten,Reshape,InputLayer
from keras.models import Sequential
from keras import optimizers
from tensorflow.keras.optimizers import Adam
from keras.preprocessing import image
import numpy as np
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
#import mlflow
#import mlflow.tensorflow
from PIL import Image
import re

## Autoencoder 0

solo 2 capas densas y regularización

In [46]:
drop_out=0.1

img_shape=(s_clean_specs.shape[1],s_clean_specs.shape[2])

auto_encoder0=models.Sequential()
auto_encoder0.add(layers.Input(shape=img_shape))
auto_encoder0.add(layers.Flatten())
auto_encoder0.add(layers.Dense(64))
auto_encoder0.add(layers.Dropout(drop_out))
auto_encoder0.add(layers.Dense(64))
auto_encoder0.add(Dense(np.prod(img_shape))) # np.prod(img_shape) is the same as 32*32*3, it's more generic than saying 3072
auto_encoder0.add(layers.Reshape(img_shape))
auto_encoder0.compile(optimizer='adamax', loss='mse')
auto_encoder0.summary()

history0 = auto_encoder0.fit(train_gen,
                    epochs=20,
                    )

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_2 (Flatten)         (None, 16384)             0         
                                                                 
 dense_6 (Dense)             (None, 64)                1048640   
                                                                 
 dropout_2 (Dropout)         (None, 64)                0         
                                                                 
 dense_7 (Dense)             (None, 64)                4160      
                                                                 
 dense_8 (Dense)             (None, 16384)             1064960   
                                                                 
 reshape_2 (Reshape)         (None, 128, 128)          0         
                                                                 
Total params: 2,117,760
Trainable params: 2,117,760
No

## Autoencoder 1

capas más complejas

In [47]:
drop_out=0.1

img_shape=(s_clean_specs.shape[1],s_clean_specs.shape[2])

auto_encoder=models.Sequential()
auto_encoder.add(layers.Input(shape=img_shape))
auto_encoder.add(layers.Flatten())
auto_encoder.add(layers.Dense(128))
auto_encoder.add(layers.Dropout(drop_out))
auto_encoder.add(layers.Dense(256))
auto_encoder.add(layers.Dropout(drop_out))
auto_encoder.add(layers.Dense(512))
auto_encoder.add(layers.Dropout(drop_out))
auto_encoder.add(layers.Dense(256))
auto_encoder.add(layers.Dense(128))
auto_encoder.add(Dense(np.prod(img_shape))) # np.prod(img_shape) is the same as 32*32*3, it's more generic than saying 3072
auto_encoder.add(layers.Reshape(img_shape))


auto_encoder.compile(optimizer='adamax', loss='mse')
auto_encoder.summary()

history1 = auto_encoder.fit(train_gen,
                    epochs=20,
                    )

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_3 (Flatten)         (None, 16384)             0         
                                                                 
 dense_9 (Dense)             (None, 128)               2097280   
                                                                 
 dropout_3 (Dropout)         (None, 128)               0         
                                                                 
 dense_10 (Dense)            (None, 256)               33024     
                                                                 
 dropout_4 (Dropout)         (None, 256)               0         
                                                                 
 dense_11 (Dense)            (None, 512)               131584    
                                                                 
 dropout_5 (Dropout)         (None, 512)              

## autoencoder 2

In [48]:
drop_out=0.1

img_shape=(s_clean_specs.shape[1],s_clean_specs.shape[2])

auto_encoder2=models.Sequential()
auto_encoder2.add(layers.Input(shape=img_shape))
auto_encoder2.add(layers.Flatten())
auto_encoder2.add(layers.Dense(64))
auto_encoder2.add(layers.Dropout(drop_out))
auto_encoder2.add(layers.Dense(64))
auto_encoder2.add(Dense(np.prod(img_shape))) # np.prod(img_shape) is the same as 32*32*3, it's more generic than saying 3072
auto_encoder2.add(layers.Reshape(img_shape))
auto_encoder2.add(layers.Flatten())
auto_encoder2.add(layers.Dense(128))
auto_encoder2.add(layers.Dropout(drop_out))
auto_encoder2.add(layers.Dense(128))
auto_encoder2.add(Dense(np.prod(img_shape))) # np.prod(img_shape) is the same as 32*32*3, it's more generic than saying 3072
auto_encoder2.add(layers.Reshape(img_shape))
auto_encoder2.compile(optimizer='adamax', loss='mse')
auto_encoder2.summary()

history2 = auto_encoder2.fit(train_gen,
                    epochs=20,
                    )

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_4 (Flatten)         (None, 16384)             0         
                                                                 
 dense_15 (Dense)            (None, 64)                1048640   
                                                                 
 dropout_6 (Dropout)         (None, 64)                0         
                                                                 
 dense_16 (Dense)            (None, 64)                4160      
                                                                 
 dense_17 (Dense)            (None, 16384)             1064960   
                                                                 
 reshape_4 (Reshape)         (None, 128, 128)          0         
                                                                 
 flatten_5 (Flatten)         (None, 16384)            

## Autoencoder 3

In [49]:
drop_out=0.1

img_shape=(s_clean_specs.shape[1],s_clean_specs.shape[2])

auto_encoder3=models.Sequential()
auto_encoder3.add(layers.Input(shape=img_shape))
auto_encoder3.add(layers.Flatten())
auto_encoder3.add(layers.Dense(32))
auto_encoder3.add(layers.Dropout(drop_out))
auto_encoder3.add(layers.Dense(32))
auto_encoder3.add(Dense(np.prod(img_shape))) # np.prod(img_shape) is the same as 32*32*3, it's more generic than saying 3072
auto_encoder3.add(layers.Reshape(img_shape))
auto_encoder3.compile(optimizer='adamax', loss='mse')
auto_encoder3.summary()

history3 = auto_encoder3.fit(train_gen,
                    epochs=20,
                    batch_size=32
                    )

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_6 (Flatten)         (None, 16384)             0         
                                                                 
 dense_21 (Dense)            (None, 32)                524320    
                                                                 
 dropout_8 (Dropout)         (None, 32)                0         
                                                                 
 dense_22 (Dense)            (None, 32)                1056      
                                                                 
 dense_23 (Dense)            (None, 16384)             540672    
                                                                 
 reshape_6 (Reshape)         (None, 128, 128)          0         
                                                                 
Total params: 1,066,048
Trainable params: 1,066,048
No

# Autoencoder CNN

In [50]:
img_shape_cnn=(s_clean_specs.shape[1],s_clean_specs.shape[2],1)
img_shape_cnn

(128, 128, 1)

In [117]:
np.prod((128, 702))

89856

In [54]:
s_clean_specs.shape,s_noisy_specs.shape

((11377, 128, 128, 1), (11377, 128, 128, 1))

In [61]:
img_shape_cnn=(s_clean_specs.shape[1],s_clean_specs.shape[2],1)

cnn_auto_encoder0=models.Sequential()
cnn_auto_encoder0.add(layers.Conv2D(filters = 3, kernel_size = (3,3),padding = 'Same', 
                activation ='relu', input_shape = (128,128,1)))

cnn_auto_encoder0.add(layers.Flatten())

cnn_auto_encoder0.summary()
cnn_auto_encoder0.compile(optimizer='adamax', loss='mse')
history_cnn0 = cnn_auto_encoder0.fit(train_gen,
                    epochs=20,
                    batch_size=8
                    )

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 128, 128, 3)       30        
                                                                 
 flatten_13 (Flatten)        (None, 49152)             0         
                                                                 
Total params: 30
Trainable params: 30
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20


InvalidArgumentError: Graph execution error:

Detected at node 'mean_squared_error/SquaredDifference' defined at (most recent call last):
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\traitlets\config\application.py", line 992, in launch_instance
      app.start()
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\ipykernel\kernelapp.py", line 711, in start
      self.io_loop.start()
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\tornado\platform\asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\asyncio\base_events.py", line 601, in run_forever
      self._run_once()
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\asyncio\base_events.py", line 1905, in _run_once
      handle._run()
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\ipykernel\kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\ipykernel\kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\ipykernel\kernelbase.py", line 406, in dispatch_shell
      await result
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\ipykernel\kernelbase.py", line 729, in execute_request
      reply_content = await reply_content
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\ipykernel\ipkernel.py", line 411, in do_execute
      res = shell.run_cell(
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\ipykernel\zmqshell.py", line 531, in run_cell
      return super().run_cell(*args, **kwargs)
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\IPython\core\interactiveshell.py", line 2945, in run_cell
      result = self._run_cell(
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\IPython\core\interactiveshell.py", line 3000, in _run_cell
      return runner(coro)
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\IPython\core\interactiveshell.py", line 3203, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\IPython\core\interactiveshell.py", line 3382, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\IPython\core\interactiveshell.py", line 3442, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\USUARIO\AppData\Local\Temp\ipykernel_20120\2784571528.py", line 11, in <module>
      history_cnn0 = cnn_auto_encoder0.fit(train_gen,
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\keras\engine\training.py", line 1409, in fit
      tmp_logs = self.train_function(iterator)
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\keras\engine\training.py", line 1051, in train_function
      return step_function(self, iterator)
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\keras\engine\training.py", line 1040, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\keras\engine\training.py", line 1030, in run_step
      outputs = model.train_step(data)
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\keras\engine\training.py", line 890, in train_step
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\keras\engine\training.py", line 948, in compute_loss
      return self.compiled_loss(
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\keras\engine\compile_utils.py", line 201, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\keras\losses.py", line 139, in __call__
      losses = call_fn(y_true, y_pred)
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\keras\losses.py", line 243, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "c:\Users\USUARIO\anaconda3\envs\audio_dl\lib\site-packages\keras\losses.py", line 1327, in mean_squared_error
      return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)
Node: 'mean_squared_error/SquaredDifference'
required broadcastable shapes
	 [[{{node mean_squared_error/SquaredDifference}}]] [Op:__inference_train_function_113616]

In [85]:
img_shape=(s_clean_specs.shape[1],s_clean_specs.shape[2],1)
auto_encoder2=models.Sequential()
auto_encoder2.add(layers.Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same', 
                activation ='relu', input_shape = img_shape))
auto_encoder2.add(layers.Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same', 
                activation ='relu'))
auto_encoder2.add(layers.MaxPool2D(pool_size=(2,2)))
auto_encoder2.add(layers.BatchNormalization())
auto_encoder2.add(layers.Dropout(drop_out))
auto_encoder2.add(layers.Flatten())
auto_encoder2.add(layers.Dense(512, activation = "relu"))
auto_encoder2.add(layers.Dropout(drop_out))
auto_encoder2.add(layers.Dense(64, activation = "softmax"))
auto_encoder2.add(Dense(np.prod(img_shape))) # np.prod(img_shape) is the same as 32*32*3, it's more generic than saying 3072
auto_encoder2.add(layers.Reshape(img_shape))

auto_encoder2.compile(optimizer='adamax', loss='mse')
auto_encoder2.summary()

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_21 (Conv2D)          (None, 128, 702, 32)      832       
                                                                 
 conv2d_22 (Conv2D)          (None, 128, 702, 32)      25632     
                                                                 
 max_pooling2d_10 (MaxPoolin  (None, 64, 351, 32)      0         
 g2D)                                                            
                                                                 
 batch_normalization_10 (Bat  (None, 64, 351, 32)      128       
 chNormalization)                                                
                                                                 
 dropout_16 (Dropout)        (None, 64, 351, 32)       0         
                                                                 
 flatten_11 (Flatten)        (None, 718848)          

In [86]:
#history2 = auto_encoder2.fit(x=s_noisy_specs, y=s_clean_specs, epochs=100)
history2 = auto_encoder2.fit(train_gen,
                    epochs=20,
                    )

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


# Predicciones

In [172]:
preds=auto_encoder.predict(s_noisy_specs)



In [173]:
preds.shape

(2001, 128, 702)

In [174]:
preds[0].shape

(128, 702)

In [175]:
clean_specs[0]

array([[1.88221908e-04, 2.46463984e-04, 2.61851761e-04, ...,
        2.57317000e-03, 8.99837411e-04, 2.44650152e-03],
       [1.21181156e-03, 9.17276135e-04, 9.90588916e-04, ...,
        5.54336328e-03, 2.85331719e-02, 2.60114968e-01],
       [1.14793324e+00, 1.52739763e+00, 1.82428646e+00, ...,
        2.50480145e-01, 8.19973946e-02, 4.72248858e-03],
       ...,
       [9.29996677e-05, 9.84362341e-05, 1.06841406e-04, ...,
        2.54283252e-04, 6.28159833e-05, 1.36140082e-03],
       [2.84687756e-03, 9.32404399e-03, 1.19536798e-02, ...,
        1.18909981e-02, 1.26775932e-02, 8.28386191e-03],
       [6.48943149e-03, 3.44618829e-03, 1.06449577e-03, ...,
        2.61219740e-02, 3.30388226e-04, 1.67555991e-04]], dtype=float32)

In [176]:
len(preds)

2001

In [177]:
reversed_log=librosa.db_to_power(preds[0])
# step3 converting mel-spectrogrma back to wav file
res = librosa.feature.inverse.mel_to_audio(reversed_log, 
                                           sr=noisy_s_rates[0], 
                                           n_fft=2048, 
                                           hop_length=512, 
                                           win_length=None, 
                                           window='hann', 
                                           center=True, 
                                           pad_mode='reflect', 
                                           power=2.0, 
                                           n_iter=32)

import soundfile as sf
sf.write("test2.wav", res, noisy_s_rates[0])

# Convertidor de espectrograma a audio

In [41]:
# step1 - converting a wav file to numpy array and then converting that to mel-spectrogram
scale_file=r'wavs\clean\clnsp0.wav'
my_audio_as_np_array, my_sample_rate= librosa.load(scale_file)

# step2 - converting audio np array to spectrogram
spec = librosa.feature.melspectrogram(y=my_audio_as_np_array,
                                        sr=my_sample_rate, 
                                            n_fft=2048, 
                                            hop_length=512, 
                                            win_length=None, 
                                            window='hann', 
                                            center=True, 
                                            pad_mode='reflect', 
                                            power=2.0,
                                     n_mels=128)
log_spec = librosa.power_to_db(spec)
reversed_log=librosa.db_to_power(log_spec)
# step3 converting mel-spectrogrma back to wav file
res = librosa.feature.inverse.mel_to_audio(reversed_log, 
                                           sr=my_sample_rate, 
                                           n_fft=2048, 
                                           hop_length=512, 
                                           win_length=None, 
                                           window='hann', 
                                           center=True, 
                                           pad_mode='reflect', 
                                           power=2.0, 
                                           n_iter=32)

# step4 - save it as a wav file
import soundfile as sf
sf.write("test1.wav", res, my_sample_rate)

In [39]:
import soundfile as sf
sf.write('scale.wav', scale, sr, format='ogg', subtype='vorbis')
sf.write('audio.wav', audio, sr, format='ogg', subtype='vorbis')