In [None]:
import numpy as np
import librosa

import librosa.display

import IPython.display as ipd
from matplotlib import pyplot as plt

from rtsi import RTSI

In [None]:
# Analysis settings shared by the cells that follow.
sr = 16000          # sample rate the example file is loaded at
win_length = 1024   # window length handed to RTSI
hop_length = 256    # hop length handed to RTSI (also used for the plot's time axis)

# Load the example recording at `sr`; the second return value is discarded.
audio, _ = librosa.load('example_audio/p229_005.wav', sr=sr)

rtsi = RTSI(win_length=win_length, hop_length=hop_length)

# Round trip through RTSI: whole signal -> spectrogram -> signal.
spect = rtsi.audio_to_spect(audio)
audio_recon = rtsi.spect_to_audio(spect)

print("Original Audio:")
ipd.display(ipd.Audio(audio, rate=sr))

print("Spectrogram:")
# The square root is applied only to make the plot easier to read.
librosa.display.specshow(
    np.sqrt(spect),
    sr=sr,
    hop_length=hop_length,
    x_axis='time',
    y_axis='linear',
)
plt.show()

print("Reconstructed Audio:")
ipd.display(ipd.Audio(audio_recon, rate=sr))


In [None]:
# use rtsi for online modifications on the magnitude spectrogram

def do_cool_stuff(col):
    """Zero the lowest 1/16 of the rows of ``col`` and return it.

    The write happens in place: the object returned is the same object
    that was passed in. ``col`` must support 2-D indexing; rows are the
    first axis.
    """
    # scoop low frequencies: rows [0, len(col) // 16) are silenced
    cutoff = len(col) // 16
    col[:cutoff, :] = 0
    return col

# Frame-by-frame processing of the recording: each iteration feeds one
# window of audio to RTSI and stores the audio it gets back.
# NOTE(review): `rtsi` was already used by the previous cell. If RTSI keeps
# internal state between calls (not visible here), build a fresh instance
# so that re-running this cell gives the same result.
audio_recon = np.zeros_like(audio)

# `+ 1` keeps the last start index at which a full window still fits
# (i == len(audio) - win_length); with an exclusive stop of
# len(audio) - win_length that frame is skipped whenever the value is a
# multiple of hop_length.
for i in range(0, len(audio) - win_length + 1, hop_length):
    col = rtsi.audio_to_spect(audio[i:i+win_length])

    # do cool stuff to col here
    col = do_cool_stuff(col)

    # assumes spect_to_audio returns hop_length samples for this input,
    # as the slice assignment requires - TODO confirm against RTSI
    audio_recon[i:i+hop_length] = rtsi.spect_to_audio(col)

print("Original Audio:")
ipd.display(ipd.Audio(audio, rate=sr))

print("Modified Audio:")
ipd.display(ipd.Audio(audio_recon, rate=sr))

In [None]:
# use your own mic! (use headphones instead of speakers to prevent audio feedback)

import sounddevice as sd 

# Multiplier applied to hop_length to size each audio block.
n_cols = 1
# Block size passed to the audio stream below.
blocksize = hop_length * n_cols
        
class RTSIWrapper:
    """Adapt an RTSI object to a sounddevice-style stream callback.

    Every callback prepends ``win_length - hop_length`` samples of history
    to the newly arrived block before calling ``audio_to_spect``; this
    wrapper carries that history from one callback to the next.
    """

    def __init__(self, rtsi):
        """Store ``rtsi`` and start with an all-zero history buffer.

        Args:
            rtsi: object exposing ``win_length``, ``hop_length``,
                ``audio_to_spect`` and ``spect_to_audio``.
        """
        self.rtsi = rtsi

        # number of past samples prepended to every incoming block
        self.memory_length = self.rtsi.win_length - self.rtsi.hop_length
        self.memory        = np.zeros(self.memory_length)

    def callback(self, indata, outdata, frames, time, status):
        """Process one audio block and write the result into ``outdata``.

        Args:
            indata: 2-D input block; only column 0 is read.
            outdata: 2-D output block, overwritten in place.
            frames: unused.
            time: unused.
            status: printed when truthy, so stream problems reported by the
                audio backend are not silently dropped.
        """
        if status:
            print(status)

        x = indata[:, 0] # audio comes in in shape (hop_length, 1), but must be a 1d array of length hop_length

        # deal with memory (rtsi needs more audio as input than it returns as output)
        x = np.concatenate([self.memory, x])
        # Slice from an explicit start index: `x[-n:]` with n == 0 would
        # return the whole array, so with win_length == hop_length the
        # history (and every later input window) would keep growing.
        self.memory = x[len(x) - self.memory_length:]

        col = self.rtsi.audio_to_spect(x)
        col = do_cool_stuff(col)
        x   = self.rtsi.spect_to_audio(col)

        outdata[:] = x[:, None] # restore the extra dimension
        
# Bind the shared RTSI instance to an object whose `callback` the stream calls.
rtsi_wrapper = RTSIWrapper(rtsi)

# The stream is open for as long as the body of the `with` block runs.
with sd.Stream(
    samplerate=sr,
    blocksize=blocksize,
    device=sd.default.device,
    channels=1,
    callback=rtsi_wrapper.callback,
):
    # Keep prompting until the user submits an empty line, 'q' or 'Q'.
    response = None
    while response not in ('', 'q', 'Q'):
        response = input("Recording, Converting and Playing audio... Press 'q' to quit.")