# Robot Voice 

In [1]:
from scipy.io import wavfile
import numpy as np
import sounddevice as sd

In [2]:
# define necessary utility functions
def build_sine_table(f_sine, samp_freq, data_type):
    """
    Build a lookup table holding one period of a sine wave, scaled to the
    full-scale amplitude of an integer sample type.

    Parameters
    ----------
    f_sine : float
        Frequency of the sine wave in Hz. Must be positive.
    samp_freq : float
        Sampling frequency in Hz. Must be positive.
    data_type : numpy dtype or type
        Signed integer sample type (e.g. ``np.int16`` or ``np.int32``); its
        largest representable value is used as the table amplitude.

    Returns
    -------
    SINE_TABLE : numpy.ndarray
        Float array of length ``LOOKUP_SIZE`` containing
        ``sin(2*pi*n*f_sine/samp_freq) * MAX_SINE`` for ``n = 0..LOOKUP_SIZE-1``.
    MAX_SINE : int
        Largest value representable by ``data_type``.
    LOOKUP_SIZE : int
        Number of entries in the table: ``ceil(samp_freq / f_sine)``.

    Raises
    ------
    ValueError
        If a frequency is not positive or ``data_type`` is not a signed
        integer type.
    """
    if f_sine <= 0 or samp_freq <= 0:
        raise ValueError(
            "f_sine and samp_freq must be positive, got f_sine=%r, samp_freq=%r"
            % (f_sine, samp_freq)
        )

    # compute the integer conversion parameters
    try:
        sample_dtype = np.dtype(data_type)
    except TypeError as err:
        raise ValueError("unsupported data type: %r" % (data_type,)) from err
    if not np.issubdtype(sample_dtype, np.signedinteger):
        # previously MAX_SINE was simply left undefined for such types
        raise ValueError(
            "data_type must be a signed integer type (e.g. np.int16), got %s"
            % sample_dtype
        )
    MAX_SINE = int(np.iinfo(sample_dtype).max)

    # compute the right number of (integer) time instances: samples needed to
    # cover one period. A single division keeps exact ratios exact
    # (e.g. 44100/350 == 126.0), whereas dividing two rounded reciprocal
    # periods can land just above the integer and add a spurious sample.
    LOOKUP_SIZE = max(1, int(np.ceil(samp_freq / f_sine)))
    n = np.arange(LOOKUP_SIZE)

    freq_step = f_sine/samp_freq
    SINE_TABLE = np.sin(2*np.pi*n*freq_step) * MAX_SINE

    return SINE_TABLE, MAX_SINE, LOOKUP_SIZE

In [3]:
# state variables
def init(f_sine, samp_freq, data_type=None):
    """
    Reset the module-level processing state and build the modulation table.

    Sets the globals ``GAIN``, ``x_prev`` and ``sine_pointer`` to their start
    values and stores the result of ``build_sine_table`` in ``SINE_TABLE``,
    ``MAX_SINE`` and ``LOOKUP_SIZE``.

    Parameters
    ----------
    f_sine : float
        Modulation (sine) frequency in Hz.
    samp_freq : float
        Sampling frequency in Hz.
    data_type : numpy dtype or type, optional
        Integer sample type handed to ``build_sine_table``. When omitted, the
        module-level ``data_type`` variable is used if one exists (this is
        what the function silently relied on before); otherwise ``np.int16``.
    """
    global sine_pointer
    global x_prev
    global GAIN
    global SINE_TABLE
    global MAX_SINE
    global LOOKUP_SIZE

    # the parameter shadows the global of the same name, so the fallback has
    # to go through globals() explicitly
    if data_type is None:
        data_type = globals().get("data_type", np.int16)

    GAIN = 1
    x_prev = 0
    sine_pointer = 0

    # compute SINE TABLE
    SINE_TABLE, MAX_SINE, LOOKUP_SIZE = build_sine_table(f_sine, samp_freq, data_type)

In [4]:
def process(input_buffer, output_buffer, buffer_len):
    """
    Apply the robot-voice effect to one block of samples.

    Each input sample is high-pass filtered with a first-order difference
    (``x[n] - x[n-1]``) and then multiplied by the current entry of the
    normalised sine table (``SINE_TABLE / MAX_SINE``).

    The arithmetic is done in floating point: subtracting two samples in a
    narrow integer type such as int16 can exceed that type's range and wrap
    around. When ``output_buffer`` has an integer dtype the result is clipped
    to that dtype's range before being stored.

    Parameters
    ----------
    input_buffer : sequence of numbers
        Input samples; the first ``buffer_len`` entries are read.
    output_buffer : mutable sequence
        Receives the processed samples; the first ``buffer_len`` entries are
        overwritten in place.
    buffer_len : int
        Number of samples to process.

    Notes
    -----
    Reads the globals ``SINE_TABLE``, ``MAX_SINE`` and ``LOOKUP_SIZE`` and
    updates the globals ``x_prev`` and ``sine_pointer``, so consecutive calls
    continue where the previous block stopped. ``init`` must have been called
    first to create them.
    """
    # specify global variables modified here
    global x_prev
    global sine_pointer

    # saturation limits, only meaningful for integer output buffers
    out_dtype = getattr(output_buffer, "dtype", None)
    if out_dtype is not None and np.issubdtype(out_dtype, np.integer):
        limits = np.iinfo(out_dtype)
        out_min, out_max = limits.min, limits.max
    else:
        out_min = out_max = None

    for n in range(buffer_len):

        sample = input_buffer[n]

        # high pass filter (in float, so the difference cannot wrap around)
        filtered = float(sample) - float(x_prev)

        # modulation
        modulated = filtered * SINE_TABLE[sine_pointer]/MAX_SINE

        # saturate instead of overflowing when stored as an integer
        if out_min is not None:
            modulated = min(max(modulated, out_min), out_max)
        output_buffer[n] = modulated

        # update state variables
        sine_pointer = (sine_pointer+1)%LOOKUP_SIZE
        x_prev = sample

### Main cell when working on a file

In [5]:
# parameters
buffer_len = 256

# test signal
input_wav = "speech.wav"
samp_freq, signal = wavfile.read(input_wav)

# keep only the first channel; a mono file is already 1-D and has no channel
# axis to index (the previous `signal[:,1]` took the *second* channel and
# failed on mono input)
if signal.ndim > 1:
    signal = signal[:, 0]
n_buffers = len(signal)//buffer_len
data_type = signal.dtype

print("Sampling frequency : %d Hz" % samp_freq)
print("Data type          : %s" % signal.dtype)

# allocate input and output buffers
input_buffer = np.zeros(buffer_len, dtype=data_type)
output_buffer = np.zeros(buffer_len, dtype=data_type)

"""
Nothing to touch after this!
"""

modulation_freq = 350

init(modulation_freq, samp_freq)
# simulate block based processing; samples beyond the last full buffer
# (len(signal) % buffer_len of them) are not processed
signal_proc = np.zeros(n_buffers*buffer_len, dtype=data_type)

for k in range(n_buffers):

    # index the appropriate samples
    input_buffer = signal[k*buffer_len:(k+1)*buffer_len]
    process(input_buffer, output_buffer, buffer_len)
    signal_proc[k*buffer_len:(k+1)*buffer_len] = output_buffer

# write to WAV
wavfile.write("speech_mod.wav", samp_freq, signal_proc)



Sampling frequency : 44100 Hz
Data type          : int16


### Main cell when working in real time

In [6]:
"""
Nothing to touch after this!
"""
# parameters
buffer_len = 256
modulation_freq = 500
data_type = np.int16
samp_freq = 44100

try:
    # run the audio stream at the same rate the sine table is built for;
    # a different stream rate would scale the effective modulation frequency
    sd.default.samplerate = samp_freq
    sd.default.blocksize = buffer_len
    sd.default.dtype = data_type

    def callback(indata, outdata, frames, time, status):
        """Stream callback: report any status flag, then process channel 0 in place."""
        if status:
            print(status)
        process(indata[:,0], outdata[:,0], frames)

    init(modulation_freq, samp_freq)
    with sd.Stream(channels=1, callback=callback):
        print('#' * 80)
        print('press Return to quit')
        print('#' * 80)
        input()
except KeyboardInterrupt:
    # `parser` (an argparse leftover) is not defined in this notebook, so the
    # old `parser.exit(...)` raised NameError instead of showing the message
    print('\nInterrupted by user')

################################################################################
press Return to quit
################################################################################

