In [None]:
import numpy as np
import sounddevice as sd
from scipy.io import wavfile
from matplotlib import pyplot as plt
import scipy.signal as sp

In [None]:
def ms2smp(ms, fs):
    """
    Convert a duration in milliseconds into a whole number of samples.

    Parameters
    ----------
    ms: float
        Duration in milliseconds.
    fs: float
        Sampling rate in Hz.

    Returns
    -------
    int
        fs*ms/1000, truncated toward zero by int().
    """
    # ms/1000 is the duration in seconds; multiplying by fs gives samples
    n_samples = fs*ms/1000.
    return int(n_samples)

In [None]:
def win_taper(N, a, data_type=np.int16):
    """
    Build a trapezoidal tapering window for a grain.

    Parameters
    ----------
    N: the length of the grain (in samples)
    a: a double between 0 and 1 representing the fraction of the N samples
        that will be attenuated (a/2 of the grain is attenuated on each side)
    data_type: the integer data type of the output (np.iinfo is used to
        find its maximum value)

    Returns
    -------
    An array of N values of type "data_type": a linear ramp up from 0, a flat
    section at the maximum value of "data_type", and the mirrored ramp down.
    It attenuates the samples at the beginning and the end of a grain so that
    consecutive grains can be overlapped.
    """
    # Length of each of the two linear ramps
    n_ramp = int(N * a / 2)
    n_flat = N - 2*n_ramp

    # Fade-in goes 0, 1/n_ramp, ..., (n_ramp-1)/n_ramp; fade-out is its mirror
    fade_in = np.arange(0, n_ramp) / float(n_ramp)
    fade_out = fade_in[::-1]
    plateau = np.ones(n_flat)
    profile = np.concatenate((fade_in, plateau, fade_out))

    # Scale the [0, 1] profile to the full positive range of the output type
    full_scale = np.iinfo(data_type).max
    scaled = profile*full_scale
    return scaled.astype(data_type)

In [None]:
def compute_stride(N, a):
    """
    Number of samples by which consecutive grains are offset.

    Parameters
    ----------
    N: the length of the grain (in samples)
    a: fraction of the grain that is tapered (same meaning as in win_taper)

    Returns
    -------
    N minus the length of one taper ramp, int(N*a/2), minus one.
    """
    n_ramp = int(N * a / 2)
    return N - n_ramp - 1

In [None]:
def build_linear_interp_table(n_samples, down_fact, data_type=np.int16):
    """
    Precompute the lookup tables used for linear-interpolation resampling.

    For each output index n the interpolation time is t = n*down_fact.

    Parameters
    ----------
    n_samples: number of output samples to tabulate
    down_fact: resampling factor applied to the output index
    data_type: integer data type used to store the weights

    Returns
    -------
    which_samples: list of ints, floor(t) for each n (index of the input
        sample just before t)
    amplitudes: array of "data_type", the weight 1-(t-floor(t)) of that
        sample, scaled by the maximum value of "data_type"
        (e.g. t = 1.01 gives weight 0.99 for the sample at index 1)
    """
    # Interpolation time of every output sample
    times = [n*down_fact for n in range(n_samples)]

    # Largest integer not greater than each time
    floors = [np.floor(t) for t in times]
    which_samples = [int(f) for f in floors]

    # Weight of the earlier sample: 1 at an exact hit, decreasing to 0
    weights = [1-(t-f) for t, f in zip(times, floors)]

    # Store the weights as fixed-point values spanning the range of data_type
    full_scale = np.iinfo(data_type).max
    amplitudes = (np.array(weights)*full_scale).astype(data_type)

    return which_samples, amplitudes

In [None]:
def bac(x, p):
    """
    Compute the biased autocorrelation of x for lags 0..p.

    r[m] = (1/L) * sum_{n=0}^{L-m-1} x[n] * x[n+m],  with L = len(x)

    Parameters
    ----------
    x: 1-D sequence of samples
    p: largest lag to compute

    Returns
    -------
    Array of p+1 float64 values. Lags m >= L have no overlapping samples and
    are 0. An empty x yields all zeros.
    """
    # Accumulate in float64 regardless of the input dtype (e.g. float32 grains)
    x = np.asarray(x, dtype=np.float64)
    L = len(x)
    r = np.zeros(p+1)
    if L == 0:
        return r

    # One dot product per lag replaces the per-sample Python loop;
    # the bound keeps the two slices the same (non-negative) length
    for m in range(min(p, L-1) + 1):
        r[m] = np.dot(x[:L-m], x[m:])

    # "Biased" estimator: every lag is normalised by L, not by L-m
    return r / float(L)

In [None]:
def bac2(x,p,m):
    """
    Biased autocorrelation of x at the single lag m.

    Parameters
    ----------
    x: sequence of samples
    p: not used by this function
    m: the lag

    Returns
    -------
    (1/len(x)) * sum of x[n]*x[n+m] for n in 0..len(x)-m-1
    """
    n_samples = len(x)
    acc = 0.
    # Explicit running sum over every pair of samples that are m apart
    for idx in range(n_samples - m):
        acc += x[idx] * x[idx + m]
    # Normalise by the full length (biased estimator)
    acc /= float(n_samples)
    return acc

In [None]:
def ld(r, p):
    """
    Solve the Toeplitz system with the Levinson-Durbin recursion.

    Parameters
    ----------
    r: autocorrelation values for lags 0..p (at least p+1 entries)
    p: prediction order

    Returns
    -------
    Array of p+1 coefficients [1, -a_1, ..., -a_p] of the A(z) filter.

    Degenerate inputs return a finite filter instead of NaN:
    - r[0] not strictly positive (e.g. an all-zero segment): [1, 0, ..., 0]
    - prediction error reaching zero part-way through the recursion: the
      coefficients found so far, with the remaining higher orders set to 0
    """
    # No energy: r[1]/r[0] would be 0/0, so return the pass-through filter
    if not r[0] > 0:
        return np.r_[1., np.zeros(p)]

    g = r[1] / r[0]
    # 'a' is kept in reverse order: a[0] is the highest-order coefficient
    a = np.array([g])
    # v is the prediction-error energy at the current order
    v = (1. - g * g) * r[0]
    for i in range(1, p):
        if v <= 0:
            # Nothing left to predict; dividing by v would give inf/NaN.
            # Pad the higher-order coefficients (front of 'a') with zeros.
            a = np.r_[np.zeros(p - i), a]
            break
        g = (r[i+1] - np.dot(a, r[1:i+1])) / v
        a = np.r_[ g,  a - g * a[i-1::-1] ]
        v *= 1. - g*g
    # return the coefficients of the A(z) filter
    return np.r_[1, -a[::-1]]

In [None]:
def lpc(x, p):
    """
    Compute the order-p LPC filter for a speech segment.

    The biased autocorrelation of x (bac) is passed to the Levinson-Durbin
    solver (ld), whose result is returned unchanged.

    Parameters
    ----------
    x: the segment of samples
    p: the LPC order
    """
    autocorr = bac(x, p)
    return ld(autocorr, p)

In [10]:
# state variables and constants
def init(GRAIN_LEN_SAMP, a, shift_factor, data_type):
    """
    Create the module-level lookup tables and state buffers used by process().

    Parameters
    ----------
    GRAIN_LEN_SAMP: the length of a grain (in samples)
    a: fraction of the grain that is tapered (passed to win_taper)
    shift_factor: resampling factor (passed to build_linear_interp_table)
    data_type: integer data type of the lookup tables

    Globals set
    -----------
    WIN: tapering window lookup table
    SAMP_VALS, AMP_VALS: linear-interpolation lookup tables (index of the
        earlier sample, and the weight given to it)
    P: number of LPC coefficients (20)
    PREVIOUS_RAW, PREVIOUS_DOWN_WINDOWED: float32 zero arrays of length
        int(GRAIN_LEN_SAMP*a/2) + 1, carried from one call of process() to
        the next
    GRAIN, RESAMPLED_GRAIN: float32 zero arrays of length GRAIN_LEN_SAMP,
        working buffers for one call of process()
    """
    global WIN, SAMP_VALS, AMP_VALS, P
    global PREVIOUS_RAW, PREVIOUS_DOWN_WINDOWED
    global GRAIN, RESAMPLED_GRAIN

    sample_dtype = np.float32

    # Lookup tables
    WIN = win_taper(GRAIN_LEN_SAMP, a, data_type)
    SAMP_VALS, AMP_VALS = build_linear_interp_table(GRAIN_LEN_SAMP, shift_factor, data_type)

    # LPC order
    P = 20

    # State carried across calls: one slot per overlapping sample
    carry_len = int(GRAIN_LEN_SAMP*a/2) + 1
    PREVIOUS_RAW = np.zeros(carry_len, dtype=sample_dtype)
    PREVIOUS_DOWN_WINDOWED = np.zeros(carry_len, dtype=sample_dtype)

    # Per-call working buffers
    GRAIN = np.zeros(GRAIN_LEN_SAMP, dtype=sample_dtype)
    RESAMPLED_GRAIN = np.zeros(GRAIN_LEN_SAMP, dtype=sample_dtype)

In [11]:
def process(input_buffer, output_buffer, buffer_len, GRAIN_LEN_SAMP, OVERLAP_LEN, MAX_VAL,shift_factor):
    """
    Pitch-shift one block of audio and write the result into output_buffer.

    Steps: assemble a grain from the saved tail of the previous input and the
    new samples, estimate its LPC filter, filter the grain with A(z), resample
    that signal with the precomputed interpolation tables, filter it with
    1/A(z), apply the tapering window and overlap-add with the windowed tail
    of the previous grain.

    Parameters
    ----------
    input_buffer: array of int16 samples; its first
        GRAIN_LEN_SAMP - OVERLAP_LEN entries are read
    output_buffer: array of int16 samples; its first buffer_len entries are
        overwritten in place
    buffer_len: number of output samples produced per call (the stride
        between grains); GRAIN_LEN_SAMP - buffer_len samples are carried over
        to the next call
    GRAIN_LEN_SAMP: the length of a grain (in samples)
    OVERLAP_LEN: number of samples shared by consecutive grains
    MAX_VAL: full-scale value used to convert between integer samples and
        floats in [-1, 1]
    shift_factor: not used here; the resampling factor is already baked into
        SAMP_VALS / AMP_VALS by init()

    The module-level arrays created by init() are used as lookup tables
    (WIN, SAMP_VALS, AMP_VALS, P) and as state that is updated in place
    (GRAIN, RESAMPLED_GRAIN, PREVIOUS_RAW, PREVIOUS_DOWN_WINDOWED).
    """
    global PREVIOUS_RAW
    global PREVIOUS_DOWN_WINDOWED
    global GRAIN
    global SAMP_VALS
    global AMP_VALS
    global RESAMPLED_GRAIN
    global WIN
    global P

    # Assemble the grain: saved raw tail first, then the new input samples
    # scaled to floats in [-1, 1]
    n_new = GRAIN_LEN_SAMP - OVERLAP_LEN
    GRAIN[:OVERLAP_LEN] = PREVIOUS_RAW[:OVERLAP_LEN]
    GRAIN[OVERLAP_LEN:GRAIN_LEN_SAMP] = np.asarray(input_buffer[:n_new], dtype=np.float32) / MAX_VAL

    # LPC analysis of the raw grain
    lpc_coefs = lpc(GRAIN, P)
    if not np.all(np.isfinite(lpc_coefs)):
        # e.g. an all-zero grain: fall back to a pass-through filter rather
        # than propagating NaN into the output
        lpc_coefs = np.r_[1., np.zeros(P)]

    # Filter with A(z). The result is kept in a local so that GRAIN still
    # holds the raw samples when its tail is saved at the end of this call.
    excitation = sp.lfilter(lpc_coefs, [1], GRAIN)

    # Resample by linear interpolation between each tabulated sample and its
    # successor; the successor index is clamped to the last sample
    idx = np.asarray(SAMP_VALS[:GRAIN_LEN_SAMP], dtype=int)
    idx_next = np.minimum(idx + 1, GRAIN_LEN_SAMP - 1)
    coeff = np.asarray(AMP_VALS[:GRAIN_LEN_SAMP], dtype=np.float32) / MAX_VAL
    resampled = coeff * excitation[idx] + (1 - coeff) * excitation[idx_next]

    # Filter with 1/A(z)
    resampled = sp.lfilter([1], lpc_coefs, resampled)

    # Apply the tapering window (WIN holds integer values up to MAX_VAL)
    windowed = resampled * np.asarray(WIN[:GRAIN_LEN_SAMP], dtype=np.float32) / MAX_VAL

    # Output: overlap-add with the previous grain's windowed tail on the
    # first OVERLAP_LEN samples, the windowed grain alone up to buffer_len
    mixed = np.array(windowed[:buffer_len], dtype=np.float64)
    mixed[:OVERLAP_LEN] += PREVIOUS_DOWN_WINDOWED[:OVERLAP_LEN]
    # Clip to full scale so that the integer store cannot wrap around
    output_buffer[:buffer_len] = np.clip(mixed * MAX_VAL, -MAX_VAL, MAX_VAL)

    # Save the tail of this grain (samples from buffer_len on) for the next call
    n_tail = GRAIN_LEN_SAMP - buffer_len
    PREVIOUS_DOWN_WINDOWED[:n_tail] = windowed[buffer_len:GRAIN_LEN_SAMP]
    PREVIOUS_RAW[:n_tail] = GRAIN[buffer_len:GRAIN_LEN_SAMP]
    RESAMPLED_GRAIN[:GRAIN_LEN_SAMP] = windowed
            


In [None]:
"""
Pitch shifting with granular synthesis for shift factors <=1.0
"""

""" User selected parameters """
input_wav = "speech.wav"
N = 20      # grain length in milliseconds
a = 0.3    # grain overlap (0,1)
shift_factor = 0.99  # < 1.0

# open WAV file
samp_freq, signal = wavfile.read(input_wav)

# Keep a single channel. A mono file is read as a 1-D array and is used as is;
# for multi-channel data take channel 1 (channel 0 if there is only one column).
if signal.ndim > 1:
    signal = signal[:, min(1, signal.shape[1] - 1)]
data_type = signal.dtype
# NOTE: np.iinfo requires an integer sample format (e.g. 16-bit PCM)
MAX_VAL = np.iinfo(data_type).max

# derived parameters
GRAIN_LEN_SAMP = ms2smp(N, samp_freq)
STRIDE = compute_stride(GRAIN_LEN_SAMP, a)
OVERLAP_LEN = GRAIN_LEN_SAMP-STRIDE

# allocate input and output buffers
input_buffer = np.zeros(STRIDE, dtype=data_type)
output_buffer = np.zeros(STRIDE, dtype=data_type)


"""
Nothing to touch after this!
"""

init(GRAIN_LEN_SAMP, a, shift_factor, data_type)
# Only whole blocks of STRIDE samples are processed; trailing samples are dropped
n_buffers = len(signal)//STRIDE
signal_proc = np.zeros(n_buffers*STRIDE, dtype=data_type)

for k in range(n_buffers):

    start_idx = k*STRIDE
    end_idx = (k+1)*STRIDE

    input_buffer = signal[start_idx:end_idx]
    # process() fills output_buffer in place
    process(input_buffer, output_buffer, STRIDE, GRAIN_LEN_SAMP, OVERLAP_LEN, MAX_VAL, shift_factor)
    signal_proc[start_idx:end_idx] = output_buffer

# write to WAV (report success only once the file has been written)
file_name = "output_lpc_gran_synth.wav"
wavfile.write(file_name, samp_freq, signal_proc)
print("Result written to: %s" % file_name)

In [13]:
import numpy as np
import sounddevice as sd

"""
Real-time pitch shifting with granular synthesis for shift factors <=1.0
"""

""" User selected parameters """
N = 30     # grain length in milliseconds
a = 0.2    # grain overlap (0,1)

# CHANGE THIS VALUE TO HAVE MORE/LESS BASS
shift_factor = 0.99

data_type = np.int16
samp_freq = 16000
float32 = np.float32

# derived parameters
MAX_VAL = np.iinfo(data_type).max
GRAIN_LEN_SAMP = ms2smp(N, samp_freq)

STRIDE = compute_stride(GRAIN_LEN_SAMP, a)
OVERLAP_LEN = GRAIN_LEN_SAMP-STRIDE

# allocate input and output buffers
input_buffer = np.zeros(STRIDE, dtype=data_type)
output_buffer = np.zeros(STRIDE, dtype=data_type)


"""
Nothing to touch after this!
"""
try:
    # Use the same sampling rate that GRAIN_LEN_SAMP was derived from
    sd.default.samplerate = samp_freq
    # One callback per STRIDE samples, matching what process() expects
    sd.default.blocksize = STRIDE
    sd.default.dtype = data_type
    print(data_type)

    def callback(indata, outdata, frames, time, status):
        """Stream callback: pitch-shift channel 0 of indata into outdata."""
        if status:
            print(status)
        # outdata[:,0] is passed so that process() writes into the stream's buffer
        process(indata[:,0], outdata[:,0], frames, GRAIN_LEN_SAMP, OVERLAP_LEN, MAX_VAL,shift_factor)

    init(GRAIN_LEN_SAMP, a, shift_factor, data_type)

    with sd.Stream(channels=1, callback=callback):
        print('#' * 80)
        print('press Return to quit')
        print('#' * 80)
        input()

except KeyboardInterrupt:
    # No argparse parser exists in this notebook, so just report the interruption
    print('\nInterrupted by user')

<class 'numpy.int16'>
################################################################################
press Return to quit
################################################################################

