In [2]:
import numpy as np
import sounddevice as sd
from scipy.io import wavfile
from matplotlib import pyplot as plt

In [3]:
def ms2smp(ms, fs):
    """
    Convert a duration in milliseconds into a number of samples.

    Parameters
    ----------
    ms: float
        Duration in milliseconds.
    fs: float
        Sampling rate in Hz.

    Returns
    -------
    int
        ``fs*ms/1000`` converted with ``int`` (any fractional sample is dropped).
    """
    # ms/1000 is the duration in seconds; multiplying by fs gives samples
    n_samples = fs*ms/1000.
    return int(n_samples)

In [4]:
def win_taper(N, a, data_type=np.int16):
    """
    Build a tapering window: a rising ramp, a flat section, and a falling ramp.

    Parameters
    ----------
    N: the length of the grain (in samples)
    a: a double between 0 and 1 representing the fraction of the N samples that
        will be attenuated (int(N*a/2) samples are attenuated on each side)
    data_type: the integer data type of the output

    Returns
    -------
    An array of N values of type "data_type". The flat section sits at the
    largest value "data_type" can hold and the ramps scale down towards 0, so
    that the beginning and the end of a grain are attenuated and grains can be
    overlapped.
    """
    # Length of each of the two tapered edges
    taper_len = int(N * a / 2)

    # Rising edge: 0, 1/taper_len, ..., (taper_len-1)/taper_len
    rising = np.arange(0, taper_len) / float(taper_len)

    # Falling edge is the mirror image of the rising edge
    falling = rising[::-1]

    # Samples between the two edges are left at full amplitude
    flat = np.ones(N - 2*taper_len)

    profile = np.concatenate((rising, flat, falling))

    # Scale the [0, 1] profile to the positive range of the requested type
    full_scale = np.iinfo(data_type).max
    scaled = profile * full_scale
    return scaled.astype(data_type)

In [5]:
def compute_stride(N, a):
    """
    Return the stride for grains of length N with taper fraction a.

    The result is N minus the length of one tapered edge (int(N*a/2)) minus
    one further sample.
    """
    taper_len = int(N * a / 2)
    return N - taper_len - 1

In [6]:
def build_linear_interp_table(n_samples, down_fact, data_type=np.int16):
    """
    Precompute the lookup table used for linear interpolation.

    For each output index n the read position is t = n*down_fact.

    Parameters
    ----------
    n_samples: number of table entries to generate
    down_fact: step between consecutive read positions
    data_type: integer type used to store the weights

    Returns
    -------
    which_samples: list of int, floor(t) for every n (the sample just before t)
    amplitudes: array of "data_type", the weight 1-(t-floor(t)) of that sample
        scaled by the largest value "data_type" can hold
        (e.g. t = 1.01 gives the sample at index 1 a weight of 0.99)
    """
    # Fractional read position for every output index
    positions = np.arange(n_samples) * down_fact

    # Index of the existing sample at or just before each position
    lower = np.floor(positions)

    # The closer the position is to that sample, the larger its weight
    weights = 1 - (positions - lower)

    which_samples = [int(idx) for idx in lower]

    # Store the weights in the range defined by data_type
    full_scale = np.iinfo(data_type).max
    amplitudes = (weights * full_scale).astype(data_type)

    return which_samples, amplitudes

### Main cell for file processing

### Here is the processing for a file

In [24]:
import numpy as np
from scipy.io import wavfile
"""
Pitch shifting with granular synthesis for shift factors <=1.0
"""

""" User selected parameters """
input_wav = "speech.wav"
N = 20      # grain length in milliseconds
a = 0.3    # grain overlap (0,1)
shift_factor = 0.3  # <= 1.0

# open WAV file
samp_freq, signal = wavfile.read(input_wav)

# Keep the first channel only. wavfile.read returns a 1-D array for a mono
# file, in which case there is nothing to select and indexing a second axis
# would raise an IndexError.
if signal.ndim > 1:
    signal = signal[:, 0]
data_type = signal.dtype
# Largest value of the file's sample type; used to scale samples and tables
MAX_VAL = np.iinfo(data_type).max

# Working precision of the state and intermediate arrays
sigtype = np.float32

# derived parameters
GRAIN_LEN_SAMP = ms2smp(N, samp_freq)
STRIDE = compute_stride(GRAIN_LEN_SAMP, a)
# Samples shared between two consecutive grains
OVERLAP_LEN = GRAIN_LEN_SAMP-STRIDE

# allocate input and output buffers
input_buffer = np.zeros(STRIDE, dtype=data_type)
output_buffer = np.zeros(STRIDE, dtype=data_type)

# state variables and constants
def init():
    """
    Create the module-level lookup tables and zeroed work arrays used by
    process().

    Reads GRAIN_LEN_SAMP, a, shift_factor, data_type and sigtype from the
    enclosing cell.
    """
    global WIN, SAMP_VALS, AMP_VALS
    global PREVIOUS_RAW, PREVIOUS_DOWN_WINDOWED
    global GRAIN, RESAMPLED_GRAIN

    # Lookup tables: tapering window and linear-interpolation indices/weights
    WIN = win_taper(GRAIN_LEN_SAMP, a, data_type)
    SAMP_VALS, AMP_VALS = build_linear_interp_table(GRAIN_LEN_SAMP, shift_factor, data_type)

    # State carried from one buffer to the next; both arrays have the same length
    carry_len = int(GRAIN_LEN_SAMP*a/2) + 1
    PREVIOUS_RAW = np.zeros(carry_len, dtype=sigtype)
    PREVIOUS_DOWN_WINDOWED = np.zeros(carry_len, dtype=sigtype)

    # Scratch arrays holding the current grain before and after resampling
    GRAIN = np.zeros(GRAIN_LEN_SAMP, dtype=sigtype)
    RESAMPLED_GRAIN = np.zeros(GRAIN_LEN_SAMP, dtype=sigtype)


# the process function!
def process(input_buffer, output_buffer, buffer_len):
    """
    Pitch-shift one buffer and write the result into output_buffer.

    Parameters
    ----------
    input_buffer: the new samples; indices 0..STRIDE-1 are read
    output_buffer: destination; indices 0..STRIDE-1 are overwritten in place
    buffer_len: not used by this implementation

    init() must have been called first: this function reads the lookup tables
    (WIN, SAMP_VALS, AMP_VALS) and updates the state arrays (PREVIOUS_RAW,
    PREVIOUS_DOWN_WINDOWED) for the next call.
    """
    # state variables and intermediate values shared with init()
    global PREVIOUS_RAW
    global PREVIOUS_DOWN_WINDOWED
    global GRAIN
    global SAMP_VALS
    global AMP_VALS
    global RESAMPLED_GRAIN
    global WIN

    # Build the grain: raw samples saved from the previous buffer followed by
    # the new samples, each new sample being scaled by 1/MAX_VAL as float32
    for n in range(GRAIN_LEN_SAMP):
        if n < OVERLAP_LEN:
            GRAIN[n] = PREVIOUS_RAW[n]
        else:
            GRAIN[n] = np.float32(input_buffer[n - OVERLAP_LEN] ) / MAX_VAL

    # resample
    last_idx = GRAIN_LEN_SAMP - 1
    for n in range(GRAIN_LEN_SAMP):
        coeff = np.float32(AMP_VALS[n])/ MAX_VAL
        prev_idx = SAMP_VALS[n]
        # With shift_factor == 1.0 the last table entry is the final grain
        # sample, so prev_idx + 1 would be one past the end of GRAIN. The
        # weight (1-coeff) is 0 there, so clamping leaves the value unchanged
        # and only avoids the IndexError.
        next_idx = min(prev_idx + 1, last_idx)
        prev_sample = GRAIN[prev_idx]
        next_sample = GRAIN[next_idx]
        RESAMPLED_GRAIN[n] = coeff * prev_sample + (1-coeff) * next_sample

    # apply window
    for n in range(GRAIN_LEN_SAMP):
        RESAMPLED_GRAIN[n] = RESAMPLED_GRAIN[n] * np.float32(WIN[n]) / MAX_VAL

    # write to output
    for n in range(GRAIN_LEN_SAMP):
        # overlapping part: add the windowed tail saved from the previous grain
        if n < OVERLAP_LEN:
            output_buffer[n] = (RESAMPLED_GRAIN[n] + PREVIOUS_DOWN_WINDOWED[n]) * MAX_VAL
        # non-overlapping part
        elif n < STRIDE:
            output_buffer[n] = RESAMPLED_GRAIN[n] * MAX_VAL
        # update state variables: keep this grain's tail for the next call
        else:
            PREVIOUS_DOWN_WINDOWED[n - STRIDE] = RESAMPLED_GRAIN[n]
            PREVIOUS_RAW[n - STRIDE] = GRAIN[n]
"""
Nothing to touch after this!
"""
init()
# simulate block based processing: feed the signal to process() one
# STRIDE-sized buffer at a time (samples beyond the last full buffer are not
# processed)
n_buffers = len(signal)//STRIDE
signal_proc = np.zeros(n_buffers*STRIDE, dtype=data_type)
for k in range(n_buffers):

    # sample indices
    start_idx = k*STRIDE
    end_idx = (k+1)*STRIDE

    # index the appropriate samples
    input_buffer = signal[start_idx:end_idx]
    process(input_buffer, output_buffer, STRIDE)
    signal_proc[start_idx:end_idx] = output_buffer

# write to WAV
file_name = "output_gran_synth.wav"
wavfile.write(file_name, samp_freq, signal_proc)
# Report success only once the file has actually been written
print("Result written to: %s" % file_name)



1.0
0.699972533341
0.399975585192
0.0999786370434
0.799981688894
0.499984740745
0.199987792596
0.899990844447
0.599993896298
0.299996948149
1.0
0.699972533341
0.399975585192
0.0999786370434
0.799981688894
0.499984740745
0.199987792596
0.899990844447
0.599993896298
0.299996948149
1.0
0.699972533341
0.399975585192
0.0999786370434
0.799981688894
0.499984740745
0.199987792596
0.899990844447
0.599993896298
0.299996948149
1.0
0.699972533341
0.399975585192
0.0999786370434
0.799981688894
0.499984740745
0.199987792596
0.899990844447
0.599993896298
0.299996948149
1.0
0.699972533341
0.399975585192
0.0999786370434
0.799981688894
0.499984740745
0.199987792596
0.899990844447
0.599993896298
0.299996948149
1.0
0.699972533341
0.399975585192
0.0999786370434
0.799981688894
0.499984740745
0.199987792596
0.899990844447
0.599993896298
0.299996948149
1.0
0.699972533341
0.399975585192
0.0999786370434
0.799981688894
0.499984740745
0.199987792596
0.899990844447
0.599993896298
0.299996948149
1.0
0.699972533341
0

### Here is the processing in real time

In [26]:
import numpy as np
import sounddevice as sd

"""
Real-time pitch shifting with granular synthesis for shift factors <=1.0
"""

""" User selected parameters """
samp_freq = 16000        # sampling rate in Hz
data_type = np.int16     # sample type of the audio buffers
sigtype = np.float32     # dtype of the arrays allocated by init()
N = 30                   # grain length in milliseconds (converted by ms2smp)
a = 0.2                  # taper fraction given to win_taper / compute_stride
shift_factor = 0.7       # step given to build_linear_interp_table

# derived parameters
GRAIN_LEN_SAMP = ms2smp(N, samp_freq)
STRIDE = compute_stride(GRAIN_LEN_SAMP, a)
OVERLAP_LEN = GRAIN_LEN_SAMP - STRIDE
MAX_VAL = np.iinfo(data_type).max

# allocate input and output buffers (one stride each)
output_buffer = np.zeros(STRIDE, dtype=data_type)
input_buffer = np.zeros(STRIDE, dtype=data_type)

# state variables and constants
def init():
    """
    Create the module-level lookup tables and zeroed work arrays used by
    process().

    Reads GRAIN_LEN_SAMP, a, shift_factor, data_type and sigtype from the
    enclosing cell.
    """
    global WIN, SAMP_VALS, AMP_VALS
    global PREVIOUS_RAW, PREVIOUS_DOWN_WINDOWED
    global GRAIN, RESAMPLED_GRAIN

    # Lookup tables: tapering window and linear-interpolation indices/weights
    WIN = win_taper(GRAIN_LEN_SAMP, a, data_type)
    SAMP_VALS, AMP_VALS = build_linear_interp_table(GRAIN_LEN_SAMP, shift_factor, data_type)

    # State carried from one buffer to the next; both arrays have the same length
    carry_len = int(GRAIN_LEN_SAMP*a/2) + 1
    PREVIOUS_RAW = np.zeros(carry_len, dtype=sigtype)
    PREVIOUS_DOWN_WINDOWED = np.zeros(carry_len, dtype=sigtype)

    # Scratch arrays holding the current grain before and after resampling
    GRAIN = np.zeros(GRAIN_LEN_SAMP, dtype=sigtype)
    RESAMPLED_GRAIN = np.zeros(GRAIN_LEN_SAMP, dtype=sigtype)


# the process function!
def process(input_buffer, output_buffer, buffer_len):
    """
    Pitch-shift one buffer and write the result into output_buffer.

    Parameters
    ----------
    input_buffer: the new samples; indices 0..STRIDE-1 are read
    output_buffer: destination; indices 0..STRIDE-1 are overwritten in place
    buffer_len: not used by this implementation

    init() must have been called first: this function reads the lookup tables
    (WIN, SAMP_VALS, AMP_VALS) and updates the state arrays (PREVIOUS_RAW,
    PREVIOUS_DOWN_WINDOWED) for the next call.
    """
    # state variables and intermediate values shared with init()
    global PREVIOUS_RAW
    global PREVIOUS_DOWN_WINDOWED
    global GRAIN
    global SAMP_VALS
    global AMP_VALS
    global RESAMPLED_GRAIN
    global WIN

    # Build the grain: raw samples saved from the previous buffer followed by
    # the new samples, each new sample being scaled by 1/MAX_VAL as float32
    for n in range(GRAIN_LEN_SAMP):
        if n < OVERLAP_LEN:
            GRAIN[n] = PREVIOUS_RAW[n]
        else:
            GRAIN[n] = np.float32(input_buffer[n - OVERLAP_LEN] ) / MAX_VAL

    # resample
    last_idx = GRAIN_LEN_SAMP - 1
    for n in range(GRAIN_LEN_SAMP):
        coeff = np.float32(AMP_VALS[n])/ MAX_VAL
        prev_idx = SAMP_VALS[n]
        # With shift_factor == 1.0 the last table entry is the final grain
        # sample, so prev_idx + 1 would be one past the end of GRAIN. The
        # weight (1-coeff) is 0 there, so clamping leaves the value unchanged
        # and only avoids the IndexError.
        next_idx = min(prev_idx + 1, last_idx)
        prev_sample = GRAIN[prev_idx]
        next_sample = GRAIN[next_idx]
        RESAMPLED_GRAIN[n] = coeff * prev_sample + (1-coeff) * next_sample

    # apply window
    for n in range(GRAIN_LEN_SAMP):
        RESAMPLED_GRAIN[n] = RESAMPLED_GRAIN[n] * np.float32(WIN[n]) / MAX_VAL

    # write to output
    for n in range(GRAIN_LEN_SAMP):
        # overlapping part: add the windowed tail saved from the previous grain
        if n < OVERLAP_LEN:
            output_buffer[n] = (RESAMPLED_GRAIN[n] + PREVIOUS_DOWN_WINDOWED[n]) * MAX_VAL
        # non-overlapping part
        elif n < STRIDE:
            output_buffer[n] = RESAMPLED_GRAIN[n] * MAX_VAL
        # update state variables: keep this grain's tail for the next call
        else:
            PREVIOUS_DOWN_WINDOWED[n - STRIDE] = RESAMPLED_GRAIN[n]
            PREVIOUS_RAW[n - STRIDE] = GRAIN[n]


# Nothing to touch after this!
try:
    # Use the rate configured in this cell rather than repeating the literal,
    # so the stream and the grain-length computation cannot disagree.
    sd.default.samplerate = samp_freq
    # One callback invocation delivers exactly one stride of samples
    sd.default.blocksize = STRIDE
    sd.default.dtype = data_type
    print(data_type)

    def callback(indata, outdata, frames, time, status):
        """Stream callback: report any status flag, then process channel 0."""
        if status:
            print(status)
        # outdata[:,0] is passed so that process() fills the stream's output
        # buffer in place
        process(indata[:,0], outdata[:,0], frames)

    init()
    with sd.Stream(channels=1, callback=callback):
        print('#' * 80)
        print('press Return to quit')
        print('#' * 80)
        input()
except KeyboardInterrupt:
    # No `parser` object exists in this notebook, so calling parser.exit()
    # raised a NameError here; report the interruption directly instead.
    print('\nInterrupted by user')

<class 'numpy.int16'>
################################################################################
press Return to quit
################################################################################



In [11]:
# Scratch assignment; `bbb` is not referenced by any other cell visible in this notebook.
bbb = 456