***
*Project:* Helmholtz Machine on Niche Construction

*Author:* Jingwei Liu, Computer Music Ph.D., UC San Diego
***

# <span style="background-color:darkorange; color:white; padding:2px 6px">Experiment 3_2</span> 

# Real-Time Synthesis

*Created:* December 23, 2023

*Updated:* December 23, 2023

In [60]:
import numpy as np
import matplotlib.pyplot as plt
import librosa
from IPython.display import Audio
import pyaudio
import wave
import sys
import os
from pathlib import Path

Get a generation from the trained Helmholtz machine. For testing purposes, randomly generated data is used here as a substitute.

In [61]:
def random_generate(k,n,n_data,value_set):
    """
    Generate a random two-valued dataset in the manner of a Bayesian mixture of Gaussians.

    The means u_1...u_k of k Gaussian components are sampled uniformly from [0,1]. Each of the n neurons is randomly 
    assigned to one component, and for every datapoint a value is sampled from that component's Gaussian (u_k, sigma) 
    with sigma fixed to 1. Samples above 0.5 become the positive outcome, all others the negative outcome.

    The mixture of Gaussians is only a device for producing a dataset with a non-singular distribution. The generated 
    data distribution is not identified with the mixture that produced it: the data is treated as sole evidence, without 
    any prior on how it was generated, so its reconstruction is not tied to its generative distributions. This is a 
    major difference from variational inference.

    Arguments:
    k -- number of Gaussian components
    n -- length of input layer (single data point)
    n_data -- number of datapoints to generate
    value_set -- list or array [a,b], where a is the positive outcome and b is the negative outcome of a Bernoulli experiment

    Returns:
    random_set -- generated dataset, numpy array of shape (n,n_data), n_data is the number of datapoints in the generated dataset
    """

    # One mean per component, then one component index per neuron (kept as a column so it broadcasts over datapoints).
    component_means = np.random.rand(k,)
    assignment = np.random.randint(k, size=(n,1))

    # Unit-variance Gaussian samples centred on each neuron's component mean, thresholded to 0/1.
    scores = np.random.randn(n,n_data) + component_means[assignment]
    fired = (scores>0.5).astype(int)

    # Map 1 -> positive outcome and 0 -> negative outcome.
    positive, negative = value_set[0], value_set[1]
    return fired*(positive-negative) + negative

In [62]:
n = 15 # length of generated single instance

In [63]:
data = random_generate(5,n,1,[1,0])
data

array([[0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0]])

In [64]:
division = 4  # group size: number of consecutive data points handled by one instrument
# Ceiling division, so that a trailing partial group always gets its own instrument.
# Rounding to nearest (int(n/division+0.5)) under-counts when the remainder is small
# (e.g. n=17, division=4 gives 4), which leaves more than `division` points in the last group.
n_instr = -(-n // division)
n_instr

4

In [65]:
def intrument_choice(n_instr):
    """
    Randomly pick `n_instr` distinct percussion instruments.

    The eight instruments are organised as four groups of two. The first four picks take one instrument from each 
    group, visiting the groups in a weighted random order (weights 0.4, 0.3, 0.2, 0.1 for the groups as listed below). 
    The next four picks take the other instrument of each group, visiting the groups in a uniformly random order.

    Arguments:
    n_instr -- number of instruments to pick, at most 8; a value of 0 or less yields an empty list

    Returns:
    instr_choice -- list of `n_instr` instrument names (strings), without repetition

    Raises:
    ValueError -- if n_instr is greater than 8, the number of available instruments
    """

    Instruments = [['kick','Tom'],['snare','clap'],['hihat closed','wood'],['ride','hihat open']]
    n_groups = len(Instruments)
    max_instr = 2*n_groups
    if n_instr > max_instr:
        raise ValueError("n_instr must be at most " + str(max_instr) + ", got " + str(n_instr))

    # The three random draws are made in the same order as before, so seeded runs are unchanged.
    first_groups = np.random.choice(n_groups, n_groups, replace=False, p = [0.4, 0.3, 0.2, 0.1])
    member = np.random.randint(2,size = n_groups)   # which of the two instruments each group contributes first
    second_groups = np.random.choice(n_groups, n_groups, replace=False)

    # (group, member) pairs: first pass uses member[g], second pass uses the complementary instrument.
    picks = [(g, member[g]) for g in first_groups] + [(g, 1-member[g]) for g in second_groups]

    return [Instruments[g][m] for g, m in picks[:max(n_instr, 0)]]

In [66]:
instr_choice = intrument_choice(n_instr)
instr_choice

['clap', 'Tom', 'wood', 'hihat open']

### Preparation

In [67]:
instr0, fs = librosa.load('Instruments/'+instr_choice[0]+'.wav')
instr0

array([-5.2706543e-02,  6.8760648e-02, -1.4342493e-01, ...,
        8.6916843e-06, -2.4170677e-06,  9.0632864e-07], dtype=float32)

In [68]:
len(instr0)

40753

In [69]:
fs

22050

In [70]:
# Collect the name (file name without extension) of every .wav file found under Instruments/.
txt_folder = Path('Instruments').rglob('*.wav')
Instruments = [os.path.splitext(os.path.basename(wav_path))[0] for wav_path in txt_folder]
Instruments

['clap', 'hihat closed', 'hihat open', 'kick', 'ride', 'snare', 'Tom', 'wood']

In [71]:
# Load every sample into a dict keyed by instrument name.
# fs is overwritten on each iteration with the sampling rate returned by that load.
instruments_sound = {}
for name in Instruments:
    instruments_sound[name], fs = librosa.load('Instruments/'+name+'.wav')
instruments_sound

{'clap': array([-5.2706543e-02,  6.8760648e-02, -1.4342493e-01, ...,
         8.6916843e-06, -2.4170677e-06,  9.0632864e-07], dtype=float32),
 'hihat closed': array([ 8.7055489e-02,  3.4345269e-01, -4.2524356e-01, ...,
         5.6425459e-05,  1.7882790e-05, -3.8322993e-05], dtype=float32),
 'hihat open': array([ 9.8636545e-02,  2.8145188e-01, -1.9611624e-01, ...,
        -7.8135054e-07,  1.1714292e-07,  1.0206713e-07], dtype=float32),
 'kick': array([-0.00218478, -0.00930428, -0.01938734, ..., -0.00053879,
        -0.00056686, -0.00048317], dtype=float32),
 'ride': array([-4.1870825e-02,  2.7795248e-02, -1.4001541e-02, ...,
         3.8695944e-05,  6.7816058e-05, -1.5612140e-04], dtype=float32),
 'snare': array([ 7.6284137e-04,  9.2353951e-03,  7.4334239e-04, ...,
        -2.6067326e-05, -1.1731618e-05,  0.0000000e+00], dtype=float32),
 'Tom': array([-1.0707259e-02, -1.9491711e-01, -3.7550956e-01, ...,
         1.3712779e-06,  7.7723962e-07,  1.7710408e-07], dtype=float32),
 'wood': a

In [72]:
fs

22050

In [73]:
# Report the length in seconds and the peak absolute amplitude of each instrument sample.
for key, samples in instruments_sound.items():
    seconds = len(samples)/fs
    peak = np.max(np.abs(samples))
    print(key + ': length ' + str(seconds) + ' max ' + str(peak))

clap: length 1.8482086167800453 max 0.9290812
hihat closed: length 0.10036281179138322 max 0.47279972
hihat open: length 0.402312925170068 max 0.58894074
kick: length 0.6745124716553288 max 0.9854314
ride: length 2.492471655328798 max 0.90317786
snare: length 0.4832199546485261 max 0.9755846
Tom: length 0.4431292517006803 max 0.98359084
wood: length 0.6971428571428572 max 0.98792285


In [82]:
BPM = 133

In [83]:
BS = 60/BPM # beat length in seconds
BS

0.45112781954887216

In [84]:
y_beat = int(BS * fs + 0.5)
y_beat

9947

In [88]:
len_output = int(BS * fs * division + 0.5) 
len_output

39789

In [89]:
y_out = np.zeros((len_output*2,))
y_out

array([0., 0., 0., ..., 0., 0., 0.])

In [91]:
len(y_out)/8

9947.25

### Synthesis

In [200]:
instr_choice

['hihat closed', 'wood', 'kick', 'snare']

In [261]:
y_out = np.zeros((len_output*2,))
y_out

array([0., 0., 0., ..., 0., 0., 0.])

In [262]:
deviation = 800 # standard deviation of the random onset jitter, in samples

In [263]:
def _mix_into(buffer, sample, start, gain):
    """Add gain*sample into buffer starting at index start, dropping whatever would run past the end of buffer."""
    room = len(buffer) - start
    if room <= 0:
        return
    # Truncate the sample to the space left; without this, a long sample placed near the end makes the
    # in-place addition fail with a shape mismatch.
    buffer[start:start+len(sample)] += sample[:room]*gain


# Every instrument except the last one plays a full group of `division` beats, one data point per beat.
for i in range(n_instr-1):
    notes = data[i*division:(i+1)*division]
    y_instr = instruments_sound[instr_choice[i]]
    for j in range(division):
        if notes[j] != 0:
            # Humanize the onset with Gaussian jitter, never starting before the buffer begins.
            randomize = int(np.random.randn()*deviation)
            st = max(0, y_beat*j+randomize)
            # A fresh random gain in [0,1) for every hit.
            _mix_into(y_out, y_instr, st, np.random.rand())

# The last instrument receives the remaining data points, which may be fewer than `division`.
# Index it explicitly rather than relying on the loop variable leaked from the loop above,
# which is stale or undefined when that loop does not run.
i = n_instr-1
notes = data[i*division:]
k = len(notes)
y_instr = instruments_sound[instr_choice[i]]
# Draw the beat positions once, so that the k remaining notes land on k distinct beats.
pos = np.random.choice(division, k, replace=False)
for j in range(k):
    if notes[j] != 0:
        randomize = int(np.random.randn()*deviation)
        st = max(0, y_beat*pos[j]+randomize)
        _mix_into(y_out, y_instr, st, np.random.rand())

In [267]:
np.abs(y_out).max()

0.6278371458174661

In [268]:
Audio(data=y_out, rate=fs)

In [258]:
np.abs(y_out).max()

0.7510181127581745

In [259]:
y_out.max()

0.7510181127581745

### Real-Time

In [239]:
data = random_generate(5,n,1,[1,0])
data

array([[0],
       [1],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0]])

In [44]:
import time

In [243]:
time.perf_counter()

85075.2867729

In [245]:
time.sleep(1)

In [247]:
tic = time.perf_counter()
time.sleep(1)
toc = time.perf_counter()
t_elapsed = toc - tic
t_elapsed

1.000365799991414

In [248]:
tic = time.perf_counter()
data = random_generate(5,n,1,[1,0])
toc = time.perf_counter()
t_elapsed = toc - tic
t_elapsed

0.0006716000061715022

In [269]:
y_out

array([0.00045131, 0.00546384, 0.00043978, ..., 0.        , 0.        ,
       0.        ])

In [270]:
y_out.max()

0.6278371458174661

In [27]:
y_out = np.zeros((80*2,))

In [33]:
# Convert the first 80 float samples to 16-bit integers.
# Round to nearest and clip to the int16 range: adding 0.5 before a truncating cast rounds negative
# samples the wrong way, and a full-scale +1.0 sample (32768) does not fit into int16.
yy = np.clip(np.rint(y_out[:80] * 32768), -32768, 32767).astype('int16')
yy[1] = 9  # force one non-zero sample so that the buffer is not all zeros

In [34]:
np.any(yy!=0)

True

In [273]:
# Play the int16 buffer `yy` five times in a row on a mono output stream at rate fs.
bytestream = yy.tobytes()
pya = pyaudio.PyAudio()
try:
    stream = pya.open(format=pya.get_format_from_width(width=2), channels=1, rate=fs, output=True)
    try:
        for _ in range(5):
            stream.write(bytestream)   # blocking write of the whole buffer
        stream.stop_stream()
    finally:
        # Always release the stream, even if a write fails.
        stream.close()
finally:
    # Always release the PyAudio instance, even if opening or writing fails.
    pya.terminate()

In [45]:
import threading

In [2]:
event = threading.Event()

In [3]:
l = threading.Lock()
l

<unlocked _thread.lock object at 0x000002B8462344C0>

In [4]:
l.acquire()
l

<locked _thread.lock object at 0x000002B8462344C0>

In [5]:
l.release()
l

<unlocked _thread.lock object at 0x000002B8462344C0>

In [48]:
def streaming(threadname):
    """
    Write the global `bytestream` to an audio output stream over and over, for as long as the global array `yy` 
    contains at least one non-zero sample.

    Both globals are looked up again on every iteration, so rebinding them elsewhere changes what is played 
    and when the loop ends.

    Arguments:
    threadname -- label of the calling thread; accepted for the thread-target signature but not used
    """
    pya = pyaudio.PyAudio()
    try:
        stream = pya.open(format=pya.get_format_from_width(width=2), channels=1, rate=fs, output=True)
        try:
            while np.any(yy!=0):
                stream.write(bytestream)
            stream.stop_stream()
        finally:
            # Always release the stream, even if a write fails.
            stream.close()
    finally:
        # Always release the PyAudio instance, even if opening or writing fails.
        pya.terminate()

In [36]:
def control(threadname,event):
    """
    Unfinished stub: declares the globals `yy` and `bytestream` but does not read or modify them yet.

    Arguments:
    threadname -- not used by the current body
    event -- not used by the current body
    """
    global yy, bytestream

RealPythonIsGreat!


In [None]:
# `event` is created and set here, but it is not passed to any thread in this cell.
event = threading.Event()
event.set()

# Run `streaming` in a background thread; "Thread-1" is passed as its `threadname` argument.
thread1 = threading.Thread(target=streaming, args=("Thread-1", ) )
# thread2 = threading.Thread(target=thread2, args=("Thread-2", ) )

thread1.start()
# thread2.start()
# join() blocks this cell until `streaming` returns.
thread1.join()
# thread2.join()

In [51]:
# Minimal demo of two threads sharing a module-level variable: one increments it, the other prints it.
a = 0
def thread1(threadname):
    """Increment the global counter `a` every 0.5 s until it reaches 10. `threadname` is not used."""
    global a
    while a < 10:
        a += 1
        time.sleep(0.5)

def thread2(threadname):
    """Print the global counter `a` once per second while it is below 10. `threadname` is not used."""
    # Only reads `a`, so no `global` declaration is needed.
    while a < 10:
        print(a)
        time.sleep(1)

# NOTE(review): these assignments rebind the names `thread1` and `thread2` from the functions above
# to Thread objects, so the functions are no longer reachable by name after this point.
thread1 = threading.Thread(target=thread1, args=("Thread-1", ) )
thread2 = threading.Thread(target=thread2, args=("Thread-2", ) )

thread1.start()
thread2.start()
# Wait for both threads to finish before the cell returns.
thread1.join()
thread2.join()

1
3
5
7
9
