# Create a big dataset once and for all

In [11]:
import os
from multiprocessing import Process, Array
from pathlib import Path

import librosa
import librosa.display
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
from doepy import build
from mpl_toolkits.axes_grid1 import ImageGrid
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.svm import SVC
from tqdm.auto import tqdm

from modelisation_physique.mvt_Helmholtz import simulation

In [8]:
sim_length = 0.3
sample_rate = 22050
num_samples = 10000
celerity = 340
osc_threshold = 0.15  # threshold to decide whether there are oscillations or not
l2_penalty = 100  # L2 Penalty for the SVM optimization
epsilon_length = 0.05  # meters

# Full octave from C0 to B0
# note_frequencies = [65.41, 69.3, 73.42, 77.78, 82.41, 87.31, 92.5, 98, 103.83, 110, 116.54, 123.47]

note_frequencies = [65.41, 69.3, 73.42]

length_min = celerity / (4 * note_frequencies[-1]) - epsilon_length
length_max = celerity / (4 * note_frequencies[0]) + epsilon_length

print(f"The length of the cylinder goes from {length_min} to {length_max}")

The length of the cylinder goes from 1.1077226913647507 to 1.3494954899862408


In [9]:
zeta = 0.5
parameters = build.space_filling_lhs(
    {"gamma": [0, 1], 
     "length_cylinder": [length_min, length_max]},
    num_samples=num_samples
)

Let's go

In [13]:
data_folder = Path("./data")
data_folder.mkdir()

data = np.zeros(num_samples * sim_length * sample_rate)
shared_array = Array("d", data)

def run_simulation(shared_array, index, gamma, length_cylinder):
    waveform, _ = simulation(sim_length, sample_rate, gamma, zeta, length_cylinder, celerity)
    start = index * sim_length * sample_rate
    stop = (index + 1) * sim_length * sample_rate
    shared_array[start:stop] = waveform
    
# Instantiate all processes
procs = []
for idx, x in tqdm(parameters.iterrows()):
    proc = Process(target=run_simulation, args=(shared_array, idx, x["gamma"], x["length_cylinder"]))
    procs.append(proc)
    proc.start()
    
# Run all the processes
for proc in procs:
    proc.join()

np.save(data_folder.joinpath("data.npy"), np.frombuffer(shared_array.get_obj()).reshape((num_samples, -1)))

TypeError: expected a sequence of integers or a single integer, got '66150000.0'