In [1]:
# Environment setup: install the system library and the Python packages used below.
# NOTE(review): the apt-get commands need root. The captured output of this cell
# shows them failing with "Permission denied", so libsndfile is presumably
# already present on that machine or has to be installed by other means.
# NOTE(review): later cells also import keras and soundfile, which are not
# installed here -- presumably they are expected to exist in the environment.

!apt-get update
!apt-get install -y libsndfile1-dev
!pip install scipy
!pip install scikit-learn
!pip install librosa

/bin/bash: /home/allegro/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
Reading package lists... Done
E: Could not open lock file /var/lib/apt/lists/lock - open (13: Permission denied)
E: Unable to lock directory /var/lib/apt/lists/
W: Problem unlinking the file /var/cache/apt/pkgcache.bin - RemoveCaches (13: Permission denied)
W: Problem unlinking the file /var/cache/apt/srcpkgcache.bin - RemoveCaches (13: Permission denied)
/bin/bash: /home/allegro/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
E: Could not open lock file /var/lib/dpkg/lock-frontend - open (13: Permission denied)
E: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?
/bin/bash: /home/allegro/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
/bin/bash: /home/allegro/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
/bin/bash: /home/a

In [2]:
# Build the audio clips

SAMPLING_RATE = 44100 # fixed; must not be changed

clip_size = 81920 # number of samples in each part used to rebuild a track
step_size = 20480 # number of samples the clip start advances between consecutive clips
# NOTE(review): in the visible cells window_size only appears in cache/model
# file names; it is never passed to a librosa call -- confirm it still matches
# the analysis settings.
window_size = 10240 # number of samples used to obtain the CQT chroma
hop_size = 640 # number of samples the analysis window advances per frame

from scipy.io.wavfile import read, write
import glob
import os
import numpy as np
import librosa
import sys

# Decide which WAV files the clips are cut from, in order of preference:
#   1. an already existing ../data/out/clips.wav,
#   2. a clips.wav rebuilt from ../data/arrays/clips.npy,
#   3. every WAV file found in ../data/wav44100.
if os.path.isfile("../data/out/clips.wav"):
    print("../data/out/clips.wav exists.")
    files = ["../data/out/clips.wav"]
elif os.path.isfile("../data/arrays/clips.npy"):
    # Rebuild the WAV file from clips.npy by laying the clip rows end to end.
    print("loading ../data/arrays/clips.npy ...")
    clips = np.load("../data/arrays/clips.npy")
    print("creating ../data/out/clips.wav ...")
    write("../data/out/clips.wav", SAMPLING_RATE, clips.reshape((clips.shape[0] * clips.shape[1], )))
    files = ["../data/out/clips.wav"]
else:
    # Without clips.npy, fall back to the WAV files in ../data/wav44100.
    # glob returns paths in arbitrary (filesystem-dependent) order; sorting
    # makes the clip order, and every cached array derived from it,
    # reproducible across machines.
    files = sorted(glob.glob("../data/wav44100/*.wav"))
    #files = ["../data/wav44100/3DEmbodimentFromLines.wav"] # for debugging

if len(files) == 0:
    print("WAV ファイルが見つかりませんでした。")
    sys.exit(1)
        
# Cache file for the clip array; the name encodes the clip and step sizes.
clips_filename = "../data/arrays/c" + str(clip_size) + "_s" + str(step_size) + "_f32_clips"

if os.path.isfile(clips_filename + ".npy"):
    print("loading " + clips_filename + ".npy ...")
    clips = np.load(clips_filename + ".npy")
else:
    print("creating " + clips_filename + ".npy ...")
    # Collect the clip rows in a list and build the array once; stacking onto a
    # growing array recopies everything gathered so far for every file.
    clip_rows = []
    for file in files:
        # librosa.load returns (samples, sampling_rate); [0] keeps the samples.
        # With librosa's default mono=True the channels are mixed down to one
        # signal, i.e. this is NOT "left channel only".
        raw_data = librosa.load(file, sr=SAMPLING_RATE)[0]
        # "+ 1" keeps the last full-length clip when it ends exactly at the end
        # of the file. Files shorter than clip_size yield an empty range and
        # are skipped instead of breaking the stacking step.
        for start in range(0, len(raw_data) - clip_size + 1, step_size):
            clip_rows.append(raw_data[start:start + clip_size])

    if clip_rows:
        clips = np.array(clip_rows, dtype=np.float32)
    else:
        # Keep a well-formed (0, clip_size) array when no file is long enough.
        clips = np.zeros((0, clip_size), dtype=np.float32)
    np.save(clips_filename, clips)

print("The clip array has " + str(clips.shape[0]) + " clips.")

../data/out/clips.wav exists.
loading ../data/arrays/c81920_s20480_f32_clips.npy ...
The clip array has 57148 clips.


In [3]:
# Build the constant-Q transform (CQT) of every clip

n_bins = 84

# Cache file for the CQT array.
# NOTE(review): window_size is part of the file name but is not passed to
# librosa.cqt below -- confirm that is intended.
cqt_filename = clips_filename + "_w" + str(window_size) + "_h" + str(hop_size) + "_cqts"

if os.path.isfile(cqt_filename + ".npy"):
    print("loading " + cqt_filename + ".npy ...")
    cqts = np.load(cqt_filename + ".npy")
else:
    print("creating " + cqt_filename + ".npy ...")
    if len(clips) == 0:
        raise ValueError("clips is empty; cannot build the CQT array")

    # Allocate the result once, as soon as the per-clip shape and dtype are
    # known, and fill it row by row. Stacking onto a growing array would recopy
    # every previously computed CQT on each iteration.
    cqts = None
    for num, clip in enumerate(clips):
        print("cqt progress: clip " + str(num + 1) + " / " + str(len(clips)))

        tmp_cqt = librosa.cqt(clip, sr=SAMPLING_RATE, hop_length=hop_size, n_bins=n_bins)
        if cqts is None:
            cqts = np.empty((len(clips),) + tmp_cqt.shape, dtype=tmp_cqt.dtype)
        cqts[num] = tmp_cqt

    np.save(cqt_filename, cqts)

print("The cqt array has " + str(cqts.shape[0]) + " cqts.")
print("cqts.shape: " + str(cqts.shape))
print("Type(cqts[0][0][0]): " + str(type(cqts[0][0][0])))
print("np.max(cqts[0][0]): " + str(np.max(cqts[0][0])))
print("np.max(cqts[0][0]): " + str(np.max(cqts[0][0])))

loading ../data/arrays/c81920_s20480_f32_clips_w10240_h640_cqts.npy ...
The cqt array has 57148 cqts.
cqts.shape: (57148, 84, 129)
Type(cqts[0][0][0]): <class 'numpy.complex64'>
np.max(cqts[0][0]): (0.021538047+0.007131239j)


In [4]:
# Build the CQT chroma features

def Preprocessing(array):
    """Log-compress the magnitudes of ``array`` and return the transpose.

    The absolute value of every element is mapped through ``log(v + 1)`` and
    divided by the logarithm of the largest finite float32 value; the scaled
    array is returned transposed.
    """
    log_of_float32_max = np.log(np.finfo(np.float32).max)
    compressed = np.log(np.abs(array) + 1)
    return (compressed / log_of_float32_max).T

# Cache file for the chroma array.
cqt_chroma_filename = clips_filename + "_w" + str(window_size) + "_h" + str(hop_size) + "_cqt_chromas"

if os.path.isfile(cqt_chroma_filename + ".npy"):
    print("loading " + cqt_chroma_filename + ".npy ...")
    cqt_chromas= np.load(cqt_chroma_filename + ".npy")
else:
    print("creating " + cqt_chroma_filename + ".npy ...")
    if len(cqts) == 0:
        raise ValueError("cqts is empty; cannot build the chroma array")

    # NOTE: the two arguments below change the computed features compared with
    # arrays cached by earlier versions of this cell; delete stale
    # *_cqt_chromas.npy files (and retrain models built on them) to pick the
    # change up.
    cqt_chromas = None
    for num, cqt in enumerate(cqts):
        print("cqt_chroma progress: clip " + str(num + 1) + " / " + str(len(cqts)))

        # chroma_cqt expects a magnitude CQT in C (it takes np.abs itself when
        # it computes the CQT internally); summing the complex bins would let
        # phases cancel. bins_per_octave must match the CQT that was computed
        # (librosa.cqt default: 12); recent librosa releases default
        # chroma_cqt to 36, which would fold the 84 bins into the wrong
        # pitch classes.
        tmp_cqt_chroma = librosa.feature.chroma_cqt(C=np.abs(cqt), sr=SAMPLING_RATE, bins_per_octave=12)
        tmp_cqt_chroma = Preprocessing(tmp_cqt_chroma)

        # Allocate once and fill in place instead of re-stacking a growing
        # array on every iteration.
        if cqt_chromas is None:
            cqt_chromas = np.empty((len(cqts),) + tmp_cqt_chroma.shape, dtype=tmp_cqt_chroma.dtype)
        cqt_chromas[num] = tmp_cqt_chroma

    np.save(cqt_chroma_filename, cqt_chromas)

print("The cqt_chroma array has " + str(cqt_chromas.shape[0]) + " cqt_chromas.")
print("cqt_chromas.shape: " + str(cqt_chromas.shape)) # axes: clip index, window index, chroma index
print("Type(cqt_chromas[0][0][0]): " + str(type(cqt_chromas[0][0][0])))
print("np.max(cqt_chromas[0][0]): " + str(np.max(cqt_chromas[0][0])))

loading ../data/arrays/c81920_s20480_f32_clips_w10240_h640_cqt_chromas.npy ...
The cqt_chroma array has 57148 cqt_chromas.
cqt_chromas.shape: (57148, 129, 12)
Type(cqt_chromas[0][0][0]): <class 'numpy.float32'>
np.max(cqt_chromas[0][0]): 0.0078125


In [5]:
# Build the max_norm features: the peak squared amplitude of each of the
# num_divide consecutive segments of every clip.

num_divide = 8

# Cache file for the max_norm array.
max_norm_filename = clips_filename + "_d" + str(num_divide) + "_max_norms"

if os.path.isfile(max_norm_filename + ".npy"):
    print("loading " + max_norm_filename + ".npy ...")
    max_norms= np.load(max_norm_filename + ".npy")
else:
    print("creating " + max_norm_filename + ".npy ...")
    # Gather one row per clip and build the array once; this avoids re-stacking
    # a growing array for every clip.
    segment_peaks = []
    for num, clip in enumerate(clips):
        print("max_norm progress: clip " + str(num + 1) + " / " + str(len(clips)))
        max_norm = [np.max(clip[int((i / num_divide) * len(clip)):int(((i + 1) / num_divide) * len(clip))] ** 2) for i in range(num_divide)]
        segment_peaks.append(max_norm)

    # The explicit reshape keeps the result 2-D, (number of clips, num_divide),
    # even for a single clip or no clips at all, so the indexing below works.
    max_norms = np.array(segment_peaks, dtype=clips.dtype).reshape(len(segment_peaks), num_divide)

    np.save(max_norm_filename, max_norms)

print("The max_norm array has " + str(max_norms.shape[0]) + " max_norms.")
print("max_norms.shape: " + str(max_norms.shape))
print("Type(max_norms[0][0]): " + str(type(max_norms[0][0])))
print("np.max(max_norms[0]): " + str(np.max(max_norms[0])))

loading ../data/arrays/c81920_s20480_f32_clips_d8_max_norms.npy ...
The max_norm array has 57148 max_norms.
max_norms.shape: (57148, 8)
Type(max_norms[0][0]): <class 'numpy.float32'>
np.max(max_norms[0]): 0.0179649


In [6]:
# Split the data into training data and test data

def normalize(array):
    """Divide ``array`` by the sum of its elements.

    When that sum equals zero the input is handed back unchanged, which avoids
    a division by zero.
    """
    total = np.sum(array)
    return array if total == 0 else array / total
    
cqt_chroma_sum_threshold = 0.01 # samples whose "next window" features sum to less than this are excluded
test_data_rate = 0.1
weight = 0.01 # weight applied to the max_norm features

# Number of windows per clip, read before the extra target window is appended below.
window_num_per_clip = cqt_chromas.shape[1]

# NOTE: this cell rebinds cqt_chromas and clips. Re-running it without first
# re-running the cells that load them operates on the already transformed arrays.

# Append the weighted max_norm values of clip i as extra columns to every window
# row of clip i (feature width becomes chroma columns + max_norm columns).
cqt_chromas = np.array([np.hstack((cqt_chromas[i], np.repeat(np.array([max_norms[i]]), cqt_chromas.shape[1], axis=0) * weight)) for i in range(len(max_norms))])
# Drop the last clip and append to each remaining clip i, as one extra final
# window, window 0 of clip i + 1. That extra window is the prediction target.
# NOTE(review): clip i + 1 starts step_size samples after clip i and may come
# from a different source file -- confirm this is the intended "next" window.
cqt_chromas = np.concatenate([cqt_chromas[:-1, :, :], cqt_chromas[1:, 0, :].reshape(cqt_chromas.shape[0] - 1, 1, cqt_chromas.shape[2])], 1)

# Shuffle the samples; the same permutation is applied to clips so that an index
# into cqt_chromas still refers to the matching clip.
# NOTE(review): no random seed is set, so the split differs between runs.
p = np.random.permutation(len(cqt_chromas))
cqt_chromas = cqt_chromas[p]
clips = clips[p]

# Remove the samples whose appended target window sums to less than the threshold.
x = np.delete(cqt_chromas, np.where(np.sum(cqt_chromas[:,-1,:], axis=1) < cqt_chroma_sum_threshold)[0], axis=0)

# The first int(len(x) * test_data_rate) samples form the test set, the rest the
# training set. Inputs are the first window_num_per_clip windows with a trailing
# singleton axis added; targets are the appended window, stored as one vector
# per feature column (shape: features x samples).
x_test = x[:int(x.shape[0] * test_data_rate), :window_num_per_clip, :].reshape(int(x.shape[0] * test_data_rate), window_num_per_clip, x.shape[2], 1)
x_train = x[int(x.shape[0] * test_data_rate):, :window_num_per_clip, :].reshape(x.shape[0] - int(x.shape[0] * test_data_rate), window_num_per_clip, x.shape[2], 1)
y_tests =  np.array([x[:int(x.shape[0] * test_data_rate), window_num_per_clip, i] for i in range(x.shape[2])])
y_trains = np.array([x[int(x.shape[0] * test_data_rate):, window_num_per_clip, i] for i in range(x.shape[2])])

print("x_train.shape: " + str(x_train.shape))
print("x_test.shape: " + str(x_test.shape))
print("y_trains.shape: " + str(y_trains.shape))
print("y_tests.shape: " + str(y_tests.shape))

x_train.shape: (51287, 129, 20, 1)
x_test.shape: (5698, 129, 20, 1)
y_trains.shape: (20, 51287)
y_tests.shape: (20, 5698)


In [8]:
# Generate music tracks and write them out

# In the visible cells batch_size and epochs are only used to rebuild the names
# of the saved models and of the output files.
batch_size = 256
epochs = 100

# Tag describing the feature settings; it is part of the model and output file names.
data_name = "c" + str(clip_size) + "_s" + str(step_size) + "_w" + str(window_size) + "_h" + str(hop_size) + "_d" + str(num_divide)

import sklearn
from sklearn.metrics.pairwise import cosine_similarity
from keras.models import load_model
import soundfile as sf

def add_fade(x, fadetime, sr):
    """Return a copy of ``x`` with a fade-in at the start and a fade-out at the end.

    Both fades last ``int(fadetime * sr)`` samples and use square-root
    raised-cosine windows.

    The input is never modified. Callers pass rows of the shared clip array,
    and fading those in place would permanently alter the stored clips (and
    fade them again every time the same clip is reused).

    Parameters:
        x: 1-D array of samples.
        fadetime: fade length in seconds.
        sr: sampling rate in Hz.

    Returns:
        A new array with the same shape and dtype as ``x``.

    Raises:
        ValueError: if the fade length is negative or longer than ``x``.
    """
    ft_len = int(fadetime * sr)
    n_samples = len(x)
    if ft_len < 0 or ft_len > n_samples:
        raise ValueError("fade length must be between 0 and len(x)")

    faded = np.array(x, copy=True)
    if ft_len == 0:
        return faded

    r = np.arange(0, ft_len) * np.pi / ft_len
    w_fo = (0.5 + 0.5 * np.cos(r)) ** 0.5  # fade-out window, starts at 1
    w_fi = (0.5 - 0.5 * np.cos(r)) ** 0.5  # fade-in window, starts at 0

    faded[:ft_len] *= w_fi
    # Index from the actual input length rather than a global clip size.
    faded[n_samples - ft_len:] *= w_fo
    return faded

def gen_xfade(x_pre, x_next, fadetime, sr):
    """Append ``x_next`` to ``x_pre`` with an overlap of ``int(fadetime * sr)`` samples.

    The last overlap-length samples of ``x_pre`` are summed with the first
    samples of ``x_next``; the rest of both signals is kept unchanged. No
    windowing is applied here, so the inputs are expected to be faded already.

    Parameters:
        x_pre: 1-D array built so far, or None when nothing has been written yet.
        x_next: 1-D array to append.
        fadetime: overlap length in seconds.
        sr: sampling rate in Hz.

    Returns:
        ``x_next`` itself when ``x_pre`` is None, otherwise a new array of
        length ``len(x_pre) + len(x_next) - overlap``.

    Raises:
        ValueError: if the overlap is negative or longer than either input.
    """
    ft_len = int(fadetime * sr)
    if x_pre is None:
        return x_next

    if ft_len < 0 or ft_len > x_pre.shape[0] or ft_len > x_next.shape[0]:
        raise ValueError("overlap must be between 0 and the length of both inputs")

    split = x_pre.shape[0] - ft_len
    x_pre_begin = x_pre[:split]
    x_pre_end = x_pre[split:]
    # Zero-pad the tail of x_pre to the length of x_next (taken from x_next
    # itself, not from a global clip size) so the two can be summed.
    overlapped = np.concatenate((x_pre_end, np.zeros(x_next.shape[0] - ft_len))) + x_next
    return np.concatenate((x_pre_begin, overlapped))

def create_music_consider_weights(fname):
    """Generate one track by chaining clips and write it to ``fname``.

    Starting from a randomly chosen clip, each step
      1. predicts the next feature vector, one value per model in ``models``,
         from the windows of the current clip (all but the appended last one),
      2. picks the clip whose first window is most similar to the prediction
         (cosine similarity of the chroma part plus ``rhythm_weight`` times the
         cosine similarity of the trailing ``num_divide`` rhythm values),
      3. fades that clip and cross-fades it onto the output.
    The randomly chosen starting clip only seeds the first prediction; it is
    not written to the output itself.

    Reads the notebook globals ``cqt_chromas``, ``clips``, ``models``, ``x``,
    ``num_clips``, ``num_divide``, ``rhythm_weight`` and ``SAMPLING_RATE``.

    Parameters:
        fname: path of the 16-bit PCM WAV file to write.
    """
    first_index = np.random.randint(0, len(cqt_chromas))
    predict_index = first_index
    out = None

    # First window of every clip, split into its chroma columns and its
    # trailing num_divide rhythm columns. These do not change between steps.
    candidates = cqt_chromas[:, 0]
    split = candidates.shape[1] - num_divide
    candidate_chroma = candidates[:, :split]
    candidate_rhythm = candidates[:, split:]

    for clip_no in range(num_clips):
        print("-- generate " + str(clip_no + 1) + " / " + str(num_clips))

        model_input = np.array([cqt_chromas[predict_index, :-1]])
        predict_vec = np.zeros((0, ))
        for model_no in range(x.shape[2]):
            predict = models[model_no].predict(model_input)[0]
            predict_vec = np.hstack((predict_vec, predict))
        predict_chroma = predict_vec[:len(predict_vec)-num_divide]
        predict_rhythm = predict_vec[len(predict_vec)-num_divide:]

        # Score all candidates in one call per feature group instead of
        # calling cosine_similarity twice per candidate clip.
        chroma_scores = cosine_similarity(np.array([predict_chroma]), candidate_chroma)
        rhythm_scores = cosine_similarity(np.array([predict_rhythm]), candidate_rhythm)
        scores = chroma_scores + rhythm_weight * rhythm_scores  # shape (1, number of clips)

        # argmax returns the first best index, like a strict ">" scan does.
        similar_index = int(np.argmax(scores[0]))
        # Kept as a (1, 1) array so the log line keeps its "[[...]]" format.
        score = scores[:, similar_index:similar_index + 1]

        print("score: " + str(score))
        print("predict_vec: " + str(predict_vec))
        print("cqt_chromas[similar_index]: " + str(cqt_chromas[similar_index, 0]))
        print("similar_index: " + str(similar_index))
        print("--")

        predict_index = similar_index

        # Fade a copy: clips[predict_index] is a view into the shared clip
        # array and must not be altered, because the clip may be picked again.
        tmp = add_fade(clips[predict_index].copy(), 0.1, SAMPLING_RATE)
        out = gen_xfade(out, tmp, 0.1, SAMPLING_RATE)

    sf.write(fname, out, SAMPLING_RATE, subtype="PCM_16")

# Weight of the rhythm similarity relative to the chroma similarity.
rhythm_weight = 0.1

# Stem shared by the saved model names and the output file name.
run_name = "independent_" + data_name + "_batch" + str(batch_size) + "_e" + str(epochs)

# Load one saved model per feature column.
models = []
for i in range(x.shape[2]):
    model_name = "{}_{}".format(run_name, i)
    models.append(load_model("../data/models/" + model_name))

file_name = "out_" + run_name + "_rw" + str(rhythm_weight).replace('.', '_')
num_clips = 10

# Generate ten tracks, each made of num_clips clips.
for i in range(10):
    fname = "../data/out/{}_track{}.wav".format(file_name, i)
    print("creating " + fname + " ...")
    create_music_consider_weights(fname)

creating ../data/out/out_independent_c81920_s20480_w10240_h640_d8_batch256_e100_rw0_1_0.wav ...
-- generate 1 / 10
score: [[1.06041582]]
predict_vec: [2.30637073e-04 2.26915884e-03 2.14477116e-03 3.32733430e-03
 9.75346658e-04 1.26267341e-03 1.12311286e-03 1.87775888e-03
 2.04709894e-03 2.61096307e-03 6.64237747e-03 1.00903981e-03
 1.95932706e-04 1.42103017e-04 8.30840581e-05 3.28086579e-04
 1.15615432e-04 1.32207788e-04 1.39547000e-02 4.58785258e-02]
cqt_chromas[similar_index]: [0.0000000e+00 2.4336695e-03 2.8553980e-03 3.8678383e-03 0.0000000e+00
 0.0000000e+00 0.0000000e+00 3.1435257e-03 3.3113102e-03 3.8201667e-03
 7.8125000e-03 0.0000000e+00 6.2136809e-05 5.4137043e-05 7.3957475e-05
 4.8328973e-05 9.3115901e-05 1.6366005e-04 6.7811541e-04 1.3572460e-03]
similar_index: 8101
--
-- generate 2 / 10
score: [[1.05259278]]
predict_vec: [0.00147256 0.00320549 0.00116541 0.00196898 0.00241183 0.00240879
 0.00176573 0.00112944 0.0019895  0.00301866 0.01122173 0.00063552
 0.00021361 0.000150

score: [[1.04006707]]
predict_vec: [1.13079185e-03 1.28935219e-03 2.22913176e-03 3.59429047e-03
 3.89820547e-03 2.18873681e-03 1.16743229e-03 6.18291739e-03
 1.13743439e-03 1.20492897e-03 7.95928936e-04 1.66603655e-03
 2.36131542e-04 1.59802163e-04 9.13153126e-05 3.49062699e-04
 4.65808145e-04 3.70740425e-03 4.14268039e-02 6.59121424e-02]
cqt_chromas[similar_index]: [0.0000000e+00 9.8043575e-04 8.4153085e-04 5.5777393e-03 5.0396379e-03
 2.8586816e-03 2.7287807e-03 7.8125000e-03 0.0000000e+00 0.0000000e+00
 1.0921780e-03 2.6568233e-03 2.2269878e-06 9.8370947e-07 7.7247620e-07
 4.8839485e-07 3.0511058e-07 3.5413541e-07 7.7086770e-05 9.1814471e-04]
similar_index: 17932
--
-- generate 7 / 10
score: [[1.01913557]]
predict_vec: [1.09223381e-03 1.84234185e-03 9.63456521e-04 1.31208578e-03
 3.68766580e-03 1.99788553e-03 1.28627231e-03 1.82169979e-03
 2.01544678e-03 1.48447638e-03 2.06246763e-03 4.66884626e-03
 1.94413427e-04 1.41097655e-04 8.18462358e-05 3.32953234e-04
 1.70907806e-04 6.162031

score: [[1.03125478]]
predict_vec: [0.00142951 0.00170556 0.00157716 0.00189098 0.00149683 0.00273542
 0.00078528 0.00142864 0.0017503  0.00138571 0.00213884 0.00569818
 0.00442437 0.00518376 0.00288368 0.00592604 0.00533544 0.0043404
 0.02713375 0.03988737]
cqt_chromas[similar_index]: [0.00000000e+00 0.00000000e+00 2.22822861e-03 1.90862303e-03
 2.50383909e-03 2.71654478e-03 1.30649214e-03 1.18182402e-03
 3.23300948e-03 2.78515043e-03 2.24114582e-03 7.81250000e-03
 1.20610366e-05 1.13716815e-05 8.35218452e-06 7.72924159e-06
 6.78592303e-04 5.66326082e-04 6.38654048e-04 4.41384409e-03]
similar_index: 21570
--
-- generate 2 / 10
score: [[1.06828051]]
predict_vec: [0.00655422 0.00631293 0.00148365 0.00163732 0.00246563 0.00137573
 0.0012128  0.0015796  0.00138588 0.00120324 0.00163893 0.00518586
 0.00019562 0.00015089 0.00070202 0.00048338 0.00054531 0.00432298
 0.03850665 0.05855379]
cqt_chromas[similar_index]: [7.8125000e-03 6.9528045e-03 2.5368193e-03 2.9864335e-03 2.0688858e-03
 3.08

score: [[1.00079764]]
predict_vec: [1.81174965e-03 2.53065513e-03 1.38756365e-03 2.92611984e-03
 7.84917548e-03 4.59877215e-03 1.66614621e-03 8.79023515e-04
 2.54169828e-03 8.37724097e-03 2.04797392e-03 3.82015918e-04
 2.03550968e-04 1.46174600e-04 8.87504211e-05 3.37857142e-04
 1.63003831e-04 4.95888002e-04 1.94477569e-02 4.89733890e-02]
cqt_chromas[similar_index]: [0.0000000e+00 0.0000000e+00 0.0000000e+00 3.6647967e-03 5.0875116e-03
 6.0867788e-03 0.0000000e+00 2.2251538e-03 3.8522582e-03 7.8125000e-03
 0.0000000e+00 0.0000000e+00 1.0142102e-08 3.0174852e-09 1.5739351e-09
 3.3527611e-10 2.3283064e-10 1.4901161e-10 0.0000000e+00 8.7573944e-04]
similar_index: 55170
--
-- generate 7 / 10
score: [[1.00694565]]
predict_vec: [2.61450559e-03 4.22060536e-03 1.43546052e-03 1.38697436e-03
 1.20891177e-03 8.24381539e-04 9.27889661e-04 1.29593536e-03
 1.47390855e-03 2.97056511e-03 1.25541317e-03 1.79536873e-03
 1.93367989e-04 1.40592689e-04 8.15111634e-05 3.29960138e-04
 1.46035978e-04 5.715496

creating ../data/out/out_independent_c81920_s20480_w10240_h640_d8_batch256_e100_rw0_1_2.wav ...
-- generate 1 / 10
score: [[1.04168779]]
predict_vec: [0.00123586 0.0016739  0.00163768 0.00255417 0.00185571 0.00188713
 0.00082841 0.00095959 0.0007212  0.00083573 0.00311882 0.00472993
 0.00307587 0.00054044 0.00061896 0.00112114 0.00038869 0.00148925
 0.01951681 0.04571462]
cqt_chromas[similar_index]: [0.0000000e+00 9.7734132e-04 3.1152847e-03 3.5766638e-03 2.5409942e-03
 2.7370097e-03 0.0000000e+00 0.0000000e+00 1.0558460e-03 0.0000000e+00
 5.7392446e-03 7.8125000e-03 7.7731172e-05 3.1189062e-05 4.0814652e-06
 1.2475624e-06 1.0203663e-06 2.4140812e-07 6.6841400e-05 1.6024246e-04]
similar_index: 1557
--
-- generate 2 / 10
score: [[1.01977239]]
predict_vec: [2.35396065e-03 2.33605807e-03 1.07343821e-03 1.22419081e-03
 1.22860458e-03 6.47996599e-03 1.29713130e-03 1.39349501e-03
 1.32136070e-03 1.31369382e-03 1.63587008e-03 2.14143889e-03
 1.96192370e-04 1.41911951e-04 8.25259558e-05 3.3201

score: [[1.03152778]]
predict_vec: [0.00296673 0.00183811 0.00150694 0.00112001 0.00513759 0.00658791
 0.00072885 0.00144235 0.00194664 0.00214152 0.00155887 0.00193579
 0.00079639 0.00035656 0.00046458 0.00044319 0.00281892 0.0029123
 0.02302202 0.04597986]
cqt_chromas[similar_index]: [1.5461380e-03 2.2857683e-03 1.7577535e-03 1.5968666e-03 3.6471833e-03
 7.8125000e-03 0.0000000e+00 1.7261012e-03 1.0621805e-03 2.1435092e-03
 1.4236652e-03 6.1707804e-04 2.3841858e-03 1.0790647e-03 2.0042359e-04
 3.2606430e-04 2.9551506e-04 7.6391698e-05 2.5659800e-04 4.0049027e-03]
similar_index: 35479
--
-- generate 7 / 10
score: [[1.0187911]]
predict_vec: [0.00102935 0.00247821 0.00119624 0.00086867 0.00132248 0.00153993
 0.00703142 0.00178034 0.00154773 0.00091065 0.001012   0.00122907
 0.00025608 0.00021455 0.00019425 0.00035596 0.00027945 0.00404659
 0.04470077 0.06735125]
cqt_chromas[similar_index]: [1.9416317e-03 2.5428471e-03 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 7.8125000e-0

creating ../data/out/out_independent_c81920_s20480_w10240_h640_d8_batch256_e100_rw0_1_3.wav ...
-- generate 1 / 10
score: [[1.00489637]]
predict_vec: [0.00142969 0.00278576 0.00332249 0.00130273 0.00314696 0.00303934
 0.00089688 0.00044796 0.00450973 0.00240489 0.00116211 0.0006893
 0.00022786 0.00017396 0.00011245 0.0003452  0.00012703 0.00014125
 0.01170675 0.03668424]
cqt_chromas[similar_index]: [4.8569664e-03 6.3739372e-03 7.8125000e-03 3.4366222e-03 2.5705928e-03
 4.5898743e-03 0.0000000e+00 0.0000000e+00 6.0623623e-03 3.4897879e-03
 3.8284725e-03 0.0000000e+00 3.4654895e-05 7.5912336e-05 5.8767793e-05
 5.6131044e-05 1.3775859e-04 1.8742117e-04 5.3566933e-04 4.7000885e-04]
similar_index: 50566
--
-- generate 2 / 10
score: [[1.00740967]]
predict_vec: [0.00092066 0.00120538 0.00171751 0.00574169 0.00346519 0.00132155
 0.00137194 0.00111393 0.00152338 0.00105162 0.00097305 0.00179338
 0.0002096  0.00015307 0.00013065 0.00037686 0.00033307 0.00028893
 0.01521106 0.04032134]
cqt_chroma

score: [[1.04192621]]
predict_vec: [2.01159669e-03 2.35184073e-03 7.30688858e-04 3.92544782e-03
 1.47914246e-03 1.61192589e-03 1.46944646e-03 2.47299625e-03
 3.00563872e-03 2.30736565e-03 1.24262099e-03 8.73752637e-04
 1.95543966e-04 1.42007659e-04 8.24967647e-05 3.30789131e-04
 1.39777680e-04 1.94025284e-04 1.37064075e-02 3.70982513e-02]
cqt_chromas[similar_index]: [5.0031412e-03 5.3237169e-03 3.0938338e-03 5.4165209e-03 0.0000000e+00
 2.9262847e-03 2.5228448e-03 6.6120857e-03 7.8125000e-03 3.5873512e-03
 2.9880090e-03 2.5830467e-03 1.9178027e-05 9.4065908e-06 7.0653946e-06
 5.0586555e-06 2.1457672e-06 6.6891312e-07 7.5381482e-05 2.2939748e-04]
similar_index: 11090
--
-- generate 7 / 10
score: [[1.04192621]]
predict_vec: [2.01159669e-03 2.35184073e-03 7.30688858e-04 3.92544782e-03
 1.47914246e-03 1.61192589e-03 1.46944646e-03 2.47299625e-03
 3.00563872e-03 2.30736565e-03 1.24262099e-03 8.73752637e-04
 1.95543966e-04 1.42007659e-04 8.24967647e-05 3.30789131e-04
 1.39777680e-04 1.940252

score: [[1.0450495]]
predict_vec: [0.0058285  0.00397173 0.0019302  0.00112883 0.00188896 0.00222978
 0.00242513 0.00292474 0.00082573 0.00092537 0.0004245  0.00356375
 0.00414724 0.00253169 0.00418622 0.001248   0.00088417 0.00216462
 0.02075928 0.03586965]
cqt_chromas[similar_index]: [0.0078125  0.005641   0.00481478 0.00248959 0.00136984 0.00268158
 0.00228031 0.00292655 0.00119576 0.00095183 0.00115064 0.00292374
 0.00125189 0.00044856 0.00221073 0.00402346 0.00254506 0.00122355
 0.00771457 0.00626478]
similar_index: 39545
--
-- generate 2 / 10
score: [[1.02367969]]
predict_vec: [0.00950072 0.00283681 0.00113235 0.00164669 0.00169123 0.0010601
 0.001101   0.00108696 0.00144394 0.00103891 0.00121952 0.0032402
 0.00355397 0.00453992 0.00235371 0.00089455 0.00739376 0.00570009
 0.02415075 0.03502685]
cqt_chromas[similar_index]: [7.8125000e-03 0.0000000e+00 0.0000000e+00 1.0033917e-03 0.0000000e+00
 0.0000000e+00 9.6360821e-04 1.4188398e-03 1.7692243e-03 1.1708997e-03
 9.2955615e-04 2.

score: [[1.02318945]]
predict_vec: [7.01596786e-04 1.12324231e-03 2.00369000e-03 4.56915330e-03
 3.61258234e-03 4.25769109e-03 1.53520890e-03 7.65371427e-04
 2.09671678e-03 1.37036131e-03 1.84259017e-03 6.28395763e-04
 1.93739863e-04 1.40275719e-04 8.17502951e-05 3.37216014e-04
 1.82572083e-04 3.88821843e-03 5.31562828e-02 6.90005124e-02]
cqt_chromas[similar_index]: [0.00115198 0.00384942 0.00330536 0.00735678 0.00721915 0.0078125
 0.         0.         0.00312427 0.0015031  0.00283568 0.00368918
 0.00542674 0.00472153 0.00222539 0.0017457  0.00332959 0.00393913
 0.004673   0.00821012]
similar_index: 22491
--
-- generate 7 / 10
score: [[1.05702956]]
predict_vec: [0.00091423 0.00235947 0.00131156 0.00161977 0.01310182 0.00175659
 0.00186025 0.00169918 0.00082586 0.00119987 0.00072975 0.00136269
 0.00337089 0.0015239  0.00309252 0.00424355 0.00456277 0.00768147
 0.03157255 0.04444528]
cqt_chromas[similar_index]: [2.8774037e-04 6.2300166e-04 7.6152501e-04 7.3205482e-04 7.8125000e-03
 0.00

score: [[1.01983118]]
predict_vec: [0.00494784 0.0014078  0.00155383 0.00136407 0.00251562 0.00321549
 0.00186093 0.00278311 0.00252253 0.00223548 0.00186733 0.00122507
 0.00659152 0.0041552  0.00214125 0.00386346 0.00558882 0.00518264
 0.02491891 0.03992233]
cqt_chromas[similar_index]: [7.8125000e-03 0.0000000e+00 2.1265629e-03 1.9695768e-03 4.2219409e-03
 6.0140090e-03 4.2841807e-03 5.6637945e-03 0.0000000e+00 3.4837935e-03
 5.0851814e-03 1.3361827e-03 1.2338171e-05 1.0365853e-05 9.0359244e-06
 6.5870677e-06 4.7212839e-06 3.0152592e-06 2.9937276e-03 2.6744502e-03]
similar_index: 2541
--
-- generate 2 / 10
score: [[1.0332196]]
predict_vec: [8.25942727e-04 1.01999438e-03 6.36763871e-04 1.19442970e-03
 1.41803699e-03 4.27746121e-03 1.79270457e-03 3.16403690e-03
 1.78766856e-03 2.51473626e-03 5.93560515e-04 1.33178476e-03
 1.98432841e-04 1.41820245e-04 8.26129617e-05 3.89495428e-04
 3.43340659e-03 3.31350905e-03 2.48441081e-02 4.63769399e-02]
cqt_chromas[similar_index]: [1.0543264e-03 8.

score: [[1.04470512]]
predict_vec: [1.19903835e-03 2.28734524e-03 1.22152804e-03 1.69710012e-03
 5.34339296e-03 4.40434087e-03 1.95127155e-03 1.26113486e-03
 1.59877690e-03 1.04026357e-03 1.17338903e-03 1.25750701e-03
 1.94812761e-04 1.42056000e-04 9.88245301e-05 4.40687552e-04
 4.86843463e-04 3.72270209e-04 1.56732164e-02 4.10703309e-02]
cqt_chromas[similar_index]: [3.1196014e-03 2.1355923e-03 2.3802526e-03 1.0304464e-03 7.8125000e-03
 6.2770736e-03 3.4969742e-03 0.0000000e+00 0.0000000e+00 1.8927722e-03
 1.7603135e-03 2.7873812e-03 2.0234622e-05 9.4440675e-06 3.8625299e-06
 2.1368357e-06 2.7866008e-06 4.2372281e-05 6.3203869e-04 1.0563292e-03]
similar_index: 5049
--
-- generate 7 / 10
score: [[1.03427112]]
predict_vec: [3.99496220e-03 2.92696012e-03 8.51827732e-04 6.60422258e-04
 1.46043720e-03 3.12376418e-03 2.26600538e-03 3.02854483e-03
 1.79712137e-03 1.25010405e-03 2.55652377e-03 3.59376660e-03
 1.96172361e-04 1.42239121e-04 8.35078245e-05 3.52006464e-04
 4.69297840e-04 7.7297969

score: [[1.00844821]]
predict_vec: [2.49392074e-03 2.68385978e-03 1.54765393e-03 1.39155681e-03
 3.53926630e-03 2.89651775e-03 9.66543914e-04 2.10902933e-03
 1.73811370e-03 6.69092464e-04 1.42426346e-03 6.73848856e-03
 1.98136782e-04 1.44925652e-04 8.60677537e-05 3.34185548e-04
 1.17695316e-04 1.34946822e-04 1.49009172e-02 4.19255234e-02]
cqt_chromas[similar_index]: [5.42258332e-03 3.97270266e-03 2.06101127e-03 2.61834357e-03
 3.90783511e-03 4.58083628e-03 0.00000000e+00 0.00000000e+00
 2.61900178e-03 3.37119959e-03 0.00000000e+00 7.81250000e-03
 1.09233797e-05 1.15992125e-05 1.30778644e-05 5.28629462e-04
 7.42515898e-04 5.83742629e-04 4.12721623e-04 2.01495993e-03]
similar_index: 34525
--
-- generate 2 / 10
score: [[1.04601576]]
predict_vec: [0.01718384 0.00337513 0.00186931 0.00153024 0.00209538 0.00088752
 0.00066351 0.0010278  0.00157469 0.00078381 0.00059529 0.00531771
 0.00021073 0.00028913 0.00085467 0.00048006 0.00036813 0.0029986
 0.02745436 0.04727462]
cqt_chromas[similar_ind

score: [[1.00921248]]
predict_vec: [0.00111633 0.00366789 0.00180063 0.002063   0.00317559 0.00354746
 0.00145227 0.00149248 0.00178261 0.00306577 0.00242028 0.00199443
 0.00020604 0.00015555 0.00010941 0.00033986 0.00013922 0.0001677
 0.01405027 0.03942935]
cqt_chromas[similar_index]: [0.0000000e+00 4.9978970e-03 3.5277437e-03 7.8125000e-03 6.6656312e-03
 7.7495738e-03 6.0256575e-03 3.5415976e-03 3.9665615e-03 3.8248098e-03
 3.0992650e-03 5.8090682e-03 2.0540524e-03 1.1913114e-03 1.7451934e-03
 1.0638809e-03 2.8193000e-04 9.8855831e-05 5.0300507e-05 5.7215411e-03]
similar_index: 29576
--
-- generate 7 / 10
score: [[1.00852059]]
predict_vec: [0.00152801 0.00349313 0.00124582 0.00271588 0.00603334 0.00306346
 0.00206671 0.00116625 0.0015182  0.0010479  0.00075405 0.00094018
 0.0017436  0.00052801 0.00018887 0.000362   0.00019762 0.00524616
 0.05138693 0.07090001]
cqt_chromas[similar_index]: [3.5517246e-03 7.8125000e-03 0.0000000e+00 6.9314996e-03 6.9021974e-03
 3.6747418e-03 4.8374082e-

score: [[1.00661224]]
predict_vec: [0.00148859 0.00235214 0.00156502 0.00192875 0.00363146 0.00195797
 0.00249068 0.00118984 0.00165986 0.00121544 0.00161217 0.00120785
 0.00413925 0.00447763 0.00433931 0.00559318 0.00460119 0.00334951
 0.02465619 0.03918899]
cqt_chromas[similar_index]: [0.0000000e+00 4.9978970e-03 3.5277437e-03 7.8125000e-03 6.6656312e-03
 7.7495738e-03 6.0256575e-03 3.5415976e-03 3.9665615e-03 3.8248098e-03
 3.0992650e-03 5.8090682e-03 2.0540524e-03 1.1913114e-03 1.7451934e-03
 1.0638809e-03 2.8193000e-04 9.8855831e-05 5.0300507e-05 5.7215411e-03]
similar_index: 29576
--
-- generate 2 / 10
score: [[1.00852059]]
predict_vec: [0.00152801 0.00349313 0.00124582 0.00271588 0.00603334 0.00306346
 0.00206671 0.00116625 0.0015182  0.0010479  0.00075405 0.00094018
 0.0017436  0.00052801 0.00018887 0.000362   0.00019762 0.00524616
 0.05138693 0.07090001]
cqt_chromas[similar_index]: [3.5517246e-03 7.8125000e-03 0.0000000e+00 6.9314996e-03 6.9021974e-03
 3.6747418e-03 4.8374082e

score: [[1.06287952]]
predict_vec: [4.89962939e-03 6.23327028e-03 2.24719802e-03 2.47306307e-03
 1.45637710e-03 1.28573400e-03 1.55119179e-03 1.42677478e-03
 9.55551106e-04 1.03989488e-03 8.28171498e-04 8.64952628e-04
 1.97088782e-04 1.42653953e-04 8.56160696e-05 3.39377468e-04
 1.41419718e-04 1.54058551e-04 1.29584689e-02 4.02140655e-02]
cqt_chromas[similar_index]: [6.1842892e-03 7.8125000e-03 2.6477815e-03 2.9662503e-03 0.0000000e+00
 0.0000000e+00 1.7242876e-03 1.7200392e-03 2.6863860e-04 1.5095584e-03
 0.0000000e+00 0.0000000e+00 2.1843842e-04 9.3705799e-05 6.9308873e-05
 4.4707962e-05 2.1432423e-05 5.8580783e-05 6.2332262e-04 1.2877777e-03]
similar_index: 54971
--
-- generate 7 / 10
score: [[1.0495127]]
predict_vec: [3.08193179e-04 5.72377676e-03 4.66784043e-03 2.26083328e-03
 4.25671227e-03 2.15467461e-03 6.85938285e-04 9.04473069e-04
 4.50633554e-04 4.60627838e-04 2.98701954e-04 6.80622470e-04
 2.11940118e-04 1.48965686e-04 8.87359492e-05 3.53256502e-04
 4.90372651e-04 1.1344677

score: [[1.02840286]]
predict_vec: [0.00113153 0.00602484 0.00390188 0.00193377 0.00194056 0.00108271
 0.00114069 0.00202918 0.00169588 0.00539395 0.00373373 0.00176345
 0.00062638 0.00158752 0.00287625 0.00185753 0.00334136 0.00342408
 0.03026875 0.04318362]
cqt_chromas[similar_index]: [0.0000000e+00 7.8125000e-03 7.5491951e-03 3.7573969e-03 1.6473411e-03
 0.0000000e+00 0.0000000e+00 2.9877927e-03 2.4938108e-03 5.9744208e-03
 3.6407611e-03 2.0973331e-03 6.3788309e-04 4.2773530e-04 1.8650315e-04
 1.2839556e-04 8.6352462e-05 3.3773093e-03 3.0346082e-03 2.0339086e-03]
similar_index: 34337
--
-- generate 2 / 10
score: [[1.02634496]]
predict_vec: [0.00185984 0.0051795  0.00111401 0.00234778 0.00255084 0.00285406
 0.00118401 0.00106167 0.00166164 0.00256991 0.00241621 0.0012529
 0.00024466 0.00016943 0.00015126 0.00276137 0.00345768 0.00298581
 0.01822109 0.04261152]
cqt_chromas[similar_index]: [0.00381759 0.0078125  0.         0.00468124 0.00334835 0.00261729
 0.00300343 0.00260424 0.00140

score: [[1.05251939]]
predict_vec: [0.00127689 0.00515987 0.00909283 0.002536   0.00090572 0.00119747
 0.00097248 0.00096905 0.0005633  0.00066648 0.00084633 0.0016838
 0.00042481 0.00041166 0.0012159  0.00049859 0.00085078 0.00320958
 0.03079539 0.04819455]
cqt_chromas[similar_index]: [2.0296436e-03 3.5497262e-03 7.8125000e-03 0.0000000e+00 1.3255037e-03
 1.1588228e-03 1.8337541e-03 1.0828606e-03 1.0275104e-03 8.2948006e-04
 1.2845164e-03 1.4047049e-03 1.0385513e-05 6.2164754e-06 2.4034082e-06
 1.0576937e-06 1.8253922e-07 1.1269003e-07 7.7301273e-05 9.0252317e-05]
similar_index: 43701
--
-- generate 7 / 10
score: [[1.050282]]
predict_vec: [1.10496592e-03 3.87830776e-03 6.26046443e-03 1.10506499e-03
 6.85890613e-04 1.17892644e-03 6.90370041e-04 1.53289747e-03
 3.38271214e-03 1.75213325e-03 8.79622123e-04 7.78748363e-04
 1.93996617e-04 1.40311575e-04 8.18933186e-05 3.26834037e-04
 1.32905247e-04 1.52955487e-04 1.37871606e-02 3.84220965e-02]
cqt_chromas[similar_index]: [2.13817693e-03 3.

score: [[1.04262408]]
predict_vec: [1.22955802e-03 1.47659809e-03 6.31118950e-04 1.28091325e-03
 3.46541847e-03 5.92146721e-03 1.90136523e-03 8.80648207e-04
 1.73219910e-03 1.84563058e-03 1.45702774e-03 1.54961215e-03
 1.96209585e-04 1.50654421e-04 9.04667977e-05 3.31799820e-04
 1.15298579e-04 1.32625079e-04 1.38682257e-02 4.04378995e-02]
cqt_chromas[similar_index]: [1.1466286e-03 3.0149749e-04 7.4202142e-04 1.3650345e-03 5.8728582e-03
 7.8125000e-03 3.0620208e-03 1.7649835e-03 2.2438820e-03 0.0000000e+00
 1.6772537e-03 1.6674774e-03 5.6911111e-05 2.4683624e-05 1.0189749e-05
 4.4985209e-06 3.8865578e-06 3.7553255e-06 2.0504578e-03 2.1069467e-03]
similar_index: 17286
--
-- generate 2 / 10
score: [[1.03720934]]
predict_vec: [1.26381754e-03 6.99633791e-04 9.47175431e-04 2.12760991e-03
 3.37126665e-03 3.22669302e-03 2.61027669e-03 1.51930796e-03
 2.66020931e-03 2.90145073e-03 5.02550649e-03 6.55654469e-04
 1.98180991e-04 1.41166427e-04 8.19039415e-05 3.69413494e-04
 2.93969177e-03 3.033540

score: [[1.00265705]]
predict_vec: [1.23335852e-03 1.17658835e-03 1.01611239e-03 2.42867973e-03
 3.38422554e-03 1.34693249e-03 1.35358842e-03 3.26961791e-03
 2.86361109e-03 2.89279060e-03 1.11546251e-03 1.06627424e-03
 1.94667184e-04 1.41409735e-04 8.27881086e-05 3.44272237e-04
 3.09475756e-04 2.29251455e-04 1.45957666e-02 4.10747156e-02]
cqt_chromas[similar_index]: [0.00099355 0.00111688 0.         0.00319597 0.00542436 0.
 0.         0.0078125  0.00433836 0.00506079 0.         0.00169622
 0.00115618 0.00113511 0.00029804 0.00104722 0.00115764 0.00257812
 0.00198575 0.00507387]
similar_index: 31670
--
-- generate 7 / 10
score: [[1.010583]]
predict_vec: [0.00091718 0.00189022 0.00189378 0.00117882 0.00263936 0.00098695
 0.00111805 0.00093025 0.00952307 0.00187438 0.00183267 0.00110846
 0.00031289 0.00057525 0.00146419 0.00168244 0.00295295 0.00476818
 0.03291292 0.04229014]
cqt_chromas[similar_index]: [0.00111413 0.         0.         0.         0.         0.
 0.         0.         0.0