In [1]:
# 環境構築

!apt-get update
!apt-get install -y libsndfile1-dev
!pip install scipy
!pip install scikit-learn
!pip install librosa

/bin/bash: /home/allegro/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
Reading package lists... Done
E: Could not open lock file /var/lib/apt/lists/lock - open (13: Permission denied)
E: Unable to lock directory /var/lib/apt/lists/
W: Problem unlinking the file /var/cache/apt/pkgcache.bin - RemoveCaches (13: Permission denied)
W: Problem unlinking the file /var/cache/apt/srcpkgcache.bin - RemoveCaches (13: Permission denied)
/bin/bash: /home/allegro/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
E: Could not open lock file /var/lib/dpkg/lock-frontend - open (13: Permission denied)
E: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?
/bin/bash: /home/allegro/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
/bin/bash: /home/allegro/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
/bin/bash: /home/a

In [2]:
# Build the clips

SAMPLING_RATE = 44100 # must not be changed

clip_size = 81920 # size, in samples, of each part used to rebuild a piece of music
step_size = 20480 # offset, in samples, between the starts of consecutive clips
window_size = 10240 # number of samples used to obtain CQT_CHROMA (NOTE(review): in the cells shown here it only appears in cache file names)
hop_size = 640 # offset, in samples, between consecutive windows

from scipy.io.wavfile import read, write
import glob
import os
import numpy as np
import librosa
import sys

# Choose the audio that will be cut into clips, in order of preference:
#   1. an existing ../data/out/clips4predict.wav
#   2. a WAV rendered from ../data/arrays/clips4predict.npy
#   3. every WAV file in ../data/wav4predict/
if os.path.isfile("../data/out/clips4predict.wav"):
    print("../data/out/clips4predict.wav exists.")
    files = ["../data/out/clips4predict.wav"]
elif os.path.isfile("../data/arrays/clips4predict.npy"):
    print("loading ../data/arrays/clips4predict.npy ...")
    clips = np.load("../data/arrays/clips4predict.npy")
    print("creating ../data/out/clips4predict.wav ...")
    write("../data/out/clips4predict.wav", SAMPLING_RATE, clips.reshape((clips.shape[0] * clips.shape[1], )))
    files = ["../data/out/clips4predict.wav"]
else:
    # Sorted so that the clip order does not depend on directory listing order.
    files = sorted(glob.glob("../data/wav4predict/*.wav"))

if len(files) == 0:
    print("WAV ファイルが見つかりませんでした。")
    sys.exit(1)

# The cache file name encodes the clip geometry, so other settings get their own cache.
clips_filename = "../data/arrays/c" + str(clip_size) + "_s" + str(step_size) + "_f32_clips4predict"

if os.path.isfile(clips_filename + ".npy"):
    print("loading " + clips_filename + ".npy ...")
    clips = np.load(clips_filename + ".npy")
else:
    print("creating " + clips_filename + ".npy ...")

    # Start from an empty (0, clip_size) array so the result is 2-D even if no file yields a clip.
    clip_batches = [np.zeros((0, clip_size), dtype=np.float32)]
    for file in files:
        # librosa.load returns (samples, sampling_rate); [0] keeps only the samples.
        raw_data = librosa.load(file, sr=SAMPLING_RATE)[0]

        # "+ 1" keeps the last clip when it ends exactly at the end of the audio.
        starts = range(0, len(raw_data) - clip_size + 1, step_size)
        if len(starts) == 0:
            # Shorter than one clip: there is nothing to cut from this file.
            print("skipping " + file + " (shorter than one clip)")
            continue
        clip_batches.append(np.array([raw_data[start:start + clip_size] for start in starts]))

    clips = np.vstack(clip_batches)
    np.save(clips_filename, clips)

print("The clip array has " + str(clips.shape[0]) + " clips.")

loading ../data/arrays/c81920_s20480_f32_clips4predict.npy ...
The clip array has 41637 clips.


In [3]:
# Build the CQTs

n_bins = 84

cqt_filename = clips_filename + "_w" + str(window_size) + "_h" + str(hop_size) + "_cqts4predict"

if os.path.isfile(cqt_filename + ".npy"):
    print("loading " + cqt_filename + ".npy ...")
    cqts = np.load(cqt_filename + ".npy")
else:
    print("creating " + cqt_filename + ".npy ...")
    if len(clips) == 0:
        raise ValueError("cannot build cqts: the clip array is empty")

    # Allocated once, after the first transform reveals the per-clip shape and dtype.
    # Growing the array with np.vstack inside the loop would recopy everything for each clip.
    cqts = None
    for num, clip in enumerate(clips):
        print("cqt progress: clip " + str(num + 1) + " / " + str(len(clips)))

        tmp_cqt = librosa.cqt(clip, sr=SAMPLING_RATE, hop_length=hop_size, n_bins=n_bins)
        if cqts is None:
            cqts = np.empty((len(clips),) + tmp_cqt.shape, dtype=tmp_cqt.dtype)
        cqts[num] = tmp_cqt

    np.save(cqt_filename, cqts)

print("The cqt array has " + str(cqts.shape[0]) + " cqts.")
print("cqts.shape: " + str(cqts.shape))
print("Type(cqts[0][0][0]): " + str(type(cqts[0][0][0])))
print("np.max(cqts[0][0]): " + str(np.max(cqts[0][0])))

loading ../data/arrays/c81920_s20480_f32_clips4predict_w10240_h640_cqts4predict.npy ...
The cqt array has 41637 cqts.
cqts.shape: (41637, 84, 129)
Type(cqts[0][0][0]): <class 'numpy.complex64'>
np.max(cqts[0][0]): (0.04157509+0.022495579j)


In [4]:
# cqt_chroma 作成

def Preprocessing(array):
    """Log-compress the magnitudes of ``array``, scale them and transpose.

    Each element becomes ``log(|v| + 1) / log(max float32)``; the scaled
    array is returned transposed.
    """
    log_ceiling = np.log(np.finfo(np.float32).max)
    compressed = np.log(np.abs(array) + 1)
    return (compressed / log_ceiling).T

cqt_chroma_filename = clips_filename + "_w" + str(window_size) + "_h" + str(hop_size) + "_cqt_chromas4predict"

if os.path.isfile(cqt_chroma_filename + ".npy"):
    print("loading " + cqt_chroma_filename + ".npy ...")
    cqt_chromas= np.load(cqt_chroma_filename + ".npy")
else:
    print("creating " + cqt_chroma_filename + ".npy ...")
    if len(cqts) == 0:
        raise ValueError("cannot build cqt_chromas: the cqt array is empty")

    # Allocated once, after the first result reveals the per-clip shape and dtype.
    # Growing the array with np.vstack inside the loop would recopy everything for each clip.
    cqt_chromas = None
    for num, cqt in enumerate(cqts):
        print("cqt_chroma progress: clip " + str(num + 1) + " / " + str(len(cqts)))

        tmp_cqt_chroma = Preprocessing(librosa.feature.chroma_cqt(C=cqt, sr=SAMPLING_RATE))
        if cqt_chromas is None:
            cqt_chromas = np.empty((len(cqts),) + tmp_cqt_chroma.shape, dtype=tmp_cqt_chroma.dtype)
        cqt_chromas[num] = tmp_cqt_chroma

    np.save(cqt_chroma_filename, cqt_chromas)

print("The cqt_chroma array has " + str(cqt_chromas.shape[0]) + " cqt_chromas.")
print("cqt_chromas.shape: " + str(cqt_chromas.shape)) # axes: clip index, window index, chroma index
print("Type(cqt_chromas[0][0][0]): " + str(type(cqt_chromas[0][0][0])))
print("np.max(cqt_chromas[0][0]): " + str(np.max(cqt_chromas[0][0])))

loading ../data/arrays/c81920_s20480_f32_clips4predict_w10240_h640_cqt_chromas4predict.npy ...
The cqt_chroma array has 41637 cqt_chromas.
cqt_chromas.shape: (41637, 129, 12)
Type(cqt_chromas[0][0][0]): <class 'numpy.float32'>
np.max(cqt_chromas[0][0]): 0.0078125


In [5]:
# Build the max_norms

num_divide = 8

max_norm_filename = clips_filename + "_d" + str(num_divide) + "_max_norms4predict"

if os.path.isfile(max_norm_filename + ".npy"):
    print("loading " + max_norm_filename + ".npy ...")
    max_norms= np.load(max_norm_filename + ".npy")
else:
    print("creating " + max_norm_filename + ".npy ...")

    # Every clip has the same length, so the segment boundaries are computed once.
    clip_len = clips.shape[1]
    bounds = [int((i / num_divide) * clip_len) for i in range(num_divide + 1)]

    # Preallocated as (clip, segment): stays 2-D even for a single clip and avoids
    # recopying the whole array for every clip.
    max_norms = np.empty((len(clips), num_divide), dtype=clips.dtype)
    for num, clip in enumerate(clips):
        print("max_norm progress: clip " + str(num + 1) + " / " + str(len(clips)))
        # Largest squared sample in each of the num_divide consecutive segments.
        max_norms[num] = [np.max(clip[lo:hi] ** 2) for lo, hi in zip(bounds[:-1], bounds[1:])]

    np.save(max_norm_filename, max_norms)

print("The max_norm array has " + str(max_norms.shape[0]) + " max_norms.")
print("max_norms.shape: " + str(max_norms.shape))
print("Type(max_norms[0][0]): " + str(type(max_norms[0][0])))
print("np.max(max_norms[0]): " + str(np.max(max_norms[0])))

loading ../data/arrays/c81920_s20480_f32_clips4predict_d8_max_norms4predict.npy ...
The max_norm array has 41637 max_norms.
max_norms.shape: (41637, 8)
Type(max_norms[0][0]): <class 'numpy.float32'>
np.max(max_norms[0]): 0.14306161


In [6]:
# 学習データとテストデータに分ける

def normalize(array):
    """Scale ``array`` so that its elements sum to 1.

    An array whose elements sum to 0 is returned as is.
    """
    total = np.sum(array)
    return array if total == 0 else array / total
    
cqt_chroma_sum_threshold = 0.01 # clips whose next window sums to less than this are excluded
test_data_rate = 0.1

window_num_per_clip = cqt_chromas.shape[1]

# Append each clip's max_norms to every one of its windows (feature axis: chroma, then max_norm).
norm_columns = np.repeat(max_norms[:, np.newaxis, :], window_num_per_clip, axis=1)
cqt_chromas = np.concatenate((cqt_chromas[:len(max_norms)], norm_columns), axis=2)
# Append the first window of the following clip as one extra trailing window; the last clip is dropped.
cqt_chromas = np.concatenate((cqt_chromas[:-1], cqt_chromas[1:, :1, :]), axis=1)

# Shuffle the feature array and the clips with the same permutation so indices keep matching.
p = np.random.permutation(len(cqt_chromas))
cqt_chromas = cqt_chromas[p]
clips = clips[p]

# Keep only the rows whose appended window reaches the threshold.
below_threshold = np.sum(cqt_chromas[:, -1, :], axis=1) < cqt_chroma_sum_threshold
x = cqt_chromas[~below_threshold]

num_test = int(x.shape[0] * test_data_rate)

# Inputs: the original windows of each clip, with a trailing axis of length 1.
inputs = x[:, :window_num_per_clip, :].reshape(x.shape[0], window_num_per_clip, x.shape[2], 1)
x_test = inputs[:num_test]
x_train = inputs[num_test:]

# Targets: the appended window, one row per feature, each row divided by its maximum.
y = np.ascontiguousarray(x[:, window_num_per_clip, :].T)
y_maxs = np.max(y, axis=1)

y_scaled = y / y_maxs[:, np.newaxis]
y_tests = y_scaled[:, :num_test]
y_trains = y_scaled[:, num_test:]

print("x_train.shape: " + str(x_train.shape))
print("x_test.shape: " + str(x_test.shape))
print("y_trains.shape: " + str(y_trains.shape))
print("y_tests.shape: " + str(y_tests.shape))

x_train.shape: (37439, 129, 20, 1)
x_test.shape: (4159, 129, 20, 1)
y_trains.shape: (20, 37439)
y_tests.shape: (20, 4159)


In [None]:
# 楽曲の出力

import sklearn
from sklearn.metrics.pairwise import cosine_similarity
from keras.models import load_model
import soundfile as sf

batch_size = 64
epochs = 100
# Tag describing the preprocessing settings; it is embedded in the model and output file names.
data_name = "c{}_s{}_w{}_h{}_d{}".format(clip_size, step_size, window_size, hop_size, num_divide)

def add_fade(x, ft_len):
    """Return a copy of ``x`` with a fade-in at its start and a fade-out at its end.

    Args:
        x: 1-D array of samples. It is not modified.
        ft_len: length, in samples, of each fade.

    Returns:
        A new floating-point array of the same length as ``x``. The first
        ``ft_len`` samples are scaled by sqrt(0.5 - 0.5 * cos) weights rising
        from 0, and the last ``ft_len`` samples by sqrt(0.5 + 0.5 * cos)
        weights falling from 1.
    """
    r = np.arange(0, ft_len)*np.pi/ft_len
    w_fo = (0.5+0.5*np.cos(r))**0.5
    w_fi = (0.5-0.5*np.cos(r))**0.5

    # Work on a copy: callers pass views into a shared clip array, and fading in
    # place would attenuate the same clip again each time it is reused.
    faded = np.array(x, dtype=np.result_type(x.dtype, np.float32))
    faded[0:ft_len] *= w_fi
    # The fade-out is positioned from the input's own length, not a global clip size.
    faded[faded.shape[0]-ft_len:] *= w_fo
    return faded

def gen_xfade(x_pre, x_next, ft_len):
    """Append ``x_next`` to ``x_pre`` with the two overlapping by ``ft_len`` samples.

    Args:
        x_pre: 1-D array of the audio assembled so far, or None when there is none yet.
        x_next: 1-D array of the clip to append.
        ft_len: number of samples over which the two signals overlap.

    Returns:
        ``x_next`` itself when ``x_pre`` is None. Otherwise a new array in which
        the last ``ft_len`` samples of ``x_pre`` are summed with the first
        ``ft_len`` samples of ``x_next`` and the rest of ``x_next`` follows.

    Raises:
        ValueError: if ``ft_len`` is negative or longer than either signal.
    """
    if x_pre is None:
        return x_next

    if ft_len < 0 or ft_len > min(x_pre.shape[0], x_next.shape[0]):
        raise ValueError("ft_len must be between 0 and the length of the shorter signal")

    split = x_pre.shape[0] - ft_len
    head = x_pre[:split]
    tail = x_pre[split:]
    # Pad the tail with zeros up to the length of x_next so the two can be summed.
    overlap = np.concatenate((tail, np.zeros(x_next.shape[0] - ft_len))) + x_next
    return np.concatenate((head, overlap))

def create_music_consider_weights(fname, rw):
    """Chain ``num_clips`` clips into one track and write it to ``fname``.

    Starting from a random clip, each step asks every model for one feature of
    the next window, then picks the clip whose window at the fade position is
    most similar to that prediction. The chosen clip is faded and cross-faded
    onto the track.

    Reads the module-level ``cqt_chromas``, ``clips``, ``models``, ``x``,
    ``y_maxs``, ``num_clips``, ``num_divide``, ``hop_size`` and ``SAMPLING_RATE``.

    Args:
        fname: path of the WAV file to write.
        rw: weight of the rhythm similarity relative to the chroma similarity.
    """
    fade_samples = 10240
    predict_index = np.random.randint(0, len(cqt_chromas))
    out = None

    # Candidate features: for every clip, the window located at the fade position.
    # They do not change between steps, so split them once.
    candidates = cqt_chromas[:, int(fade_samples / hop_size)]
    candidate_split = candidates.shape[1] - num_divide
    candidate_chroma = candidates[:, :candidate_split]
    candidate_rhythm = candidates[:, candidate_split:]

    for clip_no in range(num_clips):
        print("-- generate " + str(clip_no + 1) + " / " + str(num_clips))

        # One model per output feature; each output is scaled back by its y_maxs entry.
        model_input = np.array([cqt_chromas[predict_index, :-1]])
        predict_vec = np.zeros((0, ))
        for feature_no in range(x.shape[2]):
            predict = models[feature_no].predict(model_input)[0] * y_maxs[feature_no]
            predict_vec = np.hstack((predict_vec, predict))
        predict_split = len(predict_vec) - num_divide
        predict_chroma = predict_vec[:predict_split]
        predict_rhythm = predict_vec[predict_split:]

        # Score all candidates in one call per feature group instead of one call per clip.
        chroma_scores = cosine_similarity(predict_chroma.reshape(1, -1), candidate_chroma)[0]
        rhythm_scores = cosine_similarity(predict_rhythm.reshape(1, -1), candidate_rhythm)[0]
        scores = chroma_scores + rw * rhythm_scores
        # argmax returns the first best candidate, like a strict ">" scan would.
        similar_index = int(np.argmax(scores))
        score = scores[similar_index]

        print("score: " + str(score))
        print("predict_vec: " + str(predict_vec))
        print("cqt_chromas[similar_index]: " + str(cqt_chromas[similar_index, 0]))
        print("similar_index: " + str(similar_index))
        print("--")

        # NOTE(review): nothing prevents the current clip from being selected again;
        # once that happens the same clip is chosen on every following step.
        predict_index = similar_index

        # Fade a copy so the shared clips array is never altered.
        tmp = add_fade(clips[predict_index].copy(), fade_samples)
        out = gen_xfade(out, tmp, fade_samples)

    sf.write(fname, out, SAMPLING_RATE, subtype="PCM_16")

# One saved model per output feature, all sharing the same run name.
run_name = "independent_" + data_name + "_batch" + str(batch_size) + "_e" + str(epochs)
models = [load_model("../data/models/" + run_name + "_" + str(i)) for i in range(x.shape[2])]

num_clips = 30
rhythm_weight = 0.5

# The output file name is the same for every track apart from the track number.
file_name = "out_" + run_name + "_rw" + format(rhythm_weight, '.2f').replace('.', '_') + "_PREDICT"
for i in range(5):
    fname = "../data/out/" + file_name + "_track" + str(i) + ".wav"
    print("creating " + fname + " ...")
    create_music_consider_weights(fname, rhythm_weight)

creating ../data/out/out_independent_c81920_s20480_w10240_h640_d8_batch64_e100_rw0_50_PREDICT_track0.wav ...
-- generate 1 / 30
score: [[1.38483637]]
predict_vec: [0.00199131 0.00071159 0.00031387 0.00124326 0.00143032 0.00048895
 0.00170856 0.00368406 0.00113449 0.00090284 0.00092779 0.0069669
 0.00415813 0.00113673 0.00998852 0.00803792 0.00233281 0.00148877
 0.0179312  0.01959166]
cqt_chromas[similar_index]: [0.         0.         0.         0.00727526 0.00634117 0.0078125
 0.         0.         0.00534474 0.         0.00388629 0.
 0.04321648 0.02550361 0.03113015 0.06957114 0.03779018 0.04421184
 0.07136959 0.12171513]
similar_index: 8838
--
-- generate 2 / 30
score: [[1.38188543]]
predict_vec: [0.00257407 0.0014714  0.00121148 0.00041323 0.00096798 0.00092266
 0.00160396 0.00220599 0.00354205 0.00084878 0.00170157 0.00151652
 0.01752283 0.03867263 0.02043845 0.0269245  0.03849548 0.06770497
 0.05873954 0.05781961]
cqt_chromas[similar_index]: [0.00099674 0.         0.         0.   

score: [[1.43172156]]
predict_vec: [0.00035148 0.00028086 0.00277035 0.00150711 0.0012462  0.00113906
 0.00138251 0.00674821 0.00512483 0.00144945 0.00107816 0.00098769
 0.04403806 0.07625731 0.06782747 0.11590368 0.05794547 0.09962687
 0.11011707 0.08734656]
cqt_chromas[similar_index]: [0.00720982 0.         0.         0.00659852 0.         0.
 0.0078125  0.0021144  0.         0.00657356 0.00321021 0.00407548
 0.08961771 0.09386946 0.13285056 0.16784035 0.14055635 0.1798114
 0.15436928 0.12221605]
similar_index: 38194
--
-- generate 7 / 30
score: [[1.44971245]]
predict_vec: [0.00045433 0.00391612 0.0072512  0.00073691 0.00243926 0.00561413
 0.00511374 0.00090456 0.00157292 0.00255094 0.00113855 0.00219151
 0.07280193 0.09285946 0.07820991 0.10312002 0.08071026 0.06763113
 0.07509097 0.07974534]
cqt_chromas[similar_index]: [0.         0.         0.0078125  0.00316105 0.00328134 0.00592156
 0.         0.         0.00357388 0.         0.         0.
 0.16822815 0.121279   0.15135063 0.187

score: [[1.44379098]]
predict_vec: [0.00157756 0.00016411 0.00164375 0.00037679 0.00141016 0.00091172
 0.00316414 0.00122721 0.00033599 0.00124739 0.00696049 0.0023088
 0.06639849 0.08948544 0.06517144 0.05434051 0.04655012 0.04245616
 0.05848144 0.05825261]
cqt_chromas[similar_index]: [0.         0.         0.00362717 0.00404821 0.00698063 0.00586926
 0.0078125  0.         0.         0.         0.         0.00745137
 0.05526832 0.09020336 0.06065229 0.06627074 0.04106782 0.04995705
 0.06230941 0.07527795]
similar_index: 35400
--
-- generate 12 / 30
score: [[1.44806054]]
predict_vec: [0.00042896 0.00092528 0.00290945 0.00098266 0.00024761 0.00024688
 0.00034858 0.00281438 0.00576016 0.00268681 0.00055463 0.00055913
 0.03424564 0.03664557 0.0228408  0.02906043 0.03314874 0.04125264
 0.05074124 0.04955055]
cqt_chromas[similar_index]: [0.         0.00523385 0.00382837 0.         0.00458042 0.00527886
 0.         0.00433035 0.0078125  0.         0.         0.
 0.15066281 0.12257905 0.11908

score: [[1.43642714]]
predict_vec: [0.00517751 0.00533798 0.00303777 0.00359874 0.00118388 0.00149538
 0.00079254 0.00235549 0.00211311 0.00392888 0.00534614 0.00406335
 0.05800663 0.10563512 0.06359316 0.0866318  0.07417578 0.08265168
 0.08988722 0.08428417]
cqt_chromas[similar_index]: [0.         0.         0.         0.         0.0078125  0.
 0.         0.00548585 0.         0.00256818 0.         0.
 0.1117087  0.07695341 0.08658335 0.04658499 0.0657689  0.06924954
 0.12093915 0.05671979]
similar_index: 7662
--
-- generate 17 / 30
score: [[1.42922275]]
predict_vec: [0.00105532 0.0002976  0.0014067  0.00045247 0.00139731 0.00156774
 0.00661188 0.00378072 0.00134008 0.00042671 0.00106386 0.00559868
 0.04755711 0.02623473 0.03579436 0.03864044 0.06336348 0.03215083
 0.04734347 0.0540815 ]
cqt_chromas[similar_index]: [0.         0.         0.0056123  0.00561283 0.         0.0078125
 0.         0.         0.0016168  0.         0.         0.
 0.08682597 0.04440455 0.07781083 0.09370123 0.

score: [[1.4156336]]
predict_vec: [0.00255096 0.00069708 0.00044732 0.00177937 0.00059242 0.00692471
 0.0052918  0.00112165 0.00274082 0.00040897 0.00186731 0.0016927
 0.03039546 0.05108099 0.02962114 0.03911863 0.06519057 0.0397523
 0.06186856 0.05597503]
cqt_chromas[similar_index]: [0.         0.00410747 0.         0.         0.00252232 0.
 0.0078125  0.         0.00280456 0.         0.00302205 0.
 0.10971847 0.10163464 0.16151235 0.21308276 0.13925506 0.11197405
 0.14534459 0.2526774 ]
similar_index: 9748
--
-- generate 22 / 30
score: [[1.40415459]]
predict_vec: [0.00142191 0.00170117 0.00169458 0.00081325 0.00186761 0.00058342
 0.00186981 0.00142122 0.00157138 0.00266517 0.00183374 0.00556995
 0.08832113 0.11948425 0.0769674  0.06454723 0.07670412 0.13957858
 0.11852507 0.11760753]
cqt_chromas[similar_index]: [0.00423926 0.         0.00286537 0.0078125  0.         0.00363066
 0.00273941 0.         0.         0.00703169 0.00180786 0.00528288
 0.07155723 0.06509755 0.06812979 0.08433

score: [[1.39486942]]
predict_vec: [0.00249752 0.00356439 0.00474417 0.00298705 0.00162    0.00506583
 0.0026879  0.00072558 0.00277539 0.00133767 0.00052632 0.00167872
 0.01989416 0.07996718 0.04819211 0.03937748 0.07977273 0.06820457
 0.08502701 0.07211576]
cqt_chromas[similar_index]: [0.00621583 0.0078125  0.00539706 0.00615529 0.0024999  0.
 0.00318808 0.00250666 0.00164509 0.0011331  0.00767029 0.00490283
 0.07642943 0.06386541 0.04231403 0.03113553 0.05636422 0.06343425
 0.0855002  0.0732075 ]
similar_index: 20779
--
-- generate 27 / 30
score: [[1.40051616]]
predict_vec: [0.00492909 0.00088418 0.00039136 0.00159076 0.00144238 0.00143655
 0.00076397 0.00369007 0.00259328 0.00514164 0.00207628 0.00323568
 0.02350441 0.01641027 0.03166783 0.03565294 0.04515833 0.04007505
 0.06249282 0.05407182]
cqt_chromas[similar_index]: [0.00284323 0.         0.         0.00476329 0.         0.
 0.         0.         0.         0.0078125  0.         0.00097367
 0.03602559 0.0250717  0.0752361  0.0

score: [[1.39402779]]
predict_vec: [0.00385677 0.00245146 0.0018885  0.00144094 0.00348264 0.00445692
 0.00482911 0.00111053 0.00116815 0.00025782 0.00264621 0.00424916
 0.08430426 0.08715801 0.06250138 0.11481983 0.07272321 0.07389747
 0.08231285 0.08232757]
cqt_chromas[similar_index]: [0.         0.00538181 0.         0.         0.0078125  0.0075746
 0.00476547 0.         0.         0.003818   0.0049919  0.
 0.03410564 0.08208169 0.08199428 0.06229418 0.03535681 0.06436769
 0.09035924 0.08048081]
similar_index: 30409
--
-- generate 3 / 30
score: [[1.42477864]]
predict_vec: [0.00044823 0.00118808 0.00304562 0.00272679 0.00049038 0.00575909
 0.00550023 0.00345552 0.00225441 0.00030765 0.00111036 0.00094751
 0.04472994 0.03432087 0.01886239 0.03623931 0.04765946 0.04411471
 0.05839338 0.05232834]
cqt_chromas[similar_index]: [0.         0.         0.00383054 0.         0.00649985 0.
 0.         0.00372517 0.         0.         0.00601533 0.0078125
 0.11221927 0.11337746 0.07896429 0.0995

score: [[1.43838243]]
predict_vec: [0.00230289 0.00058632 0.0030333  0.00084479 0.00078604 0.00167464
 0.00212568 0.00364549 0.00268525 0.00045761 0.00068892 0.00162414
 0.03130937 0.04384744 0.0251805  0.03617816 0.03092278 0.04469411
 0.05723132 0.05065382]
cqt_chromas[similar_index]: [0.         0.00557524 0.         0.         0.         0.00588626
 0.00107148 0.         0.0078125  0.         0.00398267 0.00429621
 0.10727584 0.10796663 0.10943562 0.09224033 0.14260027 0.22307229
 0.15874028 0.17497861]
similar_index: 34543
--
-- generate 8 / 30
score: [[1.43522829]]
predict_vec: [0.00297974 0.00321855 0.00690539 0.00172939 0.00204638 0.00068857
 0.00164294 0.00070371 0.00071991 0.00138053 0.00128783 0.00102459
 0.06142809 0.05057943 0.0787054  0.12383759 0.08291    0.09695952
 0.09310892 0.09659008]
cqt_chromas[similar_index]: [0.         0.         0.         0.         0.         0.
 0.0069946  0.         0.0075979  0.0078125  0.         0.
 0.01833079 0.02710207 0.02959874 0.10

score: [[1.4213186]]
predict_vec: [0.00441056 0.00352221 0.00149261 0.00287212 0.0011311  0.00054888
 0.0014118  0.00280322 0.00216081 0.00047906 0.00229901 0.00735644
 0.08179209 0.07275306 0.08985586 0.06803282 0.07756698 0.05331126
 0.07551819 0.07073314]
cqt_chromas[similar_index]: [0.         0.         0.         0.00685244 0.         0.
 0.         0.00773758 0.00293382 0.00204346 0.0078125  0.
 0.1126593  0.1011973  0.1226218  0.09985238 0.11484131 0.06739893
 0.14088833 0.13886814]
similar_index: 10326
--
-- generate 13 / 30
score: [[1.45040906]]
predict_vec: [0.00675787 0.00460577 0.00287887 0.00097998 0.0004561  0.00037754
 0.00090904 0.00209128 0.00064643 0.00166219 0.003841   0.00666993
 0.06750353 0.05457046 0.06561238 0.03801095 0.07480307 0.07692096
 0.07528403 0.0761055 ]
cqt_chromas[similar_index]: [0.         0.         0.         0.         0.         0.
 0.         0.0078125  0.00280806 0.00559131 0.         0.00559864
 0.06844081 0.04396194 0.04584366 0.0407407  0

score: [[1.40415554]]
predict_vec: [0.00204836 0.00070555 0.00198807 0.00025157 0.00220234 0.00178366
 0.00134391 0.00202507 0.00089236 0.00040454 0.00032828 0.00611069
 0.0048956  0.00257997 0.00706113 0.00598272 0.01393357 0.00955982
 0.02230614 0.02416423]
cqt_chromas[similar_index]: [0.00417161 0.00377547 0.00472369 0.         0.         0.
 0.         0.00604481 0.00221388 0.         0.         0.0078125
 0.00916785 0.00930865 0.00881483 0.00517172 0.01300608 0.00988558
 0.02719761 0.0180386 ]
similar_index: 24145
--
-- generate 18 / 30
score: [[1.40415554]]
predict_vec: [0.00204836 0.00070555 0.00198807 0.00025157 0.00220234 0.00178366
 0.00134391 0.00202507 0.00089236 0.00040454 0.00032828 0.00611069
 0.0048956  0.00257997 0.00706113 0.00598272 0.01393357 0.00955982
 0.02230614 0.02416423]
cqt_chromas[similar_index]: [0.00417161 0.00377547 0.00472369 0.         0.         0.
 0.         0.00604481 0.00221388 0.         0.         0.0078125
 0.00916785 0.00930865 0.00881483 0.005

score: [[1.40415554]]
predict_vec: [0.00204836 0.00070555 0.00198807 0.00025157 0.00220234 0.00178366
 0.00134391 0.00202507 0.00089236 0.00040454 0.00032828 0.00611069
 0.0048956  0.00257997 0.00706113 0.00598272 0.01393357 0.00955982
 0.02230614 0.02416423]
cqt_chromas[similar_index]: [0.00417161 0.00377547 0.00472369 0.         0.         0.
 0.         0.00604481 0.00221388 0.         0.         0.0078125
 0.00916785 0.00930865 0.00881483 0.00517172 0.01300608 0.00988558
 0.02719761 0.0180386 ]
similar_index: 24145
--
-- generate 23 / 30
score: [[1.40415554]]
predict_vec: [0.00204836 0.00070555 0.00198807 0.00025157 0.00220234 0.00178366
 0.00134391 0.00202507 0.00089236 0.00040454 0.00032828 0.00611069
 0.0048956  0.00257997 0.00706113 0.00598272 0.01393357 0.00955982
 0.02230614 0.02416423]
cqt_chromas[similar_index]: [0.00417161 0.00377547 0.00472369 0.         0.         0.
 0.         0.00604481 0.00221388 0.         0.         0.0078125
 0.00916785 0.00930865 0.00881483 0.005

score: [[1.40415554]]
predict_vec: [0.00204836 0.00070555 0.00198807 0.00025157 0.00220234 0.00178366
 0.00134391 0.00202507 0.00089236 0.00040454 0.00032828 0.00611069
 0.0048956  0.00257997 0.00706113 0.00598272 0.01393357 0.00955982
 0.02230614 0.02416423]
cqt_chromas[similar_index]: [0.00417161 0.00377547 0.00472369 0.         0.         0.
 0.         0.00604481 0.00221388 0.         0.         0.0078125
 0.00916785 0.00930865 0.00881483 0.00517172 0.01300608 0.00988558
 0.02719761 0.0180386 ]
similar_index: 24145
--
-- generate 29 / 30
score: [[1.40415554]]
predict_vec: [0.00204836 0.00070555 0.00198807 0.00025157 0.00220234 0.00178366
 0.00134391 0.00202507 0.00089236 0.00040454 0.00032828 0.00611069
 0.0048956  0.00257997 0.00706113 0.00598272 0.01393357 0.00955982
 0.02230614 0.02416423]
cqt_chromas[similar_index]: [0.00417161 0.00377547 0.00472369 0.         0.         0.
 0.         0.00604481 0.00221388 0.         0.         0.0078125
 0.00916785 0.00930865 0.00881483 0.005

score: [[1.42834494]]
predict_vec: [0.00243537 0.0031763  0.00176099 0.00098656 0.00170032 0.00257166
 0.00212354 0.00073398 0.00056104 0.0019049  0.00261381 0.00700168
 0.10459241 0.07986863 0.10596196 0.09850319 0.13945566 0.09719196
 0.10922945 0.1097124 ]
cqt_chromas[similar_index]: [0.         0.0078125  0.0068554  0.00756092 0.00539393 0.00518736
 0.         0.00521495 0.         0.00347078 0.00279586 0.00480646
 0.07487658 0.08210792 0.0898737  0.0669244  0.12141719 0.16130392
 0.08719505 0.09329065]
similar_index: 10729
--
-- generate 4 / 30
score: [[1.43103906]]
predict_vec: [0.00421529 0.0043189  0.0028165  0.00071252 0.00064541 0.00082015
 0.0016247  0.00141746 0.00125901 0.00049947 0.00031234 0.00076356
 0.04939222 0.03721859 0.06877887 0.09078719 0.04604049 0.05221951
 0.05623085 0.06201186]
cqt_chromas[similar_index]: [0.00764403 0.00770498 0.00650143 0.         0.         0.
 0.         0.         0.00146692 0.00043552 0.         0.0078125
 0.04882444 0.03211783 0.042427

score: [[1.44647515]]
predict_vec: [0.00442469 0.00752485 0.00329736 0.00499292 0.00222889 0.00145182
 0.00212849 0.00291954 0.00066876 0.0006499  0.00135585 0.000313
 0.06115153 0.07536001 0.05170349 0.04253672 0.02931899 0.08582456
 0.07379388 0.07141533]
cqt_chromas[similar_index]: [0.         0.0078125  0.         0.00479484 0.         0.
 0.00748028 0.00283809 0.00025029 0.         0.         0.
 0.08626038 0.11561826 0.03624022 0.028775   0.0228797  0.1360621
 0.14923291 0.09735131]
similar_index: 9267
--
-- generate 9 / 30
score: [[1.41313687]]
predict_vec: [0.00167377 0.00627315 0.00397564 0.00249423 0.00121333 0.00303602
 0.00160676 0.00092734 0.00088396 0.00030397 0.00042755 0.00144204
 0.02023411 0.0151659  0.01233048 0.0760841  0.07883259 0.05512565
 0.06709429 0.06832128]
cqt_chromas[similar_index]: [0.         0.         0.0078125  0.         0.00131955 0.
 0.         0.00566859 0.00736307 0.00459381 0.         0.
 0.02312965 0.01637772 0.02781509 0.0606072  0.05278579 0.

score: [[1.40919244]]
predict_vec: [0.0018296  0.00493646 0.00063566 0.00080865 0.00018449 0.00220787
 0.00481303 0.00095227 0.00375168 0.00619266 0.00735386 0.00455656
 0.01721147 0.02925929 0.03089254 0.03678074 0.02251852 0.03975606
 0.05284125 0.0473205 ]
cqt_chromas[similar_index]: [0.         0.         0.         0.00461689 0.00245321 0.00583399
 0.00208476 0.0078125  0.         0.00434808 0.00681474 0.
 0.12358546 0.12026088 0.15048519 0.10060597 0.05529703 0.10021916
 0.20512871 0.12450983]
similar_index: 37506
--
-- generate 14 / 30
score: [[1.44299773]]
predict_vec: [0.00379623 0.00709373 0.00506731 0.00020375 0.00231165 0.00128604
 0.00046245 0.00424257 0.00173778 0.00054841 0.00063405 0.00303014
 0.08181587 0.05459914 0.03095674 0.05929254 0.1082502  0.06893241
 0.0880391  0.08067698]
cqt_chromas[similar_index]: [0.         0.00637944 0.0043818  0.         0.         0.
 0.         0.         0.0078125  0.         0.         0.00666587
 0.06824136 0.04960299 0.02956725 0.0

score: [[1.42592595]]
predict_vec: [0.00230599 0.00080065 0.00043223 0.00113305 0.00064755 0.00227482
 0.00404586 0.00121306 0.00164178 0.00193089 0.00035459 0.00103182
 0.05526989 0.03811761 0.07814152 0.0557751  0.07200926 0.04228158
 0.06663235 0.06063478]
cqt_chromas[similar_index]: [0.         0.00276405 0.00564529 0.         0.         0.
 0.         0.00703148 0.003141   0.00372148 0.0078125  0.00587004
 0.05140665 0.04684884 0.06457691 0.04447532 0.06366505 0.05976867
 0.0790415  0.06274438]
similar_index: 11254
--
-- generate 19 / 30
score: [[1.4321961]]
predict_vec: [0.00114999 0.00314581 0.00238292 0.00263958 0.00178194 0.00370697
 0.00167488 0.00164832 0.00096278 0.00081713 0.00666028 0.00492371
 0.03601613 0.02523175 0.03486042 0.03380953 0.04261427 0.03491298
 0.05192938 0.05003145]
cqt_chromas[similar_index]: [0.00291846 0.         0.00379189 0.         0.         0.
 0.         0.         0.         0.         0.         0.0078125
 0.069081   0.06014229 0.12409021 0.092

score: [[1.4299408]]
predict_vec: [0.00425446 0.00283982 0.00693747 0.00615976 0.00228254 0.00118576
 0.00066238 0.00246481 0.00059229 0.00097283 0.00178949 0.00231067
 0.04343555 0.08489191 0.07505282 0.08010423 0.07189158 0.06710058
 0.06815197 0.0744797 ]
cqt_chromas[similar_index]: [0.0078125  0.         0.00238585 0.00250574 0.00638742 0.
 0.         0.00104792 0.         0.00196908 0.0037252  0.00260677
 0.24310546 0.27389097 0.31228274 0.2352404  0.12522155 0.23355605
 0.21888337 0.18266964]
similar_index: 32448
--
-- generate 24 / 30
