In [1]:
# 環境構築

!apt-get update
!apt-get install -y libsndfile1-dev
!pip install scipy
!pip install scikit-learn
!pip install librosa

/bin/bash: /home/allegro/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
Reading package lists... Done
E: Could not open lock file /var/lib/apt/lists/lock - open (13: Permission denied)
E: Unable to lock directory /var/lib/apt/lists/
W: Problem unlinking the file /var/cache/apt/pkgcache.bin - RemoveCaches (13: Permission denied)
W: Problem unlinking the file /var/cache/apt/srcpkgcache.bin - RemoveCaches (13: Permission denied)
/bin/bash: /home/allegro/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
E: Could not open lock file /var/lib/dpkg/lock-frontend - open (13: Permission denied)
E: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?
/bin/bash: /home/allegro/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
/bin/bash: /home/allegro/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
/bin/bash: /home/a

In [2]:
# Build the clips

SAMPLING_RATE = 44100 # must not be changed

clip_size = 81920 # size (in samples) of one part from which a track is reassembled
step_size = 20480 # offset (in samples) between the starts of consecutive clips
window_size = 10240 # number of samples used to obtain CQT_CHROMA
                    # NOTE(review): in the cells visible here this value only appears in cache file names - confirm
hop_size = 640 # offset between consecutive windows

from scipy.io.wavfile import read, write
import glob
import os
import numpy as np
import librosa
import sys

def split_into_clips(raw_data, clip_size, step_size):
    """Cut a 1-D signal into overlapping clips of ``clip_size`` samples.

    Clip starts are 0, step_size, 2 * step_size, ... and include the last
    start position at which a full clip still fits.

    Returns an array of shape (n_clips, clip_size); a signal shorter than
    ``clip_size`` yields an empty (0, clip_size) float32 array.
    """
    last_start = len(raw_data) - clip_size
    if last_start < 0:
        return np.zeros((0, clip_size), dtype=np.float32)
    # "+ 1" keeps the final full clip when last_start is a multiple of step_size.
    return np.stack([raw_data[start:start + clip_size]
                     for start in range(0, last_start + 1, step_size)])


# Choose the audio source, in order of preference:
#   1. an existing ../data/out/clips.wav
#   2. ../data/arrays/clips.npy, which is first converted to clips.wav
#   3. every WAV file under ../data/wav44100
if os.path.isfile("../data/out/clips.wav"):
    print("../data/out/clips.wav exists.")
    files = ["../data/out/clips.wav"]
elif os.path.isfile("../data/arrays/clips.npy"):
    print("loading ../data/arrays/clips.npy ...")
    clips = np.load("../data/arrays/clips.npy")
    print("creating ../data/out/clips.wav ...")
    os.makedirs("../data/out", exist_ok=True)
    write("../data/out/clips.wav", SAMPLING_RATE, clips.reshape((clips.shape[0] * clips.shape[1], )))
    files = ["../data/out/clips.wav"]
else:
    # glob returns files in arbitrary order; sort so the clip order is reproducible.
    # For debugging, `files` can be restricted to a single track here.
    files = sorted(glob.glob("../data/wav44100/*.wav"))

if len(files) == 0:
    # Raise instead of sys.exit(): a clear error that stops the notebook run.
    raise FileNotFoundError("WAV ファイルが見つかりませんでした。")

clips_filename = "../data/arrays/c" + str(clip_size) + "_s" + str(step_size) + "_f32_clips"

if os.path.isfile(clips_filename + ".npy"):
    print("loading " + clips_filename + ".npy ...")
    clips = np.load(clips_filename + ".npy")
else:
    print("creating " + clips_filename + ".npy ...")
    # The leading empty array fixes the (n, clip_size) float32 layout even when
    # no track is long enough to produce a clip.
    clip_groups = [np.zeros((0, clip_size), dtype=np.float32)]
    for file in files:
        # NOTE: librosa.load is called with its default mono=True, so the
        # channels are down-mixed (not "left channel only" as the earlier
        # comment claimed).
        raw_data = librosa.load(file, sr=SAMPLING_RATE)[0]
        clip_groups.append(split_into_clips(raw_data, clip_size, step_size))
    # Stack once instead of re-copying the growing array for every track.
    clips = np.vstack(clip_groups)
    del clip_groups
    os.makedirs(os.path.dirname(clips_filename), exist_ok=True)
    np.save(clips_filename, clips)

print("The clip array has " + str(clips.shape[0]) + " clips.")

../data/out/clips.wav exists.
loading ../data/arrays/c81920_s20480_f32_clips.npy ...
The clip array has 57148 clips.


In [3]:
# Build the constant-Q transform (CQT) of every clip

n_bins = 84
progress_every = 1000  # print a progress line once per this many clips

cqt_filename = clips_filename + "_w" + str(window_size) + "_h" + str(hop_size) + "_cqts"

if os.path.isfile(cqt_filename + ".npy"):
    print("loading " + cqt_filename + ".npy ...")
    cqts = np.load(cqt_filename + ".npy")
else:
    print("creating " + cqt_filename + ".npy ...")
    if len(clips) == 0:
        raise ValueError("clips is empty; there is nothing to compute a CQT from.")

    cqts = None
    for num, clip in enumerate(clips):
        if num % progress_every == 0 or num + 1 == len(clips):
            print("cqt progress: clip " + str(num + 1) + " / " + str(len(clips)))

        tmp_cqt = librosa.cqt(clip, sr=SAMPLING_RATE, hop_length=hop_size, n_bins=n_bins)

        if cqts is None:
            # Allocate the full result once the per-clip shape/dtype is known;
            # growing the array with np.vstack would copy it for every clip.
            cqts = np.empty((len(clips),) + tmp_cqt.shape, dtype=tmp_cqt.dtype)
        cqts[num] = tmp_cqt

    np.save(cqt_filename, cqts)

print("The cqt array has " + str(cqts.shape[0]) + " cqts.")
print("cqts.shape: " + str(cqts.shape))
print("Type(cqts[0][0][0]): " + str(type(cqts[0][0][0])))
print("np.max(cqts[0][0]): " + str(np.max(cqts[0][0])))

loading ../data/arrays/c81920_s20480_f32_clips_w10240_h640_cqts.npy ...
The cqt array has 57148 cqts.
cqts.shape: (57148, 84, 129)
Type(cqts[0][0][0]): <class 'numpy.complex64'>
np.max(cqts[0][0]): (0.021538047+0.007131239j)


In [4]:
# Build the cqt_chroma features

def Preprocessing(array):
    """Log-compress magnitudes, rescale them, and transpose the result.

    Takes the element-wise absolute value, applies log(v + 1), divides by the
    natural log of the largest finite float32 value, and returns the
    transpose of that array. The input is not modified.
    """
    log_ceiling = np.log(np.finfo(np.float32).max)
    compressed = np.log(np.abs(array) + 1)
    return (compressed / log_ceiling).T

# The CQT cell calls librosa.cqt without bins_per_octave, i.e. with librosa's
# default of 12; chroma_cqt must be told the same resolution to fold the bins
# into pitch classes correctly (its own default differs between versions).
cqt_bins_per_octave = 12
progress_every = 1000  # print a progress line once per this many clips

cqt_chroma_filename = clips_filename + "_w" + str(window_size) + "_h" + str(hop_size) + "_cqt_chromas"

if os.path.isfile(cqt_chroma_filename + ".npy"):
    # NOTE: a cache written before the magnitude/bins_per_octave fix below holds
    # features computed the old way; delete it to regenerate.
    print("loading " + cqt_chroma_filename + ".npy ...")
    cqt_chromas= np.load(cqt_chroma_filename + ".npy")
else:
    print("creating " + cqt_chroma_filename + ".npy ...")
    if len(cqts) == 0:
        raise ValueError("cqts is empty; there is nothing to compute chroma features from.")

    cqt_chromas = None
    for num, cqt in enumerate(cqts):
        if num % progress_every == 0 or num + 1 == len(cqts):
            print("cqt_chroma progress: clip " + str(num + 1) + " / " + str(len(cqts)))

        # chroma_cqt expects a magnitude constant-Q spectrogram for C, so the
        # complex CQT is converted with np.abs before being passed in.
        tmp_cqt_chroma = librosa.feature.chroma_cqt(
            C=np.abs(cqt), sr=SAMPLING_RATE, bins_per_octave=cqt_bins_per_octave)
        tmp_cqt_chroma = Preprocessing(tmp_cqt_chroma)  # -> (window, chroma)

        if cqt_chromas is None:
            # Allocate once; growing the array with np.vstack would copy it
            # for every clip.
            cqt_chromas = np.empty((len(cqts),) + tmp_cqt_chroma.shape, dtype=tmp_cqt_chroma.dtype)
        cqt_chromas[num] = tmp_cqt_chroma

    np.save(cqt_chroma_filename, cqt_chromas)

print("The cqt_chroma array has " + str(cqt_chromas.shape[0]) + " cqt_chromas.")
print("cqt_chromas.shape: " + str(cqt_chromas.shape)) # axes: clip index, window index, chroma index
print("Type(cqt_chromas[0][0][0]): " + str(type(cqt_chromas[0][0][0])))
print("np.max(cqt_chromas[0][0]): " + str(np.max(cqt_chromas[0][0])))

loading ../data/arrays/c81920_s20480_f32_clips_w10240_h640_cqt_chromas.npy ...
The cqt_chroma array has 57148 cqt_chromas.
cqt_chromas.shape: (57148, 129, 12)
Type(cqt_chromas[0][0][0]): <class 'numpy.float32'>
np.max(cqt_chromas[0][0]): 0.0078125


In [5]:
# Build max_norm: the peak squared amplitude of each of num_divide equal
# segments of every clip

num_divide = 4
progress_every = 1000  # print a progress line once per this many clips

max_norm_filename = clips_filename + "_d" + str(num_divide) + "_max_norms"

if os.path.isfile(max_norm_filename + ".npy"):
    print("loading " + max_norm_filename + ".npy ...")
    max_norms= np.load(max_norm_filename + ".npy")
else:
    print("creating " + max_norm_filename + ".npy ...")
    # Segment boundaries via integer arithmetic; the float form
    # int((i / num_divide) * n) can land one sample short of the exact value.
    clip_len = clips.shape[1]
    bounds = [(i * clip_len) // num_divide for i in range(num_divide + 1)]

    # Always 2-D (n_clips, num_divide), also when there is only one clip, and
    # filled in place instead of re-copying the array with np.vstack per clip.
    max_norms = np.empty((len(clips), num_divide), dtype=clips.dtype)
    for num, clip in enumerate(clips):
        if num % progress_every == 0 or num + 1 == len(clips):
            print("max_norm progress: clip " + str(num + 1) + " / " + str(len(clips)))
        for seg in range(num_divide):
            max_norms[num, seg] = np.max(clip[bounds[seg]:bounds[seg + 1]] ** 2)

    np.save(max_norm_filename, max_norms)

print("The max_norm array has " + str(max_norms.shape[0]) + " max_norms.")
print("max_norms.shape: " + str(max_norms.shape))
print("Type(max_norms[0][0]): " + str(type(max_norms[0][0])))
print("np.max(max_norms[0]): " + str(np.max(max_norms[0])))

loading ../data/arrays/c81920_s20480_f32_clips_d4_max_norms.npy ...
The max_norm array has 57148 max_norms.
max_norms.shape: (57148, 4)
Type(max_norms[0][0]): <class 'numpy.float32'>
np.max(max_norms[0]): 0.0179649


In [6]:
# 学習データとテストデータに分ける

def normalize(array):
    """Scale ``array`` so that its elements sum to 1.

    When the elements sum to zero the input is returned unchanged (the same
    object), which avoids a division by zero.
    """
    total = np.sum(array)
    return array if total == 0 else array / total
    
cqt_chroma_sum_threshold = 0.01 # samples whose "next window" sums to less than this are excluded
test_data_rate = 0.1
weight = 0.01 # weight applied to max_norm
split_seed = 0 # seed of the shuffle, so the train/test split is reproducible

window_num_per_clip = cqt_chromas.shape[1]

# This cell overwrites cqt_chromas and clips, so it must run exactly once after
# the feature cells. Fail loudly on a re-run (or on mismatched caches) instead
# of building silently wrong features.
if cqt_chromas.shape[0] != max_norms.shape[0]:
    raise RuntimeError(
        "cqt_chromas has " + str(cqt_chromas.shape[0]) + " rows but max_norms has "
        + str(max_norms.shape[0]) + "; re-run the feature cells before this one.")

# Append the weighted max_norm values of a clip to every window of that clip:
# (clip, window, chroma) -> (clip, window, chroma + num_divide).
weighted_norms = np.broadcast_to(
    (max_norms * weight)[:, np.newaxis, :],
    (cqt_chromas.shape[0], window_num_per_clip, max_norms.shape[1]))
cqt_chromas = np.concatenate([cqt_chromas, weighted_norms], axis=2)

# Append, as one extra trailing window, the first window of the following clip
# (the prediction target). The last clip has no successor and is dropped.
cqt_chromas = np.concatenate([cqt_chromas[:-1], cqt_chromas[1:, :1, :]], axis=1)

# Shuffle features and clips with the same permutation. A dedicated RandomState
# keeps the split reproducible without touching NumPy's global random state.
p = np.random.RandomState(split_seed).permutation(len(cqt_chromas))
cqt_chromas = cqt_chromas[p]
clips = clips[p]

# Drop samples whose target window sums to less than the threshold.
too_quiet = np.sum(cqt_chromas[:, -1, :], axis=1) < cqt_chroma_sum_threshold
x = cqt_chromas[~too_quiet]

n_test = int(x.shape[0] * test_data_rate)
n_train = x.shape[0] - n_test

# Inputs: all windows except the appended target, plus a trailing channel axis.
x_test = x[:n_test, :window_num_per_clip, :].reshape(n_test, window_num_per_clip, x.shape[2], 1)
x_train = x[n_test:, :window_num_per_clip, :].reshape(n_train, window_num_per_clip, x.shape[2], 1)
# Targets: one row per feature dimension, holding that feature of the target window.
y_tests = x[:n_test, window_num_per_clip, :].T.copy()
y_trains = x[n_test:, window_num_per_clip, :].T.copy()

print("x_train.shape: " + str(x_train.shape))
print("x_test.shape: " + str(x_test.shape))
print("y_trains.shape: " + str(y_trains.shape))
print("y_tests.shape: " + str(y_tests.shape))

x_train.shape: (51237, 129, 16, 1)
x_test.shape: (5692, 129, 16, 1)
y_trains.shape: (16, 51237)
y_tests.shape: (16, 5692)


In [9]:
# 楽曲の出力

import sklearn
from sklearn.metrics.pairwise import cosine_similarity
from keras.models import load_model
import soundfile as sf

# Settings that identify the trained models and name the generated files.
batch_size = 64
epochs = 100
rhythm_weight = 1
num_clips = 10

# Shared stem of the model directory names and of the output file name.
run_tag = "independent_batch" + str(batch_size) + "_e" + str(epochs)

# One model per feature dimension, loaded in feature order.
models = [load_model("../data/models/" + run_tag + "_" + str(i)) for i in range(x.shape[2])]

file_name = "out_" + run_tag + "_rw" + str(rhythm_weight)

def add_fade(x, fadetime, sr):
    """Return a copy of ``x`` with a fade-in at the start and a fade-out at the end.

    Both ramps are ``int(fadetime * sr)`` samples long and use square-root
    raised-cosine weights. The input array is left untouched, so a clip taken
    as a view of a larger array can be faded repeatedly without the fades
    compounding, and the fade-out is positioned from the length of ``x``
    itself.

    Args:
        x: 1-D sample array.
        fadetime: fade duration in seconds.
        sr: sampling rate in samples per second.

    Returns:
        A new array with the same shape and dtype as ``x``.

    Raises:
        ValueError: if the fade length is negative or longer than ``x``.
    """
    faded = np.array(x, copy=True)
    n_samples = faded.shape[0]
    ft_len = int(fadetime * sr)
    if ft_len < 0 or ft_len > n_samples:
        raise ValueError(
            "fade length " + str(ft_len) + " does not fit a signal of " + str(n_samples) + " samples")
    if ft_len == 0:
        return faded

    r = np.arange(0, ft_len) * np.pi / ft_len
    w_fo = (0.5 + 0.5 * np.cos(r)) ** 0.5  # fade-out weights: 1 -> 0
    w_fi = (0.5 - 0.5 * np.cos(r)) ** 0.5  # fade-in weights: 0 -> 1

    faded[:ft_len] *= w_fi
    faded[n_samples - ft_len:] *= w_fo
    return faded

def gen_xfade(x_pre, x_next, fadetime, sr):
    """Append ``x_next`` to ``x_pre`` with an overlap of ``int(fadetime * sr)`` samples.

    The last overlap-many samples of ``x_pre`` are summed with the first
    overlap-many samples of ``x_next``; the rest of both signals is kept as is.
    The overlap is sized from the arrays themselves, so the two signals may
    have any (also different) lengths.

    Args:
        x_pre: audio produced so far, or None when ``x_next`` is the first piece.
        x_next: 1-D sample array to append.
        fadetime: overlap duration in seconds.
        sr: sampling rate in samples per second.

    Returns:
        ``x_next`` itself when ``x_pre`` is None, otherwise a new array of
        length ``len(x_pre) + len(x_next) - overlap``.

    Raises:
        ValueError: if the overlap is negative or longer than either signal.
    """
    if x_pre is None:
        return x_next

    ft_len = int(fadetime * sr)
    if ft_len < 0 or ft_len > min(x_pre.shape[0], x_next.shape[0]):
        raise ValueError("overlap of " + str(ft_len) + " samples does not fit both signals")

    split = x_pre.shape[0] - ft_len
    x_pre_begin = x_pre[:split]
    x_pre_end = x_pre[split:]
    # Zero-pad the tail of x_pre to the length of x_next so the two can be summed.
    padded_end = np.concatenate((x_pre_end, np.zeros(x_next.shape[0] - ft_len)))
    return np.concatenate((x_pre_begin, padded_end + x_next))

def create_music_consider_weights(fname):
    """Generate a track by chaining clips and write it to ``fname`` as 16-bit PCM.

    Starting from a randomly chosen clip, the function repeats ``num_clips``
    times:
      1. every model in ``models`` predicts one entry of the next feature
         vector from the current clip's windows (the appended target window
         is excluded from the input);
      2. the vector is split into a chroma part and a rhythm part (its last
         ``num_divide`` entries);
      3. the clip whose first window maximises
         ``cos(chroma) + rhythm_weight * cos(rhythm)`` is selected;
      4. that clip is faded and cross-faded onto the output.

    The randomly chosen starting clip only seeds the first prediction; it is
    not itself written to the output.

    Relies on the notebook globals cqt_chromas, clips, models, num_clips,
    num_divide, rhythm_weight and SAMPLING_RATE.
    """
    fade_seconds = 0.1

    # Loop-invariant: first window of every candidate clip, split into its
    # chroma and rhythm parts.
    first_windows = cqt_chromas[:, 0]
    split = first_windows.shape[1] - num_divide
    candidate_chroma = first_windows[:, :split]
    candidate_rhythm = first_windows[:, split:]

    predict_index = np.random.randint(0, len(cqt_chromas))
    out = None

    for clip_no in range(num_clips):
        print("-- generate " + str(clip_no + 1) + " / " + str(num_clips))

        model_input = np.array([cqt_chromas[predict_index, :-1]])
        predict_vec = np.zeros((0, ))
        for model in models:
            # presumably each model outputs one feature value per sample - verify against training
            predict_vec = np.hstack((predict_vec, model.predict(model_input)[0]))
        predict_chroma = predict_vec[:len(predict_vec) - num_divide]
        predict_rhythm = predict_vec[len(predict_vec) - num_divide:]

        # Score all candidates in two vectorised calls instead of calling
        # cosine_similarity once per candidate row.
        chroma_scores = cosine_similarity(predict_chroma.reshape(1, -1), candidate_chroma)[0]
        rhythm_scores = cosine_similarity(predict_rhythm.reshape(1, -1), candidate_rhythm)[0]
        scores = chroma_scores + rhythm_weight * rhythm_scores
        similar_index = int(np.argmax(scores))  # first best match, as with a strict ">" scan
        score = scores[similar_index]

        print("score: " + str(score))
        print("predict_vec: " + str(predict_vec))
        print("cqt_chromas[similar_index]: " + str(cqt_chromas[similar_index, 0]))
        print("similar_index: " + str(similar_index))
        print("--")

        predict_index = similar_index

        # Fade a copy: clips[predict_index] is a view into clips, and fading it
        # in place would permanently alter the stored clip.
        tmp = add_fade(clips[predict_index].copy(), fade_seconds, SAMPLING_RATE)
        out = gen_xfade(out, tmp, fade_seconds, SAMPLING_RATE)

    sf.write(fname, out, SAMPLING_RATE, subtype="PCM_16")

# Generate ten tracks, numbered 0-9, under ../data/out/.
for track_no in range(10):
    fname = f"../data/out/{file_name}_{track_no}.wav"
    print(f"creating {fname} ...")
    create_music_consider_weights(fname)

creating ../data/out/out_independent_batch64_e100_rw1_0.wav ...
-- generate 1 / 10
score: [[1.92939776]]
predict_vec: [6.14330522e-04 3.75174754e-03 2.66511855e-03 2.95859179e-03
 4.39830404e-03 1.11790502e-03 2.51734257e-03 2.10295664e-03
 1.54309452e-03 1.89453992e-03 1.34154951e-04 7.24505284e-04
 3.39604740e-05 2.03018153e-05 1.53092205e-05 3.94945586e-04]
cqt_chromas[similar_index]: [2.66527571e-03 3.46180191e-03 3.77465575e-03 2.23188638e-03
 7.81250000e-03 0.00000000e+00 3.80073534e-03 3.01386719e-03
 3.82685382e-03 3.51321464e-03 0.00000000e+00 0.00000000e+00
 1.17237716e-04 1.32995192e-05 4.60267984e-06 1.67128455e-03]
similar_index: 38069
--
-- generate 2 / 10
score: [[1.9122614]]
predict_vec: [1.31163024e-03 8.37792119e-04 2.16699578e-03 6.03249238e-04
 2.10087092e-04 2.10106606e-03 5.37837343e-03 2.94713071e-03
 1.34557521e-03 8.87799077e-04 1.83429208e-03 3.95551819e-04
 2.24869345e-05 1.49235721e-05 1.58144732e-03 1.76588295e-03]
cqt_chromas[similar_index]: [3.8146779e-03

score: [[1.93570389]]
predict_vec: [0.00296896 0.00087361 0.0042322  0.00161896 0.00222776 0.00036811
 0.0009891  0.00117817 0.00044693 0.00058847 0.00101787 0.00081903
 0.00268218 0.00553915 0.00544727 0.00609957]
cqt_chromas[similar_index]: [0.00543026 0.         0.0078125  0.00220278 0.00366077 0.
 0.         0.00155536 0.         0.         0.         0.0033637
 0.00319814 0.00598017 0.00688371 0.00694055]
similar_index: 44181
--
-- generate 8 / 10
score: [[1.96851508]]
predict_vec: [0.00040818 0.00109248 0.00143823 0.00160716 0.00175218 0.00075723
 0.00099169 0.00159089 0.00141598 0.00465693 0.01040367 0.00024937
 0.00590271 0.00675038 0.00666782 0.00704057]
cqt_chromas[similar_index]: [0.         0.00160857 0.00185138 0.00175964 0.         0.00079628
 0.00057924 0.00136594 0.00146986 0.00411214 0.0078125  0.
 0.0019178  0.00160851 0.00148444 0.00177001]
similar_index: 33569
--
-- generate 9 / 10
score: [[1.95754475]]
predict_vec: [1.61799206e-03 5.47726930e-04 1.71424903e-03 5.14

score: [[1.93476795]]
predict_vec: [0.00070051 0.00217575 0.00104861 0.01145962 0.00269754 0.00068669
 0.00203726 0.00171849 0.00057804 0.00136535 0.00188966 0.00085824
 0.00364029 0.00237014 0.00378423 0.00417732]
cqt_chromas[similar_index]: [0.         0.00180472 0.         0.0078125  0.         0.
 0.         0.         0.         0.         0.00194926 0.00057551
 0.00371264 0.00235092 0.00408682 0.00398022]
similar_index: 10655
--
-- generate 4 / 10
score: [[1.93412795]]
predict_vec: [0.00079079 0.00160529 0.00149804 0.00334845 0.00076574 0.00033056
 0.00096783 0.00096372 0.00133907 0.00088881 0.00048469 0.00519845
 0.00229074 0.00406718 0.00370263 0.00542551]
cqt_chromas[similar_index]: [0.         0.         0.0031801  0.00362667 0.0020228  0.
 0.00099924 0.00039445 0.00207216 0.00182069 0.         0.0078125
 0.00149622 0.00209659 0.00207068 0.00260297]
similar_index: 17715
--
-- generate 5 / 10
score: [[1.9364524]]
predict_vec: [0.00843742 0.00188781 0.00115973 0.00525483 0.0008

score: [[1.90766531]]
predict_vec: [0.00079889 0.00175982 0.00089615 0.00077112 0.00291923 0.00298448
 0.00505269 0.00204442 0.00025952 0.00053819 0.00229066 0.00449025
 0.00423755 0.00337551 0.00176842 0.00279387]
cqt_chromas[similar_index]: [0.         0.00177849 0.00353633 0.         0.0046545  0.00626988
 0.0078125  0.00633959 0.         0.         0.00706519 0.00613823
 0.00407239 0.00281093 0.00315274 0.00345364]
similar_index: 51992
--
-- generate 10 / 10
score: [[1.93808766]]
predict_vec: [0.00089422 0.00252598 0.00268232 0.00128869 0.00055743 0.0010863
 0.00853492 0.00251662 0.00314568 0.00045595 0.00222241 0.00062008
 0.00277518 0.00320599 0.0032389  0.0047924 ]
cqt_chromas[similar_index]: [0.         0.00376798 0.00398898 0.         0.         0.
 0.0078125  0.00290366 0.00132065 0.00185347 0.00249237 0.
 0.00103776 0.00122141 0.00130426 0.00172866]
similar_index: 16670
--
creating ../data/out/out_independent_batch64_e100_rw1_2.wav ...
-- generate 1 / 10
score: [[1.94923387]

score: [[1.94754091]]
predict_vec: [0.00056515 0.00063369 0.00683024 0.00069118 0.00350285 0.0002489
 0.00077524 0.00073666 0.00092539 0.0006601  0.00186082 0.00111824
 0.00127406 0.0021524  0.00163705 0.00232117]
cqt_chromas[similar_index]: [0.         0.         0.0078125  0.00190354 0.00524137 0.
 0.         0.         0.         0.         0.00228325 0.
 0.00369073 0.00431867 0.00414949 0.00547495]
similar_index: 4014
--
-- generate 7 / 10
score: [[1.93087578]]
predict_vec: [0.0008334  0.00328767 0.00306553 0.00132999 0.00144524 0.00604139
 0.00115153 0.00098453 0.00093012 0.00257156 0.00295966 0.00186084
 0.00415912 0.00410333 0.0051677  0.00589186]
cqt_chromas[similar_index]: [0.00331036 0.00614101 0.00677046 0.00345596 0.00190554 0.0078125
 0.         0.         0.00190041 0.00363986 0.00321652 0.00215806
 0.00999939 0.01       0.00893789 0.01      ]
similar_index: 7420
--
-- generate 8 / 10
score: [[1.94360181]]
predict_vec: [0.00493417 0.00266369 0.00389549 0.00145232 0.000828

score: [[1.96337416]]
predict_vec: [0.00110901 0.00130571 0.00184065 0.00204719 0.00399596 0.00682983
 0.00054076 0.00131513 0.00110105 0.00143791 0.00106361 0.0004963
 0.00107833 0.00063645 0.00085165 0.00129277]
cqt_chromas[similar_index]: [0.00080101 0.00087711 0.0021903  0.00260817 0.00369076 0.0078125
 0.         0.         0.00034557 0.00122858 0.         0.00103275
 0.0017064  0.00116846 0.00173679 0.00218384]
similar_index: 23294
--
-- generate 3 / 10
score: [[1.95149156]]
predict_vec: [0.00279447 0.00206251 0.0006985  0.00156595 0.00096363 0.00056638
 0.00152541 0.00091036 0.00014026 0.00012953 0.00036299 0.00389157
 0.00116194 0.00174922 0.00200692 0.00234502]
cqt_chromas[similar_index]: [0.00657921 0.00675195 0.         0.00322224 0.00379754 0.
 0.00430199 0.         0.         0.         0.         0.0078125
 0.00318091 0.00437783 0.00513579 0.00633503]
similar_index: 12589
--
-- generate 4 / 10
score: [[1.96521575]]
predict_vec: [0.00302246 0.00034141 0.00145433 0.00223956

score: [[1.90870546]]
predict_vec: [0.00106248 0.00164687 0.00129636 0.00796464 0.00187347 0.00166823
 0.0012732  0.00103424 0.00204661 0.00255486 0.00141858 0.00119747
 0.0015418  0.00149278 0.00183007 0.00243087]
cqt_chromas[similar_index]: [0.00164283 0.00150353 0.00331167 0.0078125  0.         0.
 0.         0.0007603  0.00093776 0.00183545 0.00068097 0.00176857
 0.00147575 0.00158898 0.00241498 0.00250702]
similar_index: 33627
--
-- generate 9 / 10
score: [[1.91486078]]
predict_vec: [0.00135732 0.00127944 0.00165416 0.00743054 0.0010857  0.00262904
 0.00346427 0.00241005 0.00171223 0.00054768 0.0010343  0.00093763
 0.00154019 0.00241509 0.00230856 0.00283846]
cqt_chromas[similar_index]: [0.         0.00457214 0.00309263 0.0078125  0.00043988 0.00424534
 0.00266609 0.003332   0.00341411 0.         0.0011912  0.
 0.00086027 0.00105891 0.00115079 0.00121524]
similar_index: 43774
--
-- generate 10 / 10
score: [[1.92928647]]
predict_vec: [0.00091994 0.00158093 0.00367989 0.00685339 0.0

score: [[1.96334024]]
predict_vec: [0.00389086 0.00740098 0.00358479 0.00088916 0.00178445 0.00182018
 0.00133733 0.00157555 0.00054723 0.00155509 0.00193571 0.0005895
 0.00184504 0.00172063 0.00160021 0.0022692 ]
cqt_chromas[similar_index]: [0.00329038 0.0078125  0.00410816 0.00178653 0.00166792 0.00299954
 0.00182031 0.00063911 0.00177128 0.00177789 0.00219217 0.00172676
 0.00113943 0.00130757 0.00090616 0.00181342]
similar_index: 8351
--
-- generate 6 / 10
score: [[1.93832426]]
predict_vec: [0.00072237 0.00231315 0.0039488  0.0040641  0.00108436 0.00094686
 0.00170026 0.00049637 0.00082269 0.00056267 0.0009757  0.00103787
 0.00128668 0.00091533 0.00170517 0.00194603]
cqt_chromas[similar_index]: [0.00056386 0.00421609 0.0078125  0.00641567 0.         0.00382332
 0.00303731 0.         0.         0.         0.00217069 0.00173438
 0.00413534 0.00171246 0.00612165 0.00777419]
similar_index: 40953
--
-- generate 7 / 10
score: [[1.91180468]]
predict_vec: [0.00143979 0.00463622 0.00201594 0

score: [[1.88272146]]
predict_vec: [0.00138979 0.00066152 0.00085964 0.00047366 0.00409738 0.00017718
 0.00134511 0.00188558 0.00192938 0.00100959 0.00172042 0.00110712
 0.00319011 0.0013559  0.00315426 0.0031328 ]
cqt_chromas[similar_index]: [0.00169629 0.00127265 0.00055343 0.00306778 0.0078125  0.
 0.0023772  0.00172101 0.00147625 0.00138655 0.00258941 0.00120179
 0.00580308 0.00451457 0.00464509 0.00350187]
similar_index: 41403
--
-- generate 2 / 10
score: [[1.9268001]]
predict_vec: [0.00056887 0.00066888 0.00131134 0.0010803  0.00060328 0.00192556
 0.00198736 0.00164797 0.00100409 0.00074516 0.00195269 0.0008827
 0.00440654 0.00456857 0.00327617 0.00534206]
cqt_chromas[similar_index]: [0.00281893 0.         0.00258397 0.00153837 0.00286916 0.00556246
 0.0065763  0.00511503 0.00198767 0.00364251 0.0078125  0.
 0.00317575 0.00304639 0.00307374 0.00473622]
similar_index: 27053
--
-- generate 3 / 10
score: [[1.93539623]]
predict_vec: [0.00092516 0.00064558 0.00944139 0.00229867 0.0005

score: [[1.94736011]]
predict_vec: [0.00093503 0.0051845  0.00132781 0.00289169 0.00109537 0.00078649
 0.0006152  0.00149025 0.00066911 0.00047522 0.00080384 0.00832227
 0.00146192 0.00155321 0.00185459 0.00189286]
cqt_chromas[similar_index]: [0.         0.00571693 0.00323499 0.00228976 0.         0.
 0.         0.         0.         0.         0.         0.0078125
 0.00152183 0.0015515  0.00201606 0.00228073]
similar_index: 43498
--
-- generate 8 / 10
score: [[1.94831093]]
predict_vec: [0.00428086 0.00031714 0.00353597 0.00096897 0.0005575  0.00057629
 0.00069743 0.00145285 0.00065323 0.00048255 0.000626   0.00067924
 0.0015107  0.00207072 0.00209516 0.00239692]
cqt_chromas[similar_index]: [0.0078125  0.         0.005843   0.00171918 0.         0.00050854
 0.         0.00201333 0.         0.         0.         0.
 0.00355954 0.00420234 0.00383827 0.00345364]
similar_index: 53493
--
-- generate 9 / 10
score: [[1.94296433]]
predict_vec: [0.00678393 0.00080455 0.00263981 0.0037013  0.007

score: [[1.9265478]]
predict_vec: [0.00234766 0.00111714 0.00133723 0.00504187 0.00139469 0.00365361
 0.00230138 0.00132003 0.00081371 0.00057317 0.00048001 0.00063567
 0.00241834 0.00216913 0.00453988 0.00447079]
cqt_chromas[similar_index]: [0.00359154 0.00270445 0.         0.0078125  0.00242364 0.00604009
 0.00524528 0.         0.         0.00253579 0.         0.
 0.00344791 0.00446916 0.00553882 0.00681149]
similar_index: 43725
--
-- generate 5 / 10
score: [[1.92923895]]
predict_vec: [0.00142737 0.00398695 0.00053908 0.00104524 0.00174885 0.00069613
 0.00094957 0.00107312 0.00177111 0.0020355  0.00055823 0.00449631
 0.0043515  0.00544569 0.00652188 0.00710674]
cqt_chromas[similar_index]: [0.00190219 0.00767669 0.         0.00189997 0.00236169 0.
 0.0015119  0.         0.00316062 0.00443713 0.         0.0078125
 0.00461892 0.00545735 0.00441587 0.00355845]
similar_index: 56143
--
-- generate 6 / 10
score: [[1.93499131]]
predict_vec: [0.00101793 0.00039123 0.00314648 0.00482693 0.0009

score: [[1.94438486]]
predict_vec: [0.00054584 0.00186865 0.00047552 0.00118781 0.00079136 0.00746919
 0.00164955 0.00106858 0.00356578 0.00277412 0.00053339 0.00776735
 0.00189374 0.00255077 0.00294536 0.00315406]
cqt_chromas[similar_index]: [0.         0.00267369 0.00054198 0.00347222 0.         0.0078125
 0.00448266 0.         0.00381287 0.00464617 0.00205115 0.00749535
 0.00170665 0.00215003 0.00232289 0.00286654]
similar_index: 46477
--
creating ../data/out/out_independent_batch64_e100_rw1_7.wav ...
-- generate 1 / 10
score: [[1.93767441]]
predict_vec: [0.003802   0.00234332 0.005336   0.00078585 0.00145788 0.00088864
 0.00137335 0.00070239 0.00056328 0.00012094 0.00177198 0.00518474
 0.00377564 0.0026978  0.00268798 0.00373471]
cqt_chromas[similar_index]: [0.00630899 0.00245815 0.0078125  0.         0.00332558 0.
 0.         0.         0.         0.         0.00279518 0.00468135
 0.00892289 0.00699914 0.00778173 0.0080177 ]
similar_index: 34191
--
-- generate 2 / 10
score: [[1.91

score: [[1.93614237]]
predict_vec: [0.00470207 0.00242737 0.00198549 0.00149757 0.00299679 0.00134964
 0.00165789 0.00833847 0.00124639 0.00099691 0.00192391 0.00028819
 0.00287129 0.00351723 0.00369376 0.00413109]
cqt_chromas[similar_index]: [0.0046331  0.00279646 0.00151967 0.         0.00409529 0.
 0.00249131 0.0078125  0.         0.         0.         0.
 0.00569295 0.00596743 0.00519061 0.00568099]
similar_index: 15205
--
-- generate 7 / 10
score: [[1.89793811]]
predict_vec: [0.00166385 0.00085647 0.00081182 0.00078762 0.00273005 0.00074946
 0.00074529 0.00091716 0.00098832 0.0003895  0.00120894 0.00332951
 0.00584187 0.00516662 0.00536521 0.00623483]
cqt_chromas[similar_index]: [0.004787   0.00232539 0.00378346 0.         0.00506482 0.00495292
 0.         0.00370048 0.00149871 0.         0.00634194 0.0078125
 0.00450432 0.00450391 0.00358946 0.00422651]
similar_index: 19259
--
-- generate 8 / 10
score: [[1.94413359]]
predict_vec: [0.00138829 0.00125427 0.00164789 0.00209033 0.000

score: [[1.9433213]]
predict_vec: [0.00039729 0.00054121 0.00744171 0.00123572 0.00095155 0.00113214
 0.00260691 0.00715177 0.00049007 0.00070602 0.00172988 0.0006392
 0.00125979 0.00191299 0.00163414 0.00205113]
cqt_chromas[similar_index]: [0.         0.         0.00676675 0.00310507 0.         0.
 0.00416639 0.0078125  0.         0.         0.         0.
 0.00614507 0.00733969 0.0074031  0.00984921]
similar_index: 2810
--
-- generate 4 / 10
score: [[1.94485409]]
predict_vec: [0.00101691 0.00216707 0.00292137 0.00178625 0.00325496 0.00280819
 0.00360963 0.00621411 0.00181405 0.00471708 0.00069398 0.00027659
 0.00720608 0.00728812 0.00948171 0.01047805]
cqt_chromas[similar_index]: [0.         0.00162359 0.00296894 0.00276123 0.00354193 0.00365718
 0.00565003 0.0078125  0.00357852 0.00409635 0.         0.00306947
 0.00258835 0.00273093 0.00285381 0.00380431]
similar_index: 44741
--
-- generate 5 / 10
score: [[1.9578064]]
predict_vec: [0.00035198 0.00073719 0.00062359 0.00140249 0.002057

score: [[1.92609878]]
predict_vec: [0.00185296 0.00106286 0.0027368  0.00945141 0.00540146 0.00689814
 0.00143494 0.00239048 0.00034029 0.00058657 0.00281103 0.00200331
 0.00704438 0.00659006 0.00720493 0.00685009]
cqt_chromas[similar_index]: [0.00098951 0.0012807  0.         0.0078125  0.00586792 0.00674025
 0.         0.00284474 0.00103754 0.         0.00163259 0.00057883
 0.00420511 0.00574742 0.00880166 0.00741834]
similar_index: 32088
--
-- generate 10 / 10
score: [[1.91865855]]
predict_vec: [0.0034833  0.00132317 0.00205037 0.00025126 0.00623761 0.00424419
 0.00137196 0.00127144 0.00094066 0.00032713 0.00044484 0.00077196
 0.00575546 0.00880754 0.00718926 0.00763717]
cqt_chromas[similar_index]: [0.00348428 0.00211359 0.         0.         0.0078125  0.00366443
 0.         0.         0.         0.         0.         0.
 0.00574742 0.00880166 0.00741834 0.00999939]
similar_index: 15584
--
creating ../data/out/out_independent_batch64_e100_rw1_9.wav ...
-- generate 1 / 10
score: [[1.

score: [[1.95364022]]
predict_vec: [0.00094366 0.00097587 0.00096006 0.00084548 0.00683687 0.00335803
 0.00259816 0.00120891 0.00092705 0.00020825 0.00076339 0.00068105
 0.00033707 0.0002636  0.00055116 0.00080237]
cqt_chromas[similar_index]: [0.00087452 0.00132825 0.         0.         0.0078125  0.00369838
 0.00164686 0.00183658 0.         0.         0.         0.001128
 0.00098555 0.00032562 0.00184317 0.00262606]
similar_index: 43563
--
-- generate 6 / 10
score: [[1.93931206]]
predict_vec: [0.00070581 0.0009487  0.00140267 0.00147658 0.00226969 0.00049894
 0.00352937 0.00518969 0.00052035 0.00059841 0.00540848 0.00076126
 0.00031129 0.00188022 0.00244919 0.00246764]
cqt_chromas[similar_index]: [0.00129507 0.00116123 0.00138188 0.00150814 0.00142859 0.00115084
 0.00488352 0.0078125  0.         0.00295429 0.00498839 0.
 0.00032562 0.00184317 0.00262606 0.00206097]
similar_index: 11344
--
-- generate 7 / 10
score: [[1.95965996]]
predict_vec: [0.00104312 0.00085401 0.00091306 0.0008434