In [1]:
# 環境構築

!apt-get update
!apt-get install -y libsndfile1-dev
!pip install scipy
!pip install scikit-learn
!pip install librosa

/bin/bash: /home/allegro/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
Reading package lists... Done
E: Could not open lock file /var/lib/apt/lists/lock - open (13: Permission denied)
E: Unable to lock directory /var/lib/apt/lists/
W: Problem unlinking the file /var/cache/apt/pkgcache.bin - RemoveCaches (13: Permission denied)
W: Problem unlinking the file /var/cache/apt/srcpkgcache.bin - RemoveCaches (13: Permission denied)
/bin/bash: /home/allegro/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
E: Could not open lock file /var/lib/dpkg/lock-frontend - open (13: Permission denied)
E: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?
/bin/bash: /home/allegro/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
/bin/bash: /home/allegro/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
/bin/bash: /home/a

In [2]:
# clips 作成

SAMPLING_RATE = 44100 # 変更不可

clip_size = 81920 # 楽曲を再構築するパーツ1つあたりの大きさ
step_size = 20480 # clip をずらすときの大きさ
window_size = 10240 # CQT_CHROMA を取得するのに使用するサンプル数
hop_size = 640 # window をずらすときの大きさ

from scipy.io.wavfile import read, write
import glob
import os
import numpy as np
import librosa
import sys

if os.path.isfile("../data/out/clips.wav"):
    print("../data/out/clips.wav exists.")
    files = ["../data/out/clips.wav"]
else:
    # clips.npy をもとにデータを作成する 
    if os.path.isfile("../data/arrays/clips.npy"):
        print("loading ../data/arrays/clips.npy ...")
        clips = np.load("../data/arrays/clips.npy")
        print("creating ../data/out/clips.wav ...")
        write("../data/out/clips.wav", SAMPLING_RATE, clips.reshape((clips.shape[0] * clips.shape[1], )))
        files = ["../data/out/clips.wav"]
        
    # clips,npy がないとき /data/wav44100 内の WAV ファイルを参照する
    else:
        files = glob.glob("../data/wav44100/*.wav")
        #files = ["../data/wav44100/3DEmbodimentFromLines.wav"] # デバッグ用 
        
if len(files) == 0:
    print("WAV ファイルが見つかりませんでした。")
    sys.exit(1)
        
clips_filename = "../data/arrays/c" + str(clip_size) + "_s" + str(step_size) + "_f32_clips"

if os.path.isfile(clips_filename + ".npy"):
    print("loading " + clips_filename + ".npy ...")
    clips = np.load(clips_filename + ".npy")
else:
    print("creating " + clips_filename + ".npy ...")
    raw_data_list = [librosa.load(file, sr=SAMPLING_RATE)[0] for file in files] # 左の音だけ使う

    clips = np.zeros((0, clip_size), dtype=np.float32)
    for raw_data in raw_data_list:
        tmp = [raw_data[i:i + clip_size] for i in range(0, len(raw_data) - clip_size, step_size)]
        clips = np.vstack((clips, np.array(tmp)))
    np.save(clips_filename, clips)

print("The clip array has " + str(clips.shape[0]) + " clips.")

../data/out/clips.wav exists.
loading ../data/arrays/c81920_s20480_f32_clips.npy ...
The clip array has 57148 clips.


In [3]:
# cqt 作成

n_bins = 84

cqt_filename = clips_filename + "_w" + str(window_size) + "_h" + str(hop_size) + "_cqts"

if os.path.isfile(cqt_filename + ".npy"):
    print("loading " + cqt_filename + ".npy ...")
    cqts = np.load(cqt_filename + ".npy")
else:
    print("creating " + cqt_filename + ".npy ...")
    num = 0
    for clip in clips:
        print("cqt progress: clip " + str(num + 1) + " / " + str(len(clips)))
        
        tmp_cqt = librosa.cqt(clip, sr=SAMPLING_RATE, hop_length=hop_size, n_bins=n_bins)
        tmp_cqt = tmp_cqt.reshape((1, tmp_cqt.shape[0], tmp_cqt.shape[1]))
        
        if num == 0:
            cqts = tmp_cqt
        else:
            cqts = np.vstack((cqts, tmp_cqt))
            
        num += 1

    np.save(cqt_filename, cqts)
    
print("The cqt array has " + str(cqts.shape[0]) + " cqts.")
print("cqts.shape: " + str(cqts.shape))
print("Type(cqts[0][0][0]): " + str(type(cqts[0][0][0])))
print("np.max(cqts[0][0]): " + str(np.max(cqts[0][0])))

loading ../data/arrays/c81920_s20480_f32_clips_w10240_h640_cqts.npy ...
The cqt array has 57148 cqts.
cqts.shape: (57148, 84, 129)
Type(cqts[0][0][0]): <class 'numpy.complex64'>
np.max(cqts[0][0]): (0.021538047+0.007131239j)


In [4]:
# cqt_choroma 作成

def Preprocessing(array):
    array = np.abs(array)
    array = np.log(array + 1)
    array = array / np.log(np.finfo(np.float32).max)
    array = array.T
    return array

cqt_chroma_filename = clips_filename + "_w" + str(window_size) + "_h" + str(hop_size) + "_cqt_chromas"

if os.path.isfile(cqt_chroma_filename + ".npy"):
    print("loading " + cqt_chroma_filename + ".npy ...")
    cqt_chromas= np.load(cqt_chroma_filename + ".npy")
else:
    print("creating " + cqt_chroma_filename + ".npy ...")
    num = 0
    for cqt in cqts:
        print("cqt_chroma progress: clip " + str(num + 1) + " / " + str(len(cqts)))
        
        tmp_cqt_chroma = librosa.feature.chroma_cqt(C=cqt, sr=SAMPLING_RATE)
        tmp_cqt_chroma = Preprocessing(tmp_cqt_chroma)
        tmp_cqt_chroma = tmp_cqt_chroma.reshape((1, tmp_cqt_chroma.shape[0], tmp_cqt_chroma.shape[1]))
        
        if num == 0:
            cqt_chromas = tmp_cqt_chroma
        else:
            cqt_chromas = np.vstack((cqt_chromas, tmp_cqt_chroma))
            
        num += 1

    np.save(cqt_chroma_filename, cqt_chromas)
    
print("The cqt_chroma array has " + str(cqt_chromas.shape[0]) + " cqt_chromas.")
print("cqt_chromas.shape: " + str(cqt_chromas.shape)) # clip 番号、window 番号、 chroma 番号 になる
print("Type(cqt_chromas[0][0][0]): " + str(type(cqt_chromas[0][0][0])))
print("np.max(cqt_chromas[0][0]): " + str(np.max(cqt_chromas[0][0])))

loading ../data/arrays/c81920_s20480_f32_clips_w10240_h640_cqt_chromas.npy ...
The cqt_chroma array has 57148 cqt_chromas.
cqt_chromas.shape: (57148, 129, 12)
Type(cqt_chromas[0][0][0]): <class 'numpy.float32'>
np.max(cqt_chromas[0][0]): 0.0078125


In [5]:
# max_norm 作成

num_divide = 4

max_norm_filename = clips_filename + "_d" + str(num_divide) + "_max_norms"

if os.path.isfile(max_norm_filename + ".npy"):
    print("loading " + max_norm_filename + ".npy ...")
    max_norms= np.load(max_norm_filename + ".npy")
else:
    print("creating " + max_norm_filename + ".npy ...")
    num = 0
    for clip in clips:
        print("max_norm progress: clip " + str(num + 1) + " / " + str(len(clips)))
        max_norm = [np.max(clip[int((i / num_divide) * len(clip)):int(((i + 1) / num_divide) * len(clip))] ** 2) for i in range(num_divide)]
    
        if num == 0:
            max_norms = np.array(max_norm)
        else:
            max_norms = np.vstack((max_norms, np.array(max_norm)))
        num += 1
        
    np.save(max_norm_filename, max_norms)
    
print("The max_norm array has " + str(max_norms.shape[0]) + " max_norms.")
print("max_norms.shape: " + str(max_norms.shape))
print("Type(max_norms[0][0]): " + str(type(max_norms[0][0])))
print("np.max(max_norms[0]): " + str(np.max(max_norms[0])))

loading ../data/arrays/c81920_s20480_f32_clips_d4_max_norms.npy ...
The max_norm array has 57148 max_norms.
max_norms.shape: (57148, 4)
Type(max_norms[0][0]): <class 'numpy.float32'>
np.max(max_norms[0]): 0.0179649


In [6]:
# 学習データとテストデータに分ける

def normalize(array):
    if (np.sum(array) == 0):
        return array
    else:
        return array / np.sum(array)
    
cqt_chroma_sum_threshold = 0.01 # 次の window の sum が閾値に満たないときに除外します
test_data_rate = 0.1
weight = 0.01 # max_norm の重み

window_num_per_clip = cqt_chromas.shape[1]

cqt_chromas = np.array([np.hstack((cqt_chromas[i], np.repeat(np.array([max_norms[i]]), cqt_chromas.shape[1], axis=0) * weight)) for i in range(len(max_norms))])
cqt_chromas = np.concatenate([cqt_chromas[:-1, :, :], cqt_chromas[1:, 0, :].reshape(cqt_chromas.shape[0] - 1, 1, cqt_chromas.shape[2])], 1)

p = np.random.permutation(len(cqt_chromas))
cqt_chromas = cqt_chromas[p]
clips = clips[p]

x = np.delete(cqt_chromas, np.where(np.sum(cqt_chromas[:,-1,:], axis=1) < cqt_chroma_sum_threshold)[0], axis=0)

x_test = np.apply_along_axis(normalize, 2, x[:int(x.shape[0] * test_data_rate), :window_num_per_clip, :].reshape(int(x.shape[0] * test_data_rate), window_num_per_clip, x.shape[2], 1))
x_train = np.apply_along_axis(normalize, 2, x[int(x.shape[0] * test_data_rate):, :window_num_per_clip, :].reshape(x.shape[0] - int(x.shape[0] * test_data_rate), window_num_per_clip, x.shape[2], 1))
y_test =  np.apply_along_axis(normalize, 1, x[:int(x.shape[0] * test_data_rate), window_num_per_clip, :])
y_train = np.apply_along_axis(normalize, 1, x[int(x.shape[0] * test_data_rate):, window_num_per_clip, :])

print("x_train.shape: " + str(x_train.shape))
print("x_test.shape: " + str(x_test.shape))
print("y_train.shape: " + str(y_train.shape))
print("y_test.shape: " + str(y_test.shape))

In [7]:
def plot_history(history):
    # 精度の履歴をプロット
    plt.plot(history.history['accuracy'],"o-",label="accuracy")
    plt.plot(history.history['val_accuracy'],"o-",label="val_acc")
    plt.title('model accuracy')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend(loc="lower right")
    plt.show()
 
    # 損失の履歴をプロット
    plt.plot(history.history['loss'],"o-",label="loss",)
    plt.plot(history.history['val_loss'],"o-",label="val_loss")
    plt.title('model loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(loc='lower right')
    plt.show()

In [8]:
# モデル作成

import keras

from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout, Conv2D, Flatten, Input, MaxPooling2D
from keras.optimizers import Adam

learning_rate = 0.001
 
model = keras.models.Sequential()
model.add(Conv2D(32, (3, 1), activation="relu", input_shape=(window_num_per_clip, cqt_chromas.shape[2], 1)))
model.add(MaxPooling2D(pool_size=(2, 1)))
model.add(Dropout(0.5))
model.add(Conv2D(64, (3, 1), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 1)))
model.add(Dropout(0.5))
model.add(Conv2D(128, (3, 1), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 1)))
model.add(Dropout(0.5))
model.add(Conv2D(256, (3, 1), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 1)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(units=cqt_chromas.shape[2]))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy',optimizer=Adam(learning_rate=learning_rate),metrics=['accuracy'])

model.build()
model.summary()

2023-03-08 10:41:27.534444: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-08 10:41:27.936790: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-03-08 10:41:30.341959: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/allegro/miniconda3/lib/
2023-03-08 10:41:30.347032: W tensorflow/compiler/xla/st

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 127, 16, 32)       128       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 63, 16, 32)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 63, 16, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 61, 16, 64)        6208      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 30, 16, 64)       0         
 2D)                                                             
                                                                 
 dropout_1 (Dropout)         (None, 30, 16, 64)        0

In [9]:
# 学習
batch_size = 64
epochs = 1000
model_name = "cqt_chroma_batch" + str(batch_size) + "_e" + str(epochs)

history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))

model.save("../data/models/" + model_name)

In [10]:
# loss 遷移のプロット

!pip install matplotlib

import matplotlib.pyplot as plt

plot_history(history)

/bin/bash: /home/allegro/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)


In [12]:
# 楽曲の出力

import sklearn
from sklearn.metrics.pairwise import cosine_similarity
from keras.models import load_model
import soundfile as sf

model = load_model("../data/models/" + model_name)
file_name = "out_" + model_name
num_clips = 10

def add_fade(x, fadetime, sr):
    ft_len = int(fadetime*sr)
    r = np.arange(0, ft_len)*np.pi/ft_len
    w_fo = (0.5+0.5*np.cos(r))**0.5
    w_fi = (0.5-0.5*np.cos(r))**0.5
    
    x[0:ft_len]        *= w_fi
    x[clip_size-ft_len::] *= w_fo
    return x

def gen_xfade(x_pre, x_next, fadetime, sr):
    ft_len = int(fadetime*sr)
    if x_pre is None:
        xfade = x_next
    else:
        x_pre_begin = x_pre[:x_pre.shape[0] - ft_len]
        x_pre_end = x_pre[x_pre.shape[0] - ft_len:]
        x_pre_len = clip_size
        x_next_len = clip_size
        x_pre_len -= ft_len
        x_next_len -= ft_len
        xfade = np.concatenate((x_pre_begin, np.concatenate((x_pre_end, np.zeros(x_next_len))) + x_next))
    return xfade

def create_music_consider_weights(fname):
    first_index = np.random.randint(0, len(cqt_chromas))
    predict_index = first_index
    out = None
    rhythm_weight = 100
    
    for i in range(num_clips):
        print("-- generate " + str(i + 1) + " / " + str(num_clips))
        predict = model.predict(np.array([cqt_chromas[predict_index, :-1]]))[0]
        predict_chroma = predict[:len(predict)-num_divide]
        predict_rhythm = predict[len(predict)-num_divide:]

        index = 0
        similar_index = 0
        score = -1 - rhythm_weight
        chroma_score = 0
        rhythm_score = 0
        for cqt_chroma in cqt_chromas[:, 0]:
            cqt_chroma_chroma = cqt_chroma[:len(cqt_chroma)-num_divide]
            cqt_chroma_rhythm = cqt_chroma[len(cqt_chroma)-num_divide:]
            tmp_chroma_score = cosine_similarity(np.array([predict_chroma]), np.array([cqt_chroma_chroma]))
            tmp_rhythm_score = cosine_similarity(np.array([predict_rhythm]), np.array([cqt_chroma_rhythm]))
            tmp_score = tmp_chroma_score + rhythm_weight * tmp_rhythm_score
            if tmp_score > score:
                score = tmp_score
                similar_index = index
            index += 1

        print("score: " + str(score))
        print("predict: " + str(predict))
        print("cqt_chromas[similar_index]: " + str(cqt_chromas[similar_index, 0]))
        print("similar_index: " + str(similar_index))
        print("--")

        predict_index = similar_index

        tmp = add_fade(clips[predict_index], 0.1, SAMPLING_RATE)
        out = gen_xfade(out, tmp, 0.1, SAMPLING_RATE)
        
    sf.write(fname, out, SAMPLING_RATE, subtype="PCM_16")

for i in range(10):
    fname = "../data/out/" + file_name + "_" + str(i) + ".wav"
    print("creating " + fname + " ...")
    create_music_consider_weights(fname)

creating ../data/out/out_cqt_chroma_batch64_e1000_0.wav ...
-- generate 1 / 10


2023-03-08 10:41:53.880781: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8100
2023-03-08 10:41:56.947079: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-03-08 10:41:56.985386: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-03-08 10:41:56.985455: W tensorflow/compiler/xla/stream_executor/gpu/asm_compiler.cc:85] Couldn't get ptxas version string: INTERNAL: Couldn't invoke ptxas --version
2023-03-08 10:41:57.026523: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-03-08 10:41:57.026657: W tensorflow/compiler/xla/stream_executor/gpu/redzone_allocator.cc:318] INTERNAL: Failed to launch ptxas
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.




2023-03-08 10:41:59.865016: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


score: [[100.80793]]
predict: [0.08278503 0.07514214 0.11032878 0.08525775 0.11297214 0.0609618
 0.06224582 0.08116625 0.04842107 0.0532167  0.03473805 0.04729819
 0.03422814 0.03337161 0.02994772 0.0479189 ]
cqt_chromas[similar_index]: [0.00643813 0.00269565 0.00533502 0.00319162 0.0078125  0.00609526
 0.         0.00374359 0.00412993 0.         0.         0.
 0.00349393 0.00310461 0.00285283 0.00446835]
similar_index: 44212
--
-- generate 2 / 10
score: [[100.7795]]
predict: [0.08965363 0.08397525 0.12445346 0.08770636 0.08943796 0.06551448
 0.06636736 0.08623196 0.06277449 0.0518872  0.03785157 0.03789686
 0.02384035 0.02295955 0.02233302 0.04711656]
cqt_chromas[similar_index]: [0.         0.0064963  0.00701139 0.00423495 0.         0.00476548
 0.00395541 0.0078125  0.00492818 0.00487013 0.00357388 0.00201672
 0.00159629 0.00156765 0.0017117  0.00339685]
similar_index: 13568
--
-- generate 3 / 10
score: [[100.75656]]
predict: [0.12298697 0.06923212 0.1199774  0.08740896 0.093083   0.

score: [[100.74462]]
predict: [0.09175535 0.08123896 0.1250859  0.09194626 0.09135815 0.06804981
 0.0719832  0.08809143 0.06560396 0.05571581 0.04115765 0.0401011
 0.00798255 0.00981018 0.02031965 0.04980005]
cqt_chromas[similar_index]: [0.0062551  0.00244759 0.00656621 0.00662343 0.0078125  0.
 0.0046405  0.001143   0.00159999 0.0032978  0.00092872 0.00230759
 0.00206791 0.00222251 0.00378776 0.01      ]
similar_index: 6694
--
-- generate 9 / 10
score: [[100.80121]]
predict: [0.08936434 0.08365963 0.11841623 0.08813516 0.09336279 0.06677578
 0.06689984 0.08471669 0.05402711 0.05296712 0.03645368 0.03693896
 0.02131199 0.02469255 0.03105735 0.05122086]
cqt_chromas[similar_index]: [0.00716418 0.0078125  0.00593198 0.00733433 0.00630329 0.00472998
 0.         0.         0.00469314 0.         0.         0.
 0.00304875 0.00322062 0.00440655 0.00686903]
similar_index: 20197
--
-- generate 10 / 10
score: [[100.81865]]
predict: [0.11389839 0.07869763 0.11728725 0.0856799  0.08585993 0.0670696

score: [[100.73241]]
predict: [0.09545727 0.08688089 0.12405412 0.08967775 0.09326557 0.06654326
 0.06872703 0.08743026 0.05912007 0.06135785 0.03826378 0.04083395
 0.00483984 0.00782758 0.02207257 0.05364822]
cqt_chromas[similar_index]: [0.0023252  0.00252599 0.00300383 0.00714982 0.         0.00749385
 0.         0.0078125  0.00387898 0.00405609 0.         0.0006475
 0.00026003 0.00040495 0.00114026 0.00298238]
similar_index: 1110
--
-- generate 5 / 10
score: [[100.73241]]
predict: [0.09545727 0.08688089 0.12405412 0.08967775 0.09326557 0.06654326
 0.06872703 0.08743026 0.05912007 0.06135785 0.03826378 0.04083395
 0.00483984 0.00782758 0.02207257 0.05364822]
cqt_chromas[similar_index]: [0.0023252  0.00252599 0.00300383 0.00714982 0.         0.00749385
 0.         0.0078125  0.00387898 0.00405609 0.         0.0006475
 0.00026003 0.00040495 0.00114026 0.00298238]
similar_index: 1110
--
-- generate 6 / 10
score: [[100.73241]]
predict: [0.09545727 0.08688089 0.12405412 0.08967775 0.09326

score: [[100.80121]]
predict: [0.08936434 0.08365963 0.11841623 0.08813516 0.09336279 0.06677578
 0.06689984 0.08471669 0.05402711 0.05296712 0.03645368 0.03693896
 0.02131199 0.02469255 0.03105735 0.05122086]
cqt_chromas[similar_index]: [0.00716418 0.0078125  0.00593198 0.00733433 0.00630329 0.00472998
 0.         0.         0.00469314 0.         0.         0.
 0.00304875 0.00322062 0.00440655 0.00686903]
similar_index: 20197
--
creating ../data/out/out_cqt_chroma_batch64_e1000_5.wav ...
-- generate 1 / 10
score: [[100.67422]]
predict: [0.11058804 0.08435723 0.12627642 0.08560137 0.09132294 0.06651109
 0.0686724  0.09219091 0.06260529 0.06234008 0.04065772 0.04032686
 0.00472013 0.00509544 0.00810137 0.05063266]
cqt_chromas[similar_index]: [0.00513955 0.         0.00251805 0.00278354 0.         0.00153767
 0.00572838 0.0078125  0.00333185 0.00265335 0.00659186 0.
 0.00016507 0.00015171 0.00036368 0.00228189]
similar_index: 42370
--
-- generate 2 / 10
score: [[100.73591]]
predict: [0.0

score: [[100.788124]]
predict: [0.08723351 0.08266983 0.11735541 0.08796516 0.08714562 0.07097481
 0.06715119 0.08830532 0.05661761 0.05681165 0.03909063 0.03818333
 0.02394934 0.02420294 0.02541225 0.04693151]
cqt_chromas[similar_index]: [0.00601903 0.00535267 0.         0.00180104 0.00363417 0.00227823
 0.00374694 0.0078125  0.00482283 0.00630992 0.         0.
 0.00317575 0.003162   0.00349682 0.00630349]
similar_index: 28083
--
-- generate 7 / 10
score: [[100.788124]]
predict: [0.08723351 0.08266983 0.11735541 0.08796516 0.08714562 0.07097481
 0.06715119 0.08830532 0.05661761 0.05681165 0.03909063 0.03818333
 0.02394934 0.02420294 0.02541225 0.04693151]
cqt_chromas[similar_index]: [0.00601903 0.00535267 0.         0.00180104 0.00363417 0.00227823
 0.00374694 0.0078125  0.00482283 0.00630992 0.         0.
 0.00317575 0.003162   0.00349682 0.00630349]
similar_index: 28083
--
-- generate 8 / 10
score: [[100.788124]]
predict: [0.08723351 0.08266983 0.11735541 0.08796516 0.08714562 0.070

score: [[100.7499]]
predict: [0.09482288 0.08589348 0.12900387 0.08991212 0.09072185 0.07240278
 0.07387362 0.09438282 0.06354    0.0597623  0.04128661 0.03942016
 0.0045734  0.00489447 0.00554065 0.04996895]
cqt_chromas[similar_index]: [0.         0.0078125  0.00303716 0.00541394 0.00645345 0.
 0.00219631 0.00225986 0.00178224 0.00556105 0.00257594 0.
 0.00071451 0.00095853 0.00094234 0.01      ]
similar_index: 5112
--
-- generate 3 / 10
score: [[100.68342]]
predict: [0.09362917 0.08191266 0.12904005 0.08559942 0.09008636 0.06806583
 0.06783707 0.08913086 0.05900212 0.05636883 0.03680365 0.038456
 0.00573169 0.0065023  0.03195216 0.0598818 ]
cqt_chromas[similar_index]: [0.         0.00627821 0.0044666  0.         0.0078125  0.
 0.00481636 0.00352849 0.         0.         0.00116088 0.
 0.00050504 0.00054832 0.00304807 0.00534387]
similar_index: 36920
--
-- generate 4 / 10
score: [[100.666214]]
predict: [0.09770066 0.08229709 0.12472674 0.08637724 0.09948646 0.06625514
 0.06722509 0.08

score: [[100.74462]]
predict: [0.09175535 0.08123896 0.1250859  0.09194626 0.09135815 0.06804981
 0.0719832  0.08809143 0.06560396 0.05571581 0.04115765 0.0401011
 0.00798255 0.00981018 0.02031965 0.04980005]
cqt_chromas[similar_index]: [0.0062551  0.00244759 0.00656621 0.00662343 0.0078125  0.
 0.0046405  0.001143   0.00159999 0.0032978  0.00092872 0.00230759
 0.00206791 0.00222251 0.00378776 0.01      ]
similar_index: 6694
--
-- generate 10 / 10
score: [[100.80121]]
predict: [0.08936434 0.08365963 0.11841623 0.08813516 0.09336279 0.06677578
 0.06689984 0.08471669 0.05402711 0.05296712 0.03645368 0.03693896
 0.02131199 0.02469255 0.03105735 0.05122086]
cqt_chromas[similar_index]: [0.00716418 0.0078125  0.00593198 0.00733433 0.00630329 0.00472998
 0.         0.         0.00469314 0.         0.         0.
 0.00304875 0.00322062 0.00440655 0.00686903]
similar_index: 20197
--
