In [1]:
from file_encoder import FileEncoder
from data_manager import DataManager
from audio_utils import N_MELS
import numpy as np
import sys
import tensorflow as tf
from tensorflow.keras import layers, models, Model


# Input database directory handed to FileEncoder, and the directory passed as its output location.
TARGET_DIR = "../thirdparty/「波音リツ」歌声データベースVer2/DATABASE"
OUTPUT_DIR = "../master/ust/json"

# Per-timestep feature widths of each model input.
PITCH_FEATURES = 1 # MIDI note number as a scalar in the range 0 ~ 100
LYRIC_FEATURES = 1 # lyrics are index-encoded, so a single scalar
DURATION_FEATURES = 1 # length in ms, normalized, so a single scalar
# NOTE(review): "CHUK" looks like a typo for "CHUNK"; the name is kept because other code may reference it.
NOTE_CHUK_INDEX_FEATURES = 1 # normalized index of the sub-chunk within a note, so a single scalar

LYRIC_INDEX_MAX_DIM = 256 # upper bound on the lyric index (the observed maximum was 237, so a little headroom is reserved)
LYRIC_DIM = 64 # dimensionality of a lyric index after embedding
DURATION_DIM = 1 # normalized length in milliseconds (0 ms ~ 10000 ms (10 s))

# TODO: split each note into N chunks; to be designed later (modelling vibrato etc. needs finer subdivision of a note)
NOTE_CHUNK = 1 # each note is currently split into exactly 1 chunk


def build_model():
    """Build the note-features-to-mel-spectrogram Keras model.

    Three inputs (pitch, lyric index, duration) are each passed through
    their own LSTM branch (the lyric index is embedded first), the branch
    outputs are concatenated along the feature axis, fed through a final
    LSTM, and projected to ``N_MELS`` values per timestep.

    Previously this function assembled the layer graph but never wrapped it
    in a ``Model`` nor returned anything, so callers always received
    ``None``. It now returns the assembled model.

    Returns:
        tensorflow.keras.Model: an uncompiled functional model whose inputs
        are, in order, ``pitch_input`` of shape
        ``(NOTE_CHUNK, PITCH_FEATURES)``, ``lyric_input`` of shape
        ``(NOTE_CHUNK,)`` and ``duration_input`` of shape
        ``(NOTE_CHUNK, DURATION_FEATURES)`` (batch axis omitted), and whose
        single output is the ``output`` layer with ``N_MELS`` units per
        timestep.
    """
    pitch_input = layers.Input(shape=(NOTE_CHUNK, PITCH_FEATURES), name="pitch_input")
    lyric_input = layers.Input(shape=(NOTE_CHUNK,), name="lyric_input")
    duration_input = layers.Input(shape=(NOTE_CHUNK, DURATION_FEATURES), name="duration_input")
    # TODO: add a "note_chunk_index_input" of shape
    # (NOTE_CHUNK, NOTE_CHUK_INDEX_FEATURES) with its own LSTM branch once
    # notes are split into more than one chunk, so the model knows which
    # chunk of the note it is looking at.

    # Embed the integer lyric indices into LYRIC_DIM-dimensional vectors.
    lyric_embedding = layers.Embedding(
        output_dim=LYRIC_DIM,
        input_dim=LYRIC_INDEX_MAX_DIM,
        name="lyric_embedding",
    )(lyric_input)

    # One sequence-returning LSTM per feature stream; return_sequences=True
    # keeps the time axis so the branches can be concatenated per timestep.
    lstm_pitch = layers.LSTM(units=64, return_sequences=True)(pitch_input)
    lstm_duration = layers.LSTM(units=64, return_sequences=True)(duration_input)
    lstm_lyric_embedding = layers.LSTM(units=64, return_sequences=True)(lyric_embedding)

    # Join the three branches along the feature axis.
    merged = layers.Concatenate(axis=-1, name="merged_features")(
        [lstm_pitch, lstm_lyric_embedding, lstm_duration]
    )

    # Final sequence model over the merged features.
    final_lstm = layers.LSTM(128, return_sequences=True, name="final_lstm")(merged)

    # Output layer (e.g. regression onto a mel spectrogram).
    # NOTE(review): tanh bounds predictions to [-1, 1]; this presumably
    # assumes the targets are normalized to that range -- confirm.
    output = layers.Dense(N_MELS, activation="tanh", name="output")(final_lstm)

    return Model(
        inputs=[pitch_input, lyric_input, duration_input],
        outputs=output,
    )



# Construct the encoder with the database directory and the output directory.
encoder = FileEncoder(TARGET_DIR, OUTPUT_DIR)
# encode() returns four values, unpacked here. Judging by the names they are
# presumably the lyric indices, note durations, MIDI note numbers and the
# training targets `y` -- TODO confirm against FileEncoder.encode.
lyric_indexs, durations, notenums, y = encoder.encode()

2025-01-03 18:35:19.094164: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1735896919.180669  471273 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1735896919.207130  471273 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-03 18:35:19.457778: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


parsing ust file... ../thirdparty/「波音リツ」歌声データベースVer2/DATABASE/lost_and_cry/lost_and_cry.ust
done parsing ust file ../thirdparty/「波音リツ」歌声データベースVer2/DATABASE/lost_and_cry/lost_and_cry.ust
parsing ust file... ../thirdparty/「波音リツ」歌声データベースVer2/DATABASE/sacrifice/sacrifice.ust
done parsing ust file ../thirdparty/「波音リツ」歌声データベースVer2/DATABASE/sacrifice/sacrifice.ust
parsing ust file... ../thirdparty/「波音リツ」歌声データベースVer2/DATABASE/aaaa/aaaa.ust
done parsing ust file ../thirdparty/「波音リツ」歌声データベースVer2/DATABASE/aaaa/aaaa.ust
parsing ust file... ../thirdparty/「波音リツ」歌声データベースVer2/DATABASE/WAVE/WAVE.ust
done parsing ust file ../thirdparty/「波音リツ」歌声データベースVer2/DATABASE/WAVE/WAVE.ust
parsing ust file... ../thirdparty/「波音リツ」歌声データベースVer2/DATABASE/stRay/stRay.ust
done parsing ust file ../thirdparty/「波音リツ」歌声データベースVer2/DATABASE/stRay/stRay.ust
parsing ust file... ../thirdparty/「波音リツ」歌声データベースVer2/DATABASE/haruwasakura/haruwasakura.ust
done parsing ust file ../thirdparty/「波音リツ」歌声データベースVer2/DATABASE/haruwasakura/haruw



done encoding y
lyric_indexs [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 10, 11, 0, 12, 13, 14, 15, 6, 16, 17, 18, 19, 19, 20, 21, 0, 8, 19, 22, 18, 19, 23, 24, 25, 2, 18, 15, 26, 0, 15, 27, 0, 28, 1, 24, 29, 16, 17, 30, 1, 0, 31, 31, 32, 12, 2, 8, 28, 16, 16, 17, 33, 20, 34, 0, 15, 26, 18, 13, 18, 4, 2, 35, 36, 21, 11, 37, 34, 0, 38, 35, 39, 40, 13, 28, 16, 0, 41, 3, 19, 18, 42, 28, 19, 0, 15, 30, 29, 23, 28, 43, 37, 8, 13, 1, 0, 8, 44, 24, 18, 19, 26, 19, 32, 14, 45, 44, 9, 13, 46, 19, 0, 19, 23, 42, 19, 10, 24, 39, 15, 47, 38, 20, 48, 2, 11, 0, 23, 39, 3, 44, 29, 28, 40, 34, 22, 18, 19, 23, 49, 20, 0, 10, 23, 2, 34, 17, 18, 38, 50, 28, 29, 9, 6, 11, 0, 19, 35, 24, 51, 52, 53, 13, 2, 11, 19, 34, 16, 0, 18, 32, 32, 12, 39, 24, 20, 22, 2, 11, 34, 16, 0, 42, 1, 54, 3, 18, 26, 23, 42, 16, 55, 9, 51, 0, 19, 35, 0, 3, 16, 44, 19, 15, 53, 56, 11, 0, 3, 9, 57, 30, 32, 52, 8, 34, 3, 42, 18, 43, 3, 0, 19, 35, 28, 19, 58, 30, 32, 37, 26, 30, 29, 37, 17, 0, 48, 25, 32, 1, 24, 22, 2, 0, 24, 8, 35, 46, 19, 