## 資料處理

In [1]:
import numpy as np
import json
import os

In [2]:
root_path="D:/DeepLearning-workplace/taiko_generate/preprocessing/audio_np"
audio_path = os.listdir(root_path)
audio_path = [ root_path+"/"+ad for ad in audio_path]

root_path="D:/DeepLearning-workplace/taiko_generate/preprocessing/map_np"
map_path = os.listdir(root_path)
map_path = [ root_path+"/"+mp for mp in map_path]

In [3]:
class data_generator:
    def __init__(self, audio_path, map_path, batch_size=64):
        self.audio_path = audio_path
        self.map_path = map_path
        self.batch_size = batch_size
        data_len=0
        #self.steps=7782
        for audio in self.audio_path:
            audio = np.load(audio)
            data_len+=len(audio)
        self.steps =data_len // self.batch_size
        if data_len % self.batch_size != 0:
            self.steps += 1
    def __len__(self):
        return self.steps
    def __iter__(self):
        while True:
            for i, audio in enumerate(self.audio_path): #所有的音檔path
                audio = np.load(audio) #讀音檔
                f = open(self.map_path[i], "r") #讀所有的map
                maps = json.load(f)
                f.close()

                for key in maps.keys(): #讀所有難度的map
                    step=1 #紀錄batch step
                    for idx in range(0,len(audio),self.batch_size): #每次取batch_size大小的資料出來
                        if idx+self.batch_size > len(audio):
                            X = np.array(audio[idx:len(audio)]) #cnn input
                        else:
                             X = np.array(audio[idx:idx+self.batch_size])
                        C = np.zeros((len(X),5)) #condition
                        Y = np.zeros(len(X)) #output
                        C[:,int(key)]=1

                        n=0 #已被記錄過的數量
                        for t,_ in maps[key]:
                            if (t-7)>step*self.batch_size: #音檔前70ms不取
                                break
                            else:
                                Y[t-(step-1)*self.batch_size-8]=1
                                n+=1
                        for i in range(n): #把紀錄過的資料pop掉
                            maps[key].pop(0)
                        step+=1
                        yield [X, C], Y

In [4]:
# class data_generator:
#     def __init__(self, audio_path, map_path, batch_size=64):
#         self.audio_path = audio_path
#         self.map_path = map_path
#         self.batch_size = batch_size
#         data_len=0
#         #self.steps=7782
#         for audio in self.audio_path:
#             audio = np.load(audio)
#             data_len+=len(audio)
#         self.steps =data_len // self.batch_size
#         if data_len % self.batch_size != 0:
#             self.steps += 1
#     def __len__(self):
#         return self.steps
#     def __iter__(self):
#         while True:
#             for i, audio in enumerate(audio_path): #所有的音檔path
#                 audio = np.load(audio) #讀音檔
#                 f = open(map_path[i], "r") #讀所有的map
#                 maps = json.load(f)
#                 f.close()

#                 steps=len(audio)//self.batch_size
#                 for key in maps.keys(): #讀所有難度的map
#                     for _ in range(steps):
#                         t_idxs = list(range(len(maps[key])))#產生打點亂數idx
#                         np.random.shuffle(t_idxs)
#                         t_idxs=[maps[key][i][0]-8 for i in t_idxs]

#                         idxs = list(range(len(audio))) #產生亂數idx
#                         np.random.shuffle(idxs)
#                         idxs = idxs[:self.batch_size//2]

#                         Y = np.zeros(self.batch_size) #產生Y
#                         for i, idx in enumerate(idxs):
#                             if idx in t_idxs: #檢查亂數idx是否有打點
#                                 Y[i]=1
#                         Y[self.batch_size//2:]=1

#                         t_idxs=t_idxs[:self.batch_size//2]
#                         X = [audio[i] for i in idxs] #產生X
#                         for t_idx in t_idxs:
#                             X.append(audio[t_idx])
#                         X=np.array(X)

#                         C = np.zeros((self.batch_size,5)) #condition
#                         C[:,int(key)]=1

#                         yield [X, C], Y

In [4]:
train_D = data_generator(audio_path,map_path,128)
train_D.__len__()

7782

## 模型建立

In [5]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import ModelCheckpoint

In [6]:
inputs=Input(shape=(15,80,3))
x = Conv2D(10,(7,3),activation='relu',padding='same')(inputs)
x = MaxPooling2D(pool_size=(1,3),padding='same')(x)
x = Conv2D(20,(3,3),activation='relu',padding='same')(x)
x = MaxPooling2D(pool_size=(1,3),padding='same')(x)
flatten_layer=Flatten()
x = flatten_layer(x)

diffs = Input(shape=(5))
x = Concatenate(axis=1)([x,diffs])
x = Dense(256,activation='sigmoid')(x)
x = Dense(128,activation='sigmoid')(x)
outputs = Dense(1,activation='sigmoid')(x)

model=Model([inputs,diffs],outputs)

In [7]:
def loss_fun(y_true, y_pred):
    return K.binary_crossentropy(y_pred,y_true)#+y_true*K.binary_crossentropy(y_pred,y_true)

mc = ModelCheckpoint(
    'D:/DeepLearning-workplace/taiko_generate/StepPlacement/lstm_model_checkpoints/dense_model{epoch:03d}-loss{loss:.3f}.h5', 
    monitor='loss', 
    save_best_only=True, 
)

In [8]:
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 15, 80, 3)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 15, 80, 10)   640         input_1[0][0]                    
__________________________________________________________________________________________________
max_pooling2d (MaxPooling2D)    (None, 15, 27, 10)   0           conv2d[0][0]                     
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 15, 27, 20)   1820        max_pooling2d[0][0]              
______________________________________________________________________________________________

In [9]:
history = model.fit(
    train_D.__iter__(),
    steps_per_epoch=train_D.__len__(),
    epochs=100,
    callbacks=[mc]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100


Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [10]:
model.save("D:/DeepLearning-workplace/taiko_generate/model/StepPlacementDense100.h5")

## 測試

In [11]:
from tensorflow import keras
import librosa
from librosa.util import peak_pick
import matplotlib.pyplot as plt
from scipy import signal

In [19]:
model = keras.models.load_model("D:/DeepLearning-workplace/taiko_generate/StepPlacement/lstm_model_checkpoints/dense_model008-loss0.085.h5")

In [24]:
F_measure_list=[]
for i,ap in enumerate(audio_path):
    audio=np.load(ap)
    f = open(map_path[i], "r") #讀所有的map
    maps = json.load(f)
    f.close()
    
    for key in maps.keys():
        dif = np.zeros(shape=(audio.shape[0],5))
        dif[:,int(key)]=1
        result = model.predict([audio, dif])

        data = []
        for r in result:
            data.append(r[0])
        data=np.array(data)

        data = data*1000
        data = data.astype(int)

        win = signal.windows.hamming(50)
        x = signal.convolve(data,win,mode='same')/sum(win)

        peaks, _ = signal.find_peaks(x, prominence=1)
        
        
        
        # 取出實際map之節拍毫秒值
        realMiliSecs = list()
        for ele in maps[key]:
            realMiliSecs.append(ele[0])

        # 誤差區間值
        DET_RANGE = 10
        # 計算TP, FP, FN score以得出精準度
        tpScore, fpScore, fnScore, previousRealMiliSecs, hitFlag = 0, 0, 0, 0, False
        for guessMiliSecs in peaks:
            # 取得與猜測時間點最近的實際時間點
            closestRealMiliSecs = min(realMiliSecs, key = lambda x:abs(x-guessMiliSecs))
            if previousRealMiliSecs != closestRealMiliSecs:
                if not hitFlag:
                    fnScore += 1
                hitFlag = False
            #print([closestRealMiliSecs, guessMiliSecs])
            if (closestRealMiliSecs - 8) + DET_RANGE > guessMiliSecs > (closestRealMiliSecs - 8) - DET_RANGE:
                #print('hit')
                tpScore += 1
                hitFlag = True
            else:
                fpScore += 1
            previousRealMiliSecs = closestRealMiliSecs

        F_measure = 2 * tpScore / (2 * tpScore + fpScore + fnScore)
        F_measure_list.append(F_measure)
        print(F_measure)

0.37777777777777777
0.39166666666666666
0.4297520661157025
0.36801752464403065
0.44534412955465585
0.36015325670498083
0.3412526997840173
0.4405458089668616
0.4


KeyboardInterrupt: 

In [21]:
print(sum(F_measure_list)/len(F_measure_list))

0.6039335382592205


In [None]:
#epoch:100 loss:0.1012 F-measure:0.6506620531952721
#epoch:8 loss:0.0776 F-measure:0.6039335382592205