本檔案使用資料處理後的MFCC圖進行訓練與預測

為了滿足預訓練模型所需，在進入模型之前先將影像疊為三通道。

訓練結果的模型權重將存為「mfcc.h5」

In [None]:
import pandas as pd
import numpy as np
import math
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

import keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from keras_self_attention import SeqSelfAttention
from tensorflow.keras import initializers, regularizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D,Conv2D, MaxPooling1D, MaxPooling2D, Flatten, Dropout,Layer,Concatenate,Input,Activation
from tensorflow.keras.layers import BatchNormalization,LayerNormalization,LeakyReLU,ReLU,Add,AveragePooling1D
from tensorflow.keras.layers import GlobalAveragePooling1D, Reshape, multiply, add, GlobalMaxPooling1D, Multiply
from tensorflow.keras.layers import Masking, Bidirectional, LSTM
from tensorflow.keras.applications import EfficientNetB0
from focal_loss import SparseCategoricalFocalLoss

In [None]:
# Sampling-rate constant; it is not referenced by any of the cells visible
# here (presumably Hz, kept for the audio preprocessing -- TODO confirm).
sample_rate = 16000
# Archive providing the 'train_y' and 'train_y_bln' arrays read in the next cell.
data = np.load('/home/user8008//sdk/sail/audio/cup/final/train_data(1000)(normalize).npz')
# Archive providing the 'MFCC' array read in the next cell.
data_MFCC = np.load('/home/user8008/sdk/sail/audio/cup/final/MFCCtrain_data(1000).npz')

In [None]:
# Arrays produced by the data-processing step.
# train_y is used as the training target below; train_y_bln is only used to
# stratify the train/validation split.
train_y = data['train_y']
train_y_bln =  data['train_y_bln']
train_mfcc = data_MFCC['MFCC']
# Stack three copies of the MFCC array along a new axis 3 so the input has
# the three channels the pretrained backbone is built for.
# Assumes train_mfcc is 3-D (samples, height, width) -- TODO confirm.
train_mfcc_3 = np.stack((train_mfcc, train_mfcc, train_mfcc), axis=3) # stack the data into three channels
# Notebook display of the shapes as a sanity check.
train_mfcc.shape, train_y.shape, train_y_bln.shape,train_mfcc_3.shape

In [None]:
# Hold out 20% of the samples for validation with a fixed seed for
# reproducibility. The split is stratified on train_y_bln, not on the
# train_y labels that are actually split.
# NOTE(review): confirm train_y_bln is the intended stratification key.
mfcc_x_train, mfcc_x_val, y_train, y_val = train_test_split(train_mfcc_3,train_y,test_size=0.2,random_state=5473,stratify=train_y_bln)

In [None]:
def gelu(x):
    """Apply the tanh approximation of the GELU activation to ``x``.

    Evaluates ``0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))``
    with the Keras backend ``tanh``.

    Args:
        x: Tensor (or Keras symbolic tensor) to activate.

    Returns:
        The activated tensor.
    """
    # The operations are issued in the same order as a single-expression
    # evaluation would, so auto-generated op names stay stable.
    half_x = 0.5 * x
    scale = math.sqrt(2.0 / math.pi)
    inner = scale * (x + 0.044715 * x * x * x)
    return half_x * (1.0 + K.tanh(inner))

def spatialAttention_eff(input_feature,kernel_size=7,name=""):
    """Re-weight ``input_feature`` with a sigmoid attention map.

    The mean and the max over axis 3 are concatenated into a two-channel
    tensor, passed through a bias-free dilated ``Conv1D`` followed by a
    sigmoid, and the result is multiplied element-wise with
    ``input_feature``.

    Args:
        input_feature: Tensor to gate. Axis 3 is reduced, so this is
            presumably a channels-last 4-D feature map -- TODO confirm.
        kernel_size: Currently NOT used; the convolution below always uses a
            kernel size of 7. NOTE(review): wiring this argument in would
            change the architecture (and invalidate weights trained with
            it), so it is only documented here.
        name: Suffix appended to ``"spatial_attention_"`` to name the
            convolution layer.

    Returns:
        ``input_feature`` multiplied by the attention map.
    """
    channel_mean = tf.reduce_mean(input_feature, axis=3, keepdims=True)
    channel_max = tf.reduce_max(input_feature, axis=3, keepdims=True)
    pooled = Concatenate(axis=3)([channel_mean, channel_max])

    # 80 filters: the gate is multiplied with input_feature, so this
    # presumably mirrors the channel count of the incoming feature map --
    # TODO confirm against the caller.
    gate_conv = Conv1D(
        filters=80,
        kernel_size=7,
        strides=1,
        padding='same',
        use_bias=False,
        name="spatial_attention_" + str(name),
        dilation_rate=2,
    )
    gate = gate_conv(pooled)
    gate = Activation('sigmoid')(gate)
    return Multiply()([input_feature, gate])

def residual_block_eff(x, filters, conv_num=3, activation="relu"):
    """Apply repeated dilated Conv1D pairs with a shared shortcut, then max-pool.

    A kernel-size-1 convolution of the input forms the shortcut. Then, for
    ``conv_num - 1`` rounds, the running tensor goes through
    Conv1D(3) -> ReLU -> Conv1D(3), the same shortcut is added, and a ReLU is
    applied. The result is reduced by a 3x3, stride-2 ``MaxPooling2D``.

    Args:
        x: Input tensor. The final ``MaxPooling2D`` implies a 4-D feature
            map -- TODO confirm.
        filters: Number of filters of every convolution in the block.
        conv_num: The conv/add/ReLU round is repeated ``conv_num - 1`` times.
        activation: Currently NOT used; ``ReLU`` is always applied.
            NOTE(review): kept only for signature compatibility.

    Returns:
        The pooled output tensor.

    Note:
        The pooling layer is always named ``'residual_end'``, so using this
        block more than once in one model would presumably clash on layer
        names -- TODO confirm before reusing.
    """
    def dilated_conv(width):
        # Every convolution in the block shares filters, padding and dilation.
        return Conv1D(filters, width, padding="same", dilation_rate=12)

    # The shortcut is created first so layer creation order is unchanged.
    shortcut = dilated_conv(1)(x)

    out = x
    for _ in range(conv_num - 1):
        out = dilated_conv(3)(out)
        out = ReLU()(out)
        out = dilated_conv(3)(out)
        out = Add()([out, shortcut])
        out = ReLU()(out)

    pool = MaxPooling2D(pool_size=3, strides=2, name='residual_end')
    return pool(out)


# Build the classifier: an ImageNet-pretrained EfficientNetB0 truncated at
# 'block4c_project_conv', followed by attention, a residual block and a
# dense head with a 5-way softmax.
# NOTE: statement order matters here -- Keras auto-names layers in creation
# order -- so keep the sequence intact when editing.

# The backbone's own output is never used below (the graph is cut at an
# intermediate layer), so the pooling='max' setting should not affect the
# final model -- NOTE(review): confirm. The trailing 'avg' note is
# presumably the alternative pooling mode that was tried.
model_mfcc = EfficientNetB0(include_top=False, weights='imagenet', input_shape=(43,43,3), pooling='max')  # avg
# Sub-model ending at the intermediate layer whose features feed the head.
layer = tf.keras.models.Model(inputs=model_mfcc.input, outputs=model_mfcc.get_layer('block4c_project_conv').output)
x = layer.output
x = Dropout(0.25)(x)
# NOTE(review): spatialAttention_eff, as defined in this file, ignores
# kernel_size, so the 4 passed here has no effect.
x_sp = spatialAttention_eff(x,kernel_size=4,name="")
# Positional arguments are filters=8 and conv_num=3.
x = residual_block_eff(x_sp,8,3)
x = Flatten()(x)
x = Dropout(0.3)(x)
# Dense head: 1280 -> 128 units, each L2-regularised and followed by gelu.
x = Dense(1280,kernel_regularizer=regularizers.l2(0.001))(x)
x = gelu(x)
x = Dropout(0.1)(x)
x = Dense(128,kernel_regularizer=regularizers.l2(0.001))(x)
x = gelu(x)
# Five-way softmax output.
output = Dense(units=5, activation='softmax')(x)
# Rebind model_mfcc to the full model: backbone input -> softmax output.
model_mfcc = tf.keras.models.Model(inputs=model_mfcc.input, outputs=output)

In [None]:
# Learning rate passed to the Adam optimizer below.
lr = 0.00003
# One weight per output class (five entries, matching the 5-unit softmax),
# passed to the focal loss.
# NOTE(review): how these values were derived is not visible here.
class_weight = (0.1,0.153,0.153,0.207,0.376)

# Compile with Adam and a sparse categorical focal loss (gamma=2); accuracy
# is tracked as the only metric.
model_mfcc.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
              loss=SparseCategoricalFocalLoss(gamma=2,class_weight=class_weight),
              metrics=['accuracy'])

In [None]:
# Training hyper-parameters.
batch_size = 64
epoch = 150

# Train on the training split and evaluate on the held-out validation split
# after every epoch; the returned History object is kept in model_history.
# The inputs are wrapped in single-element lists.
model_history = model_mfcc.fit([mfcc_x_train],y_train,
                          validation_data = ([mfcc_x_val],y_val),
                          batch_size=batch_size,
                          epochs=epoch)

In [None]:
# model_mfcc.save_weights('mfcc.h5')