本檔案使用資料處理後的音訊資料與病史進行訓練與預測

訓練結果的模型權重將存為「sincnet.h5」

In [None]:
import pandas as pd
import numpy as np
import math
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

import keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from keras_self_attention import SeqSelfAttention
from tensorflow.keras import initializers, regularizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D,Conv2D, MaxPooling1D, MaxPooling2D, Flatten, Dropout,Layer,Concatenate,Input,Activation
from tensorflow.keras.layers import BatchNormalization,LayerNormalization,LeakyReLU,ReLU,Add,AveragePooling1D
from tensorflow.keras.layers import GlobalAveragePooling1D, Reshape, multiply, add, GlobalMaxPooling1D, Multiply
from tensorflow.keras.layers import Masking, Bidirectional, LSTM
from sincnet_tensorflow import SincConv1D
from focal_loss import SparseCategoricalFocalLoss

In [None]:
# Audio sample rate; passed as `fs` to the SincConv1D layers built below.
sample_rate = 16000

# Location of the preprocessed training archive (.npz).
train_data_path = '/home/user8008//sdk/sail/audio/cup/final/train_data(1000)(normalize).npz'
data = np.load(train_data_path)

In [None]:
# Pull the four arrays used for training out of the loaded archive, in the
# same order the names are listed on the left-hand side.
train_audio, train_clinical, train_y, train_y_bln = (
    data[key]
    for key in ('train_audio', 'train_clinical', 'train_y', 'train_y_bln')
)

In [None]:
# Split audio, clinical features and labels in ONE call so that all three
# arrays are partitioned with the same train/validation indices.  The split
# is an 80/20 hold-out, seeded for reproducibility and stratified on
# `train_y_bln`.  train_test_split returns a (train, validation) pair for each
# input array, in the order the arrays were passed.
(
    audio_x_train, audio_x_val,
    clinical_x_train, clinical_x_val,
    y_train, y_val,
) = train_test_split(
    train_audio,
    train_clinical,
    train_y,
    test_size=0.2,
    random_state=5473,
    stratify=train_y_bln,
)

In [None]:
# Three SincConv1D front-end layers that differ only in `Filt_dim`
# (251, 501 and 1001); each uses 60 filters, stride 24 and "SAME" padding.
# They are created in ascending filter-length order.
sinc_layer251, sinc_layer501, sinc_layer1001 = (
    SincConv1D(N_filt=60, Filt_dim=filter_len, fs=sample_rate, stride=24, padding="SAME")
    for filter_len in (251, 501, 1001)
)

In [None]:
def residual_block(x, filters, conv_num=3, activation="relu", name="residual_end"):
    """Stack dilated Conv1D pairs with a shared 1x1 shortcut, then max-pool.

    Args:
        x: Input Keras tensor.
        filters: Number of filters for every Conv1D in the block (including
            the shortcut projection).
        conv_num: Controls the depth; the conv/GELU/conv/add/GELU group is
            repeated ``conv_num - 1`` times.
        activation: Accepted for backward compatibility but NOT used; the
            block always applies ``gelu``.
        name: Name given to the final MaxPooling1D layer.  Defaults to the
            previously hard-coded ``'residual_end'``; pass a different value
            when the block is used more than once in the same model, since
            Keras requires layer names to be unique within a model.

    Returns:
        The max-pooled (pool_size=3, strides=2) output tensor.
    """
    # 1x1 projection of the block input; the SAME tensor is re-added in every
    # iteration of the loop below.
    shortcut = Conv1D(filters, 1, padding="same", dilation_rate=12)(x)
    for _ in range(conv_num - 1):
        x = Conv1D(filters, 3, padding="same", dilation_rate=12)(x)
        x = gelu(x)
        x = Conv1D(filters, 3, padding="same", dilation_rate=12)(x)
        x = Add()([x, shortcut])
        x = gelu(x)
    return MaxPooling1D(pool_size=3, strides=2, name=name)(x)

def channelAttention(input_feature, ratio=8, name=""):
    """Re-weight the channels of ``input_feature`` with a sigmoid gate.

    The gate is built from the global-average and global-max pooled
    descriptors of the input, each passed through the same two Dense layers
    (a ``channel // ratio`` ReLU bottleneck followed by a ``channel``-wide
    linear layer), summed, and squashed with a sigmoid.

    Args:
        input_feature: Input Keras tensor; its last dimension is taken as the
            channel count.
        ratio: Reduction factor of the Dense bottleneck.
        name: Suffix appended to the two shared Dense layer names.

    Returns:
        ``input_feature`` multiplied element-wise by the channel gate.
    """
    n_channels = input_feature.shape[-1]

    # Both pooled descriptors go through these two layers (shared weights).
    bottleneck = Dense(
        n_channels // ratio,
        activation='relu',
        use_bias=False,
        name="channel_attention_shared_one_" + str(name),
    )
    expand = Dense(
        n_channels,
        use_bias=False,
        name="channel_attention_shared_two_" + str(name),
    )

    # Order is always [average, max] so layers are created/called in the same
    # sequence at every stage.
    descriptors = [
        GlobalAveragePooling1D()(input_feature),
        GlobalMaxPooling1D()(input_feature),
    ]
    # Reshape to (1, channels) so the gate can be multiplied with the input.
    descriptors = [Reshape((1, n_channels))(d) for d in descriptors]
    descriptors = [bottleneck(d) for d in descriptors]
    descriptors = [expand(d) for d in descriptors]

    gate = Add()(descriptors)
    gate = Activation('sigmoid')(gate)
    return Multiply()([input_feature, gate])

def spatialAttention(input_feature, kernel_size=7, name=""):
    """Re-weight ``input_feature`` with a per-position sigmoid gate.

    The mean and max over axis 2 are concatenated into a two-channel map,
    reduced to a single channel by a dilated Conv1D, passed through a sigmoid
    and multiplied back onto the input.

    Args:
        input_feature: Input Keras tensor; axis 2 is the axis that is reduced.
        kernel_size: Kernel size of the gating Conv1D.  (Previously this
            argument was ignored and 7 was always used; the default keeps
            that behaviour.)
        name: Suffix appended to the Conv1D layer name.

    Returns:
        ``input_feature`` multiplied element-wise by the gate.
    """
    avg_pool = tf.reduce_mean(input_feature, axis=2, keepdims=True)
    max_pool = tf.reduce_max(input_feature, axis=2, keepdims=True)
    concat = Concatenate(axis=2)([avg_pool, max_pool])
    cbam_feature = Conv1D(
        filters=1,
        kernel_size=kernel_size,
        strides=1,
        padding='same',
        use_bias=False,
        name="spatial_attention_" + str(name),
        dilation_rate=2,
    )(concat)
    cbam_feature = Activation('sigmoid')(cbam_feature)
    out = Multiply()([input_feature, cbam_feature])
    return out

def cbamBlock(cbam_feature, ratio=8, name=""):
    """Apply channel attention followed by spatial attention.

    Args:
        cbam_feature: Input Keras tensor.
        ratio: Bottleneck reduction factor forwarded to ``channelAttention``.
        name: Suffix forwarded to both attention helpers for layer naming.

    Returns:
        The tensor after both attention stages.
    """
    cbam_feature = channelAttention(cbam_feature, ratio, name)
    # `name` must be passed by keyword: the second positional parameter of
    # spatialAttention is `kernel_size`, not `name`.
    cbam_feature = spatialAttention(cbam_feature, name=name)
    return cbam_feature

class TemporalAttention(keras.Model):
    """Collapse axis 1 of a 3-D tensor using learned softmax weights.

    A trainable ``(fin, fout)`` matrix scores every position along axis 1;
    the scores go through ``tanh`` and a softmax over axis 1, and the input is
    summed along that axis with those weights.  The result has its axis 1
    squeezed away, which only works when ``fout == 1`` (the default).
    """

    def __init__(self, fin, fout=1):
        """Create the scoring matrix.

        Args:
            fin: Size of the last dimension of the input to ``call``.
            fout: Number of score columns; ``call`` squeezes this axis, so it
                is expected to be 1.
        """
        super().__init__()
        self.fin = fin
        self.fout = fout

        self.initializer = initializers.GlorotUniform()
        initial_w = self.initializer(shape=[self.fin, self.fout], dtype=tf.float32)
        self.w = tf.Variable(initial_w)

    def call(self, h):
        """Return the attention-weighted sum of ``h`` over axis 1."""
        scores = h @ self.w
        # Normalise over axis 1 so the weights along that axis sum to one.
        weights = tf.nn.softmax(tf.tanh(scores), 1)
        # (i, j, k) x (i, j, m) -> (i, k, m): weighted sum over axis j.
        pooled = tf.einsum('ijk,ijm->ikm', weights, h)
        return tf.squeeze(pooled, [1])
    
def gelu(x):
    """Apply the tanh-based GELU approximation element-wise to ``x``.

    Computes ``0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))``.
    """
    scale = math.sqrt(2.0 / math.pi)
    inner = scale * (x + 0.044715 * x * x * x)
    gate = 1.0 + K.tanh(inner)
    return 0.5 * x * gate

In [None]:
def build_model():
    """Assemble the two-input (audio + clinical) classification model.

    Inputs:
        * a ``(48000, 1)`` tensor fed to the three SincConv1D front-ends, and
        * a ``(10,)`` feature vector fed to a small Dense stack.

    The audio branch runs multi-scale SincConv1D -> pooling/normalisation ->
    spatial + channel attention -> residual block -> BiLSTM -> self-attention
    -> temporal attention.  Its output is concatenated with the Dense-stack
    output and classified by a 5-way softmax.

    Returns:
        An uncompiled ``tf.keras.models.Model`` taking
        ``[audio_input, clinical_input]``.
    """
    inputs = Input((48000, 1))
    finput_shape_1 = Input((10,))

    # Multi-scale front-end: the same input through each sinc layer, with a
    # light dropout per branch.
    sinc_branches = []
    for sinc_layer in (sinc_layer251, sinc_layer501, sinc_layer1001):
        branch = sinc_layer(inputs)
        sinc_branches.append(Dropout(0.05)(branch))
    audio = Concatenate(name="ms_sincnet")(sinc_branches)
    audio = MaxPooling1D(pool_size=3, name="MaxPooling")(audio)
    audio = LayerNormalization(name="Lnor1")(audio)
    activated = LeakyReLU(alpha=0.2)(audio)

    # Spatial and channel attention are applied in parallel to the same
    # tensor and their outputs concatenated.
    attended = [
        spatialAttention(activated, kernel_size=7, name=""),
        channelAttention(activated, ratio=10, name=""),
    ]
    audio = Concatenate(name="Concat_sp_and_ch")(attended)
    audio = Dropout(0.25)(audio)

    # Sequence modelling and pooling over the sequence axis.
    audio = residual_block(audio, 16, 3)
    audio = Masking(mask_value=0.0)(audio)
    audio = Bidirectional(LSTM(20, return_sequences=True))(audio)
    audio = SeqSelfAttention(attention_activation='sigmoid')(audio)
    # 40 = 2 x 20 LSTM units coming out of the bidirectional wrapper.
    audio = TemporalAttention(40, 1)(audio)
    audio = Dropout(0.25)(audio)

    # Dense stack for the 10-dimensional second input.
    clinical = Dense(300)(finput_shape_1)
    clinical = gelu(clinical)
    clinical = Dropout(0.25)(clinical)
    clinical = Dense(100)(clinical)
    clinical = gelu(clinical)
    clinical = Dense(25)(clinical)
    clinical = gelu(clinical)

    # Fusion head.
    fused = Concatenate()([audio, clinical])
    fused = Dense(8, kernel_regularizer=regularizers.l2(0.001))(fused)
    fused = gelu(fused)

    outputs = Dense(5, activation="softmax", name="output")(fused)
    return tf.keras.models.Model(inputs=[inputs, finput_shape_1], outputs=outputs)

In [None]:
# Training hyper-parameters: Adam learning rate and the per-class weights
# handed to the focal loss (one weight per output class).
lr = 0.0003
class_weight = (0.1, 0.153, 0.153, 0.207, 0.376)

model = build_model()

# Build the optimizer and loss first, then compile with accuracy as the only
# tracked metric.
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
loss_fn = SparseCategoricalFocalLoss(gamma=2, class_weight=class_weight)
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

In [None]:
batch_size = 128
epoch = 150

# Both the training and validation sets feed the model's two inputs in the
# order [audio, clinical].
train_inputs = [audio_x_train, clinical_x_train]
val_inputs = [audio_x_val, clinical_x_val]

model_history = model.fit(
    train_inputs,
    y_train,
    validation_data=(val_inputs, y_val),
    batch_size=batch_size,
    epochs=epoch,
)

In [None]:
# model.save_weights('sincnet.h5')