### 构建模型

In [2]:
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, Dense, GRU, Embedding, Concatenate, Reshape, TimeDistributed, Dropout, Flatten, Lambda
from keras.optimizers import Adam
from keras.losses import BinaryCrossentropy, SparseCategoricalCrossentropy

# 超参数
noise_dim = 100
num_classes = 8  # 假设有10个类别
max_seq_length = 128  # 最大序列长度
feature_dim = 13  # 特征维度
eos_token = feature_dim + 1  # EOS标记的索引

# 生成器
def build_generator():
    noise_input = Input(shape=(noise_dim,))
    label_input = Input(shape=(1,), dtype='int32')
    
    # 类别标签嵌入
    label_embedding = Embedding(num_classes, 50)(label_input)
    label_embedding = Flatten()(label_embedding)
    
    # 将噪声向量和嵌入后的标签连接
    gen_input = Concatenate()([noise_input, label_embedding])
    
    # 使用全连接层扩展维度
    x = Dense(256 * max_seq_length)(gen_input)
    x = Reshape((max_seq_length, 256))(x)
    
    # 使用GRU生成时序数据
    gru_output, final_state = GRU(128, return_sequences=True, return_state=True)(x)
    gru_output = Dropout(0.3)(gru_output)

    # 使用GRU的状态来预测终止标记
    eos_prob = Dense(1, activation='sigmoid')(final_state)
    
    # 输出层
    out = TimeDistributed(Dense(feature_dim + 1, activation=None))(gru_output)
    out = Lambda(lambda x: tf.nn.softmax(x, axis=-1))(out)
    
    model = Model(inputs=[noise_input, label_input], outputs=[out, eos_prob], name='generator')
    return model

# 判别器
def build_discriminator():
    mfcc_input = Input(shape=(max_seq_length, eos_token))
    label_input = Input(shape=(1,), dtype='int32')
    
    # 类别标签嵌入
    label_embedding = Embedding(num_classes, 50)(label_input)
    label_embedding = Flatten()(label_embedding)
    
    # 使用GRU处理时序数据
    x = GRU(64, return_sequences=True)(mfcc_input)
    x = Dropout(0.3)(x)
    x = GRU(32)(x)
    
    # 将GRU输出和嵌入后的标签连接
    x = Concatenate()([x, label_embedding])
    
    # 真伪判断
    validity = Dense(1, activation='sigmoid')(x)
    
    # 类别预测
    label = Dense(num_classes, activation='softmax')(x)
    
    model = Model(inputs=[mfcc_input, label_input], outputs=[validity, label], name='discriminator')
    return model

# 实例化模型
generator = build_generator()
discriminator = build_discriminator()

# 定义损失函数
def acgan_loss(alpha=0.5):
    # 初始化损失函数
    bce = BinaryCrossentropy(from_logits=True)
    sce = SparseCategoricalCrossentropy(from_logits=True)

    def loss(y_true, y_pred):
        validity, label_pred = y_pred
        y_true_validity, y_true_label = y_true
        
        # 计算真实性损失
        loss_real = bce(y_true_validity, validity)
        
        # 计算辅助分类损失
        loss_aux = sce(y_true_label, label_pred)
        
        # 总损失
        return alpha * loss_real + (1 - alpha) * loss_aux
    
    return loss

# 编译判别器
discriminator.compile(optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
                      loss=acgan_loss(),
                      metrics=['accuracy'])

# 创建AC-GAN
input_noise = Input(shape=(noise_dim,))
input_label = Input(shape=(1,), dtype='int32')
generated_mfcc = generator([input_noise, input_label])

# 在训练生成器时冻结判别器
discriminator.trainable = False
validity, label_pred = discriminator([generated_mfcc, input_label])

acgan = Model(inputs=[input_noise, input_label], outputs=[validity, label_pred])
acgan.compile(optimizer=Adam(learning_rate=0.0002, beta_1=0.5), loss=['binary_crossentropy', 'sparse_categorical_crossentropy'])

# 显示模型结构
print(generator.summary())
print(discriminator.summary())

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 embedding (Embedding)          (None, 1, 50)        400         ['input_2[0][0]']                
                                                                                                  
 input_1 (InputLayer)           [(None, 100)]        0           []                               
                                                                                                  
 flatten (Flatten)              (None, 50)           0           ['embedding[0][0]']              
                                                                                              

### 数据处理

In [1]:
import os
import librosa
import numpy as np

def load_audio(file_path):
    # 加载音频文件
    y, sr = librosa.load(file_path)
    return y, sr

def extract_features(y, sr):
    # 提取特征
    n_fft = min(2048, len(y))
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, n_fft=n_fft).T
    return mfccs

def preprocess_data(label_dir):
    X, y = [], []
    for label in os.listdir(label_dir):
        for file_name in os.listdir(os.path.join(label_dir, label)):
            audio, sr = load_audio(os.path.join(label_dir, label, file_name))
            features = extract_features(audio, sr)
            X.append(features)
            y.append(label)
    return X, y

X, y = preprocess_data('kick_samples')

In [3]:
from keras.utils import to_categorical

cat_dict = {
    "Top":          0,
    "Chest":        1,
    "Signature":    2,
    "Stadium":      3,
    "Punchy":       4,
    "808s":         5,
    "Big":          6,
    "Hardstyle":    7,
}

y = list(map(cat_dict.get, y))
y = np.array(y)

# 数据预处理
# 对类别标签进行one-hot编码
y = to_categorical(y, num_classes=num_classes)

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (530,) + inhomogeneous part.

### 模型训练

In [None]:
# 生成随机噪声
def generate_noise(batch_size):
    return np.random.normal(0, 1, (batch_size, noise_dim))

# 生成真实数据批次
def generate_real_batch(batch_size):
    idx = np.random.randint(0, X.shape[0], batch_size)
    mfccs = X[idx]
    labels = y[idx]
    return mfccs, labels

# 生成假数据批次
def generate_fake_batch(batch_size, generator):
    noise = generate_noise(batch_size)
    labels = np.random.randint(0, num_classes, batch_size)
    labels = to_categorical(labels, num_classes=num_classes)
    generated_mfccs, eos_probs = generator.predict([noise, labels])
    eos_indices = np.argmax(generated_mfccs == eos_token, axis=1)
    eos_indices[eos_indices == 0] = max_seq_length  # 如果没有生成EOS，则使用最大长度
    for i, eos_index in enumerate(eos_indices):
        generated_mfccs[i, eos_index:] = 0  # 填充0来表示序列结束
    return generated_mfccs, labels

# 训练判别器
def train_discriminator(discriminator, real_mfccs, real_labels, fake_mfccs, fake_labels):
    # 真实样本标签
    valid_y = np.ones((real_mfccs.shape[0], 1))
    # 假样本标签
    fake_y = np.zeros((fake_mfccs.shape[0], 1))
    
    # 训练判别器
    d_loss_real = discriminator.train_on_batch([real_mfccs, real_labels], [valid_y, real_labels])
    d_loss_fake = discriminator.train_on_batch([fake_mfccs, fake_labels], [fake_y, fake_labels])
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
    return d_loss

# 训练生成器
def train_generator(acgan, batch_size):
    noise = generate_noise(batch_size)
    labels = np.random.randint(0, num_classes, batch_size)
    labels = to_categorical(labels, num_classes=num_classes)
    valid_y = np.ones((batch_size, 1))
    
    # 训练生成器
    g_loss = acgan.train_on_batch([noise, labels], valid_y)
    return g_loss

In [None]:
# from keras.optimizers import Adam
from keras.losses import BinaryCrossentropy, SparseCategoricalCrossentropy
# from keras.metrics import Mean

# 超参数
batch_size = 32
epochs = 1000
sample_interval = 100  # 每隔多少批次保存一次生成的样本
n_critic = 5  # 每训练一轮生成器前先训练判别器的轮数

# 损失函数
bce_loss = BinaryCrossentropy()
scce_loss = SparseCategoricalCrossentropy()

# 主训练循环
d_losses = []
g_losses = []

for epoch in range(epochs):
    for _ in range(n_critic):
        # 生成真实数据批次
        real_mfccs, real_labels = generate_real_batch(batch_size)
        # 生成假数据批次
        fake_mfccs, fake_labels = generate_fake_batch(batch_size, generator)
        # 训练判别器
        d_loss = train_discriminator(discriminator, real_mfccs, real_labels, fake_mfccs, fake_labels)
        
    # 训练生成器
    g_loss = train_generator(acgan, batch_size)
    
    # 记录损失
    d_losses.append(d_loss)
    g_losses.append(g_loss)
    
    # 打印进度
    print(f"{epoch+1}/{epochs} [D loss: {d_loss[0]}, acc: {100*d_loss[3]}] [G loss: {g_loss}]")
    
    # 每隔一定批次保存生成的样本
    if (epoch + 1) % sample_interval == 0:
        # 保存生成器的权重
        generator.save_weights('generator_weights.h5')
        # 可以在这里添加代码来生成一些样本并保存它们