### 数据处理与特征提取

在 `kick_samples` 文件夹中有 8 个子目录，分别对于不同 kick 样本的类别。采用独热编码，编码为长度为 8 的向量。

- 首先通过 `librosa` 加载音频文件

In [1]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split

def load_audio(file_path):
    # 加载音频文件
    y, sr = librosa.load(file_path, sr=16000)
    return y, sr

def extract_features(y, sr):
    # 提取特征
    n_fft = min(2048, len(y))
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, n_fft=n_fft).T
    return mfccs

def preprocess_data(label_dir):
    X, y = [], []
    for label in os.listdir(label_dir):
        for file_name in os.listdir(os.path.join(label_dir, label)):
            audio, sr = load_audio(os.path.join(label_dir, label, file_name))
            features = extract_features(audio, sr)
            X.append(features)
            y.append(label)
    return X, y


X, y = preprocess_data('kick_samples')
print(X[0])

[[-234.05597    203.531       49.927414  ...    7.677088     9.928037
     6.722163 ]
 [-254.66223    199.44435     61.523598  ...   10.261335    10.4524975
     9.955725 ]
 [-361.2908     132.92685     93.41626   ...   13.143988    12.698032
    12.344129 ]
 ...
 [-464.18796      4.6727586    4.665011  ...    4.4217567    4.3697395
     4.313256 ]
 [-465.8464       2.3291216    2.3266516 ...    2.2483687    2.2314472
     2.212997 ]
 [-467.49396      0.           0.        ...    0.           0.
     0.       ]]


### 构建模型

In [2]:
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Dropout, Flatten

def build_multi_task_model(input_shape, num_classes):
    # 输入层
    inputs = Input(shape=input_shape)
    
    # 共享特征提取层
    lstm1 = LSTM(128, return_sequences=True)(inputs)
    dropout1 = Dropout(0.5)(lstm1)
    lstm2 = LSTM(64)(dropout1)
    dropout2 = Dropout(0.5)(lstm2)

    # 将 LSTM 输出展平，以便与 Dense 层连接
    flattened = Flatten()(dropout2)
    
    # 时间预测分支
    cat_output = Dense(num_classes, activation='softmax', name='cat')(flattened)
    
    # 构建模型
    model = Model(inputs=inputs, outputs=cat_output)
    
    # 编译模型
    model.compile(
        loss={'cat': 'categorical_crossentropy'},
        optimizer='adam',
        metrics={'cat': 'accuracy'}
    )
    
    return model

# 假设每个任务的类别数
num_classes = 14

# 假设输入形状
input_shape = (None, 13)  # 假设我们使用13维的MFCC特征

# 构建模型
model = build_multi_task_model(input_shape, num_classes)

### 训练模型

In [3]:
from keras.utils import to_categorical

cat_dict = {
    "Jungle": 0,
    "Top": 1,
    "Chest": 2,
    "Signature": 3,
    "Thump": 4,
    "Vinyl": 5,
    "Stomp": 6,
    "Punchy": 7,
    "808s": 8,
    "Distorted": 9,
    "Psy": 10,
    "Big": 11,
    "Hardstyle": 12,
    "Stadium": 13,
}

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.05, random_state=42)

y_train = list(map(cat_dict.get, y_train))
y_val = list(map(cat_dict.get, y_val))
print(y_val)

y_train_cat = to_categorical(y_train, num_classes)
y_val_cat = to_categorical(y_val, num_classes)

print(y_train_cat)

[7, 11, 13, 10, 11, 3, 8, 11, 12, 1, 9, 12, 5, 6, 12, 1, 7, 12]
[[0. 1. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]]


In [4]:
import tensorflow as tf

# 将X_train和X_val转换为RaggedTensor
X_train_ragged = tf.ragged.constant(X_train)
X_val_ragged = tf.ragged.constant(X_val)
y_train_cat = tf.convert_to_tensor(y_train_cat)
y_val_cat = tf.convert_to_tensor(y_val_cat)

# 创建tf.data.Dataset
train_dataset = tf.data.Dataset.from_tensor_slices((X_train_ragged, y_train_cat))
val_dataset = tf.data.Dataset.from_tensor_slices((X_val_ragged, y_val_cat))

# 批量化数据
batch_size = 32
train_dataset = train_dataset.batch(batch_size)
val_dataset = val_dataset.batch(batch_size)

In [9]:
# 训练模型
history = model.fit(
    train_dataset,
    epochs=10,
    validation_data=val_dataset
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### 通过新的数据集验证

In [32]:
# 使用模型进行预测
predictions = model.predict(val_dataset)

# 转换预测结果为类别索引
predicted_classes = np.argmax(predictions, axis=1)

# 由于验证标签已经是类别索引，直接使用
true_classes = y_val

# 计算准确率
accuracy = np.mean(predicted_classes == true_classes)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

Validation Accuracy: 58.62%


### 保存训练好的模型

In [10]:
# 假设 filepath 是你想要保存模型的路径
filepath = 'test_kick_1.h5'
# 保存模型
model.save(filepath)

# 检查模型文件是否存在
if os.path.exists(filepath):
    print(f"Model saved to {filepath}")
else:
    print(f"Model save failed. File not found at {filepath}")

Model saved to test_kick_1.h5
