<a href="https://colab.research.google.com/github/Hazzd12/CASA0018_coursework/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Audio preprocessing
First, we need to convert the original audio files into Mel spectrograms, a common representation of audio features that is particularly well suited as input for training convolutional neural networks (CNNs).

In [None]:
import librosa
import numpy as np

def audio_to_melspectrogram(audio_path, sr=None, n_mels=128):
    """Load an audio file and return its mel spectrogram in decibels.

    Parameters
    ----------
    audio_path
        Path of the audio file, handed to ``librosa.load``.
    sr : int or None, optional
        Sampling rate passed to ``librosa.load``. The default ``None`` keeps
        the file's native rate; pass a fixed rate so that clips recorded at
        different rates produce comparable spectrograms.
    n_mels : int, optional
        Number of mel bands to compute (default 128).

    Returns
    -------
    np.ndarray
        Mel power spectrogram converted to dB with the clip's own maximum as
        reference, so the loudest bin is 0 dB and all other bins are negative.
    """
    samples, sample_rate = librosa.load(audio_path, sr=sr)
    mel_power = librosa.feature.melspectrogram(y=samples, sr=sample_rate, n_mels=n_mels)
    # ref=np.max normalises each clip against its own peak.
    return librosa.power_to_db(mel_power, ref=np.max)


In [None]:
# Assume the dataset is described by two parallel lists. These are placeholders:
# replace them with the real audio file paths and their ratings.
# NOTE: a literal `...` inside a list is the Ellipsis object, not "and so on".
# It would be passed to librosa.load and would end up in the label array,
# so the placeholder lists are written without it.
audio_paths = ['path/to/audio1.wav', 'path/to/audio2.wav']
scores = [3.5, 4.0]  # ratings are assumed to lie between 0 and 5

# Every clip needs exactly one label.
assert len(audio_paths) == len(scores), "audio_paths and scores must have the same length"

# Convert each audio file to its mel-spectrogram features.
# Clips of different duration give spectrograms with different frame counts,
# and np.array() cannot stack such ragged input into a regular array, so each
# spectrogram is stored in its own slot of a 1-D object array instead.
mel_spectrograms = [audio_to_melspectrogram(path) for path in audio_paths]
X_train = np.empty(len(mel_spectrograms), dtype=object)
for i, mel in enumerate(mel_spectrograms):
    X_train[i] = mel

# Float labels for the regression target.
y_train = np.array(scores, dtype=np.float32)


In [None]:
from tensorflow.keras import layers, models

def create_model(input_shape):
    """Build and compile a small CNN that regresses one score per input.

    The network is three 3x3 'same'-padded ReLU convolutions with 16, 32 and
    64 filters (the first two each followed by 2x2 max pooling), then a
    flatten, a 64-unit ReLU dense layer and a single linear output unit.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to ``layers.Input``.

    Returns
    -------
    The compiled ``Sequential`` model (Adam optimizer, mean-squared-error
    loss, mean absolute error reported as a metric).
    """
    stack = [layers.Input(shape=input_shape)]

    # (filters, followed_by_pooling) for each convolutional stage.
    for filters, pooled in ((16, True), (32, True), (64, False)):
        stack.append(layers.Conv2D(filters, 3, activation='relu', padding='same'))
        if pooled:
            stack.append(layers.MaxPooling2D(2))

    stack.append(layers.Flatten())
    stack.append(layers.Dense(64, activation='relu'))
    # Linear activation on the output layer so it can predict the score.
    stack.append(layers.Dense(1, activation='linear'))

    network = models.Sequential(stack)
    network.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return network

# Mel spectrograms can differ in size, so every spectrogram is brought to one
# fixed input size, here (128, 128), before being batched.
# fix_length pads or trims along ONE axis (the last one by default), so it is
# applied to the time axis with size=128. Asking for size=128*128 would yield a
# (128, 16384) array that cannot be reshaped to (128, 128).
# Short clips are padded with the spectrogram's own minimum (its quietest dB
# value) instead of the default 0, because 0 dB is the peak value after
# power_to_db(..., ref=np.max).
# The trailing np.newaxis adds the channel axis the Conv2D layers expect:
# (samples, 128, 128) -> (samples, 128, 128, 1).
X_train_resized = np.stack([
    librosa.util.fix_length(x, size=128, axis=1, constant_values=x.min())
    for x in X_train
])[..., np.newaxis]

# Per-sample shape handed to the model's Input layer.
input_shape = X_train_resized.shape[1:]

model = create_model(input_shape)


In [None]:
# Train for 20 epochs in mini-batches of 32, holding out 20% of the data for
# validation; `history` keeps the per-epoch loss and MAE records.
history = model.fit(
    X_train_resized,
    y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
)
