<a href="https://colab.research.google.com/github/Hazzd12/CASA0018_coursework/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Audio preprocessing
First, we need to convert the original audio file to the Mayer spectrum, a common representation of audio features that is particularly suitable for feeding convolutional neural networks (CNNS) for training.

In [285]:
import numpy as np
import librosa
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from skimage.transform import resize

# def load_and_trim_audio(audio_path, target_length=1.5):
#     y, sr = librosa.load(audio_path)
#     if len(y) > sr * target_length:
#         y = y[:int(sr * target_length)]  # 截取前target_length秒的音频数据
#     return y, sr
def load_and_segment_audio(audio_path, target_length=1.5):
    y, sr = librosa.load(audio_path)
    buffer_length = int(sr * target_length)
    segments = [y[i:i + buffer_length] for i in range(0, len(y), buffer_length) if i + buffer_length <= len(y)]
    return segments, sr

def process_audio_segments(audio_path):
    segments, sr = load_and_segment_audio(audio_path)
    features_list = []
    for segment in segments:
        # 假设extract_melspectrogram是提取特征的函数
        melspectrogram = extract_melspectrogram(segment, sr)
        features_list.append(melspectrogram)
    return features_list

def add__noise(data, noise_level=0.005, color='pink'):
    noise = np.random.randn(len(data))
    augmented_data = data + noise_level * noise
    return augmented_data

def extract_melspectrogram(y, sr, n_fft=2048, hop_length=512, n_mels=128):
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
    S_DB = librosa.power_to_db(S, ref=np.max)
    S_resized = resize_melspectrogram(S_DB, target_shape=(128, 128))
                # 确保增加一个维度来表示单通道
    S_resized = S_resized[..., np.newaxis]
    return S_resized


In [286]:
import zipfile
import os
import shutil

def unzip_audio_files(zip_path, extract_path):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    print(f"Extracted audio files to {extract_path}")

def delete_directory(directory_path):
    try:
        shutil.rmtree(directory_path)
        print(f"Directory '{directory_path}' deleted successfully.")
    except OSError as e:
        print(f"Error: {directory_path} : {e.strerror}")

# 设置ZIP文件路径和解压目录
zip_path = '/content/dataset/Data.zip'
extract_path = '/content/dataset/data'

# 解压ZIP文件
delete_directory(extract_path)
unzip_audio_files(zip_path, extract_path)


Directory '/content/dataset/data' deleted successfully.
Extracted audio files to /content/dataset/data


In [287]:


def process_and_visualize(audio_path, target_length=1.5, noise_level=0.005):
    # 加载并截取音频
    y, sr = load_and_trim_audio(audio_path, target_length=target_length)

    # 添加噪声
    y_noisy = add__noise(y, noise_level=noise_level)

    # 提取梅尔频谱
    melspectrogram = extract_melspectrogram(y_noisy, sr)

    # 可视化梅尔频谱
    # plt.figure(figsize=(10, 4))
    # librosa.display.specshow(melspectrogram, sr=sr, hop_length=512, x_axis='time', y_axis='mel')
    # plt.colorbar(format='%+2.0f dB')
    # plt.title('Mel Spectrogram with Noise')
    # plt.tight_layout()
    #plt.show()

    return melspectrogram

# 示例：处理并可视化音频文件
#melspectrogram = process_and_visualize(str(audio_file))


In [288]:
from pathlib import Path

def resize_melspectrogram(mels, target_shape=(128, 128)):
    # 使用 skimage 的 resize 函数调整 mels 的大小
    return resize(mels, target_shape, mode='constant', anti_aliasing=True)

def load_data_and_labels(audio_dir):
    categories = [f.name for f in os.scandir(audio_dir) if f.is_dir()]
    labels_dict = {category: i for i, category in enumerate(categories)}
    print(labels_dict)
    X, y = [], []
    for category, label in labels_dict.items():
        category_dir = Path(audio_dir) / category
        for audio_file in category_dir.glob('*.ogg'):  # 直接处理ogg文件
            try:
                segments, sr = load_and_segment_audio(str(audio_file))
                for segment in segments:
                    spectrogram = extract_melspectrogram(segment, sr)
                    X.append(spectrogram)
                    y.append(label)
            except Exception as e:
                print(f"Error processing {audio_file}: {e}")
    return np.array(X), np.array(y), categories

# 加载数据和标签
X, y, categories = load_data_and_labels(extract_path+'/Data')
# 可选：保存数据和标签为NumPy数组文件
np.save('X.npy', X)
np.save('y.npy', y)
print(categories)

{'102 - Rooster': 0, '105 - Frog': 1, '101 - Dog': 2, '104 - Cow': 3, '103 - Pig': 4}
['102 - Rooster', '105 - Frog', '101 - Dog', '104 - Cow', '103 - Pig']


In [289]:
from tensorflow.keras.regularizers import l2
# 模型构建函数
def build_model(input_shape, num_classes):
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        #Dense(64, activation='relu'),
        Dense(128, activation='relu', kernel_regularizer=l2(0.01)),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    return model

In [268]:
# 相似度计算函数
def calculate_similarity(feature1, feature2):
    return cosine_similarity(feature1.reshape(1, -1), feature2.reshape(1, -1))[0][0]

In [290]:
# 数据集分割（这里使用假设的X和y）
input_shape = (128, 128, 1)
num_classes = 5  # 假设的类别数量
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# 构建并编译模型
model = build_model(input_shape, num_classes)
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [291]:

# 训练模型
history = model.fit(X_train, y_train, epochs=15, validation_split=0.2)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


1232

In [292]:
def extract_features(model, audio_path):
    # 使用之前定义的 process_and_visualize 函数来处理音频文件
    melspectrogram = process_and_visualize(audio_path)
    # 将melspectrogram调整到模型的输入大小
    melspectrogram = melspectrogram.reshape(1, *melspectrogram.shape, 1)
    # 使用模型提取特征
    features = model.predict(melspectrogram)
    return features


In [293]:
def find_most_similar_animal(model, audio_path, categories, category_features):
    # 提取音频的特征
    new_audio_features = extract_features(model, audio_path)

    # 计算与每个类别的特征的相似度
    similarities = {category: calculate_similarity(new_audio_features, cat_feat) for category, cat_feat in category_features.items()}

    # 找出相似度最高的类别
    most_similar_category = max(similarities, key=similarities.get)
    similarity_percent = similarities[most_similar_category] * 100  # 转换为百分比形式

    print(f"Most similar animal: {most_similar_category} with similarity: {similarity_percent:.2f}%")
    return most_similar_category, similarity_percent


In [300]:
def similarity_to_specified_animal(model, audio_path, specified_category, category_features):
    # 提取音频的特征
    new_audio_features = extract_features(model, audio_path)

    # 检查指定的类别是否存在于特征字典中
    if specified_category not in category_features:
        raise ValueError(f"Specified category '{specified_category}' not found in the provided categories.")

    # 计算与指定类别的特征的相似度
    cat_feat = category_features[specified_category]
    similarity = calculate_similarity(new_audio_features, cat_feat)

    # 转换为百分比形式
    similarity_percent = similarity * 100

    print(f"Similarity to {specified_category}: {similarity_percent:.2f}%")
    return similarity_percent


In [297]:
# 假设model是已经训练好的模型
# 假设X_train, y_train是你的训练数据和标签
category_features = {}
categories = ['Rooster', 'Frog', 'Cow', 'Pig', 'Dog']  # 示例类别
label_indices = {cat: np.where(y_train == i)[0] for i, cat in enumerate(categories)}

for cat, indices in label_indices.items():
    # 假设extract_features从数据中提取特征
    cat_features = np.mean([model.predict(X_train[idx].reshape(1, *X_train[idx].shape, 1)) for idx in indices], axis=0)
    category_features[cat] = cat_features

{'Rooster': array([[0.79012895, 0.03848081, 0.06755476, 0.0392361 , 0.06459934]],
      dtype=float32), 'Frog': array([[0.0212969 , 0.93195033, 0.02255466, 0.00859012, 0.01560798]],
      dtype=float32), 'Cow': array([[0.04855144, 0.03687205, 0.8245051 , 0.03777032, 0.05230124]],
      dtype=float32), 'Pig': array([[0.03334854, 0.01609053, 0.01833466, 0.903013  , 0.02921328]],
      dtype=float32), 'Dog': array([[0.03824312, 0.02489342, 0.06020427, 0.05802218, 0.818637  ]],
      dtype=float32)}


In [304]:
# 调用函数
audio_path = '/content/dataset/testData/FROG_5.s2 (1).wav'  # 新音频文件路径
most_similar_animal, similarity_percent = find_most_similar_animal(model, audio_path, categories, category_features)

# 输出结果
print(f"The most similar animal is {most_similar_animal} with a similarity of {similarity_percent:.2f}%.")

find_most_similar_animals(model, audio_path, categories, category_features)
similarity_to_specified_animal(model, audio_path, 'Dog', category_features)

Most similar animal: Cow with similarity: 99.74%
The most similar animal is Cow with a similarity of 99.74%.
Similarity with each animal:
Rooster: 9.60%
Frog: 5.69%
Cow: 99.72%
Pig: 3.05%
Dog: 9.90%

Most similar animal: Cow with similarity: 99.72%
Similarity to Dog: 10.34%


10.342983156442642

In [298]:
from pathlib import Path

def evaluate_audio_folder(model, folder_path, categories, category_features):
    # 使用Path对象获取文件夹路径
    folder = Path(folder_path)
    results = []

    # 遍历文件夹中的所有音频文件
    for audio_file in folder.glob('*.wav'):  # 假设音频文件为ogg格式
        try:
            # 使用find_most_similar_animal函数找出最相似的动物
            most_similar_animal, similarity_percent = find_most_similar_animal(model, str(audio_file), categories, category_features)
            find_most_similar_animals(model, str(audio_file), categories, category_features)
            # 保存结果
            results.append((audio_file.name, most_similar_animal, similarity_percent))
            print(f"File: {audio_file.name} - Most similar animal: {most_similar_animal} with similarity: {similarity_percent:.2f}%")
            print("\n")
        except Exception as e:
            print(f"Error processing {audio_file.name}: {e}")

    return results

# 调用evaluate_audio_folder函数
# 以下变量需要先定义好：model, categories, category_features
folder_path = '/content/dataset/testData'  # 修改为音频文件所在的目录路径
audio_results = evaluate_audio_folder(model, folder_path, categories, category_features)

Most similar animal: Pig with similarity: 99.84%
Similarity with each animal:
Rooster: 4.92%
Frog: 0.92%
Cow: 4.55%
Pig: 99.84%
Dog: 7.04%

Most similar animal: Pig with similarity: 99.84%
File: Cow.s3.wav - Most similar animal: Pig with similarity: 99.84%


Most similar animal: Pig with similarity: 99.84%
Similarity with each animal:
Rooster: 4.92%
Frog: 0.92%
Cow: 4.55%
Pig: 99.84%
Dog: 7.04%

Most similar animal: Pig with similarity: 99.84%
File: Cow.s2.wav - Most similar animal: Pig with similarity: 99.84%


Most similar animal: Cow with similarity: 99.00%
Similarity with each animal:
Rooster: 22.37%
Frog: 4.05%
Cow: 99.37%
Pig: 13.53%
Dog: 10.75%

Most similar animal: Cow with similarity: 99.37%
File: Frog.s3.wav - Most similar animal: Cow with similarity: 99.00%


Most similar animal: Pig with similarity: 99.86%
Similarity with each animal:
Rooster: 5.06%
Frog: 0.95%
Cow: 5.19%
Pig: 99.86%
Dog: 7.11%

Most similar animal: Pig with similarity: 99.86%
File: Frog.s1.wav - Most simil