# Task 2 – Sound Similarity Detection

In [3]:
import os
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
import matplotlib.pyplot as plt
import seaborn as sns

In [8]:
# Root folder holding one sub-directory per species, each containing .npy MFCC arrays.
# Set the MFCC_FEATURES_DIR environment variable to point at another copy of the data;
# the original local path is kept as the fallback so existing setups keep working.
base_dir = os.environ.get(
    "MFCC_FEATURES_DIR",
    r"D:\project echo\Project-Echo-main\src\Prototypes\data\mfcc_features",
)
mfcc_data = []   # one flattened feature vector per accepted file
file_paths = []  # "<species>/<filename>" label for the vector at the same position

# os.listdir returns entries in arbitrary order; sorting keeps the row order of the
# feature matrix (and every label/index derived from it) reproducible across runs.
for species in sorted(os.listdir(base_dir)):
    species_path = os.path.join(base_dir, species)
    if not os.path.isdir(species_path):
        continue
    for filename in sorted(os.listdir(species_path)):
        if not filename.endswith(".npy"):
            continue
        full_path = os.path.join(species_path, filename)
        mfcc = np.load(full_path)
        # Only 2-D arrays are used; arrays of any other rank are skipped.
        if mfcc.ndim == 2:
            mfcc_flat = mfcc.flatten()[:3000]  # cap the vector length at 3000 values
            mfcc_data.append(mfcc_flat)
            file_paths.append(f"{species}/{filename}")

# Fail with a clear message instead of min()'s "empty sequence" error.
if not mfcc_data:
    raise ValueError(f"No 2-D .npy MFCC arrays found under {base_dir!r}")

# Make sure every feature vector has the same length by truncating to the shortest one.
# NOTE(review): flatten() is row-major, so if the source arrays differ in their second
# dimension the same flat position refers to different (row, column) cells in different
# files -- TODO confirm the arrays share a common shape or truncate before flattening.
min_len = min(len(x) for x in mfcc_data)
mfcc_data = [x[:min_len] for x in mfcc_data]
mfcc_matrix = np.stack(mfcc_data)
print(f"Loaded {len(mfcc_matrix)} MFCC vectors of length {min_len}")

Loaded 7514 MFCC vectors of length 403


In [14]:
# Pairwise comparison of every MFCC vector with every other one.
# Both results are square matrices with one row and one column per loaded file.
euc_dist = euclidean_distances(mfcc_matrix)
cos_sim = cosine_similarity(mfcc_matrix)

# Wrap the raw matrices in DataFrames labelled by "<species>/<filename>" on both axes
# so entries can be looked up by file rather than by position.
cos_df, euc_df = (
    pd.DataFrame(matrix, index=file_paths, columns=file_paths)
    for matrix in (cos_sim, euc_dist)
)

In [11]:
# Visual sanity check: heatmap of the cosine-similarity sub-matrix for the first
# 10 loaded samples (rows/columns 0-9 of cos_df), with file paths as tick labels.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    cos_df.iloc[:10, :10],
    cmap="viridis",
    xticklabels=True,
    yticklabels=True,
    ax=ax,
)
# "First", not "Top": the slice is positional, it is not ranked by similarity.
ax.set_title("Cosine Similarity (First 10 samples)")
fig.tight_layout()
plt.show()

Found 12652829 similar sound pairs above threshold 0.85
Acanthiza chrysorrhoa/region_11.250-13.250.npy <--> Acanthiza chrysorrhoa/region_11.750-13.750.npy | Cosine: 0.93
Acanthiza chrysorrhoa/region_11.250-13.250.npy <--> Acanthiza chrysorrhoa/region_12.800-14.800.npy | Cosine: 0.99
Acanthiza chrysorrhoa/region_11.250-13.250.npy <--> Acanthiza chrysorrhoa/region_13.250-15.250.npy | Cosine: 0.99
Acanthiza chrysorrhoa/region_11.250-13.250.npy <--> Acanthiza chrysorrhoa/region_16.750-18.750.npy | Cosine: 1.00
Acanthiza chrysorrhoa/region_11.250-13.250.npy <--> Acanthiza chrysorrhoa/region_17.600-19.600.npy | Cosine: 0.94


In [13]:
# Cosine similarity a pair must strictly exceed to be reported as "similar".
# NOTE(review): the recorded run reports 12,652,829 pairs above 0.85 for 7,514 files,
# i.e. a large share of all possible pairs -- the threshold (or the unnormalised
# features) may not be discriminative enough; TODO confirm.
threshold = 0.85

# Vectorised search over all index pairs (i, j) with i < j:
#   * np.triu(..., k=1) keeps only entries strictly above the diagonal, which drops
#     self-pairs and the mirrored (j, i) duplicates of the symmetric matrix;
#   * np.nonzero returns the surviving positions in row-major order, i.e. sorted by
#     i and then by j, so the pairs come out in ascending (i, j) order.
above_threshold = np.triu(cos_sim > threshold, k=1)
row_idx, col_idx = np.nonzero(above_threshold)
similar_pairs = [
    (file_paths[i], file_paths[j], score)
    for i, j, score in zip(row_idx, col_idx, cos_sim[row_idx, col_idx])
]

# Report the total and preview the first five (file_a, file_b, similarity) tuples.
print(f"Found {len(similar_pairs)} similar sound pairs above threshold {threshold}")
for p in similar_pairs[:5]:
    print(f"{p[0]} <--> {p[1]} | Cosine: {p[2]:.2f}")

Found 12652829 similar sound pairs above threshold 0.85
Acanthiza chrysorrhoa/region_11.250-13.250.npy <--> Acanthiza chrysorrhoa/region_11.750-13.750.npy | Cosine: 0.93
Acanthiza chrysorrhoa/region_11.250-13.250.npy <--> Acanthiza chrysorrhoa/region_12.800-14.800.npy | Cosine: 0.99
Acanthiza chrysorrhoa/region_11.250-13.250.npy <--> Acanthiza chrysorrhoa/region_13.250-15.250.npy | Cosine: 0.99
Acanthiza chrysorrhoa/region_11.250-13.250.npy <--> Acanthiza chrysorrhoa/region_16.750-18.750.npy | Cosine: 1.00
Acanthiza chrysorrhoa/region_11.250-13.250.npy <--> Acanthiza chrysorrhoa/region_17.600-19.600.npy | Cosine: 0.94
