In [30]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import json

embeddings = "/content/drive/MyDrive/NEU/embeddings.json"

# Open the JSON file and load its content into a dictionary
with open(embeddings, 'r') as file:
    embeddings_dict = json.load(file)

In [163]:
import numpy as np

In [32]:
sound_labels = {
"18-music3.wav": "music",
"13-fire2.wav": "fire",
"15-helicopter1.wav": "vehicle",
"16-motoboat.wav": "vehicle",
"1-aircraft1.wav": "vehicle",
"20-noise1.wav": "noise",
"3-aircraft3.wav": "vehicle",
"19-music4.wav": "music",
"21-noise2.wav": "noise",
"30-wind2.wav": "wind",
"17-music1.wav": "music",
"12-fire1.wav": "fire",
"14-guitar.wav": "musical instrument",
"4-bell1.wav": "musical instrument",
"2-aircraft2.wav": "vehicle",
"11-echo1.wav": "accoustic environment",
"24-violin1.wav": "musical instrument",
"27-whitenoise1.wav": "noise",
"25-water1.wav": "water",
"29-wind1.wav": "wind",
"28-wild animal.wav": "wild animal",
"26-whistle.wav": "whistling",
"23-skateboard2.wav": "skateboard",
"22-skateboard.wav": "skateboard",
"10-dog.wav": "domestic animal",
"9-clap2.wav": "hands",
"8-clap.wav": "hands",
"7-cat2.wav": "domestic animal",
"6-cat.wav": "domestic animal",
"5-bird.wav": "wild animal",
}

In [164]:
from scipy.spatial import distance

def euclidean_dist(vec1, vec2):
    return distance.euclidean(vec1, vec2)

def cosine_dist(vec1, vec2):
    return distance.cosine(vec1, vec2)


In [None]:
top_k = 2

In [113]:
# calculate the euclidean distance between the target audio and the entire collection
def euclidean_distance_to_all(target_id, audio_collection):
    distances = []
    target_data = audio_collection[target_id]

    for compare_id, compare_data in audio_collection.items():
        if target_id != compare_id:
            dist = euclidean_dist(target_data, compare_data)
            distances.append((compare_id, dist))

    distances.sort(key=lambda x: x[1])
    return distances

In [89]:
def euclidean_similar(embeddings_dict, top_k):
  top_k_results = {}

  for target_id, target_data in embeddings_dict.items():
      distances = []
      for compare_name, compare_data in embeddings_dict.items():
          if target_id != compare_name:
              dist = euclidean_dist(target_data, compare_data)
              distances.append((compare_name, dist))


      distances.sort(key=lambda x: x[1])
      top_k_ids = [id for id, dist in distances[:top_k]]
      top_k_results[target_id] = top_k_ids


  for target_id, similar_ids in top_k_results.items():
      print(f"Euclieand - Top {top_k} similar sounds for Audio {target_id}: {similar_ids}")

# euclidean_similar(embeddings_dict, top_k)

In [58]:
def cosine_similar(embeddings_dict, top_k):
  top_k_results = {}

  for target_id, target_data in embeddings_dict.items():
      distances = []
      for compare_name, compare_data in embeddings_dict.items():
          if target_id != compare_name:
              dist = cosine_dist(target_data, compare_data)
              distances.append((compare_name, dist))


      distances.sort(key=lambda x: x[1])
      top_k_ids = [id for id, dist in distances[:top_k]]
      top_k_results[target_id] = top_k_ids


  for target_id, similar_ids in top_k_results.items():
      print(f"Cosine - Top {top_k} similar sounds for Audio {target_id}: {similar_ids}")

# cosine_similar(embeddings_dict, top_k)

In [61]:
audio_names = list(sound_labels.keys())
audio_labels = [sound_labels[name] for name in audio_names]

def is_similar(label1, label2):
    return label1 == label2

In [179]:
def evaluate_similarity(distances, target_label, sound_labels, N):
    precision_at_N = []
    true_positives = 0
    # total_retrieved = 0
    similar_sounds = []

    # Sort the distances array by the distance values
    sorted_distances = sorted(distances, key=lambda x: x[1])
    sorted_audio_names = [name for name, dist in sorted_distances]

    for i, audio_name in enumerate(sorted_audio_names, 1):  # Start enumeration at 1
        if sound_labels[audio_name] == target_label:
            if true_positives < N:
                true_positives += 1
                similar_sounds.append((audio_name, sound_labels[audio_name]))

    precision_at_N = true_positives / N if N > 0 else 0
    # print(true_positives, N, precision_at_N)

    # For overall precision and recall, consider all retrieved audios
    total_retrieved = len(sorted_audio_names) - 1  # Exclude the target audio from total retrieved count
    total_similar = sum(1 for label in sound_labels.values() if label == target_label) - 1  # Exclude the target audio
    # precision = true_positives / total_retrieved if total_retrieved > 0 else 0
    recall = true_positives / total_similar if total_similar > 0 else 0

    return recall, precision_at_N, similar_sounds


In [180]:
#sample test
from scipy.spatial import distance

distance = euclidean_distance_to_all("19-music4.wav", embeddings_dict)
recall, precision_at_N, similar_sounds = evaluate_similarity(distance, "music", sound_labels, 1)
print(recall, precision_at_N, similar_sounds)

0.5 1.0 [('17-music1.wav', 'music')]


In [183]:
from scipy.spatial import distance

def calculate_overall_metrics(embeddings_dict, sound_labels, N=5):
    all_precision = []
    all_recall = []
    all_precision_at_N = []

    for target_id in embeddings_dict.keys():
        distances = euclidean_distance_to_all(target_id, embeddings_dict)
        target_label = sound_labels[target_id]
        # Evaluate similarity metrics for this target
        avg_recall, precision_at_N, similar_sounds = evaluate_similarity(distances, target_label, sound_labels, N)

        # print(target_id, target_label, precision_at_N, similar_sounds)
        # Collect the metrics for averaging
        all_precision.append(avg_precision)
        all_recall.append(avg_recall)
        all_precision_at_N.append(precision_at_N)

    # Compute the average metrics across all targets
    mean_precision = np.mean(all_precision)
    mean_recall = np.mean(all_recall)
    mean_precision_at_N = np.mean(all_precision_at_N)

    return mean_recall, mean_precision_at_N

n = 2
mean_recall, mean_precision_at_N = calculate_overall_metrics(embeddings_dict, sound_labels, n)

print(f"Mean Recall: {mean_recall}")
print(f"Mean Precision @ {n}: {mean_precision_at_N}")

Mean Recall: 0.8166666666666667
Mean Precision @ 2: 0.7333333333333333
