<a href="https://colab.research.google.com/github/PulseProgrammer/Audio_Fingerprint/blob/main/AudioID.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import librosa
import numpy as np
import os 
from skimage.feature import peak_local_max
from matplotlib import pyplot as plt

In [None]:
# Create the results file if it does not exist yet; this is the same path that
# is later passed to audioIdentification as its output file.
!touch /content/output.txt
# Number of 512-sample STFT hops in 30 seconds of audio sampled at 44100 Hz.
# The matching step uses it as the half-width of its time-offset histogram
# (2 * shift_vector + 1 slots) and as the index of "zero offset".
shift_vector = 44100 * 30 // 512      # largest frame offset considered in either direction

In [None]:
def fingerprintbuilder(dbr_path, fing_db):
  """Build and save a spectral-peak fingerprint for every WAV file in a folder.

  Each ``.wav`` file in ``dbr_path`` is loaded at 44100 Hz, padded with zeros
  or trimmed to exactly 30 seconds, and converted to a magnitude spectrogram
  (``n_fft=2048``, ``hop_length=512``, ``center=False``). Local maxima of the
  spectrogram are collected into an inverted list that maps a frequency-bin
  index to the list of frame indices at which a peak occurs in that bin.

  The inverted list is written to ``<fing_db>/<recording name>.npy`` as a
  pickled dict; read it back with
  ``np.load(path, allow_pickle=True).item()``.

  Args:
    dbr_path: Directory containing the database recordings (``.wav`` files).
    fing_db: Directory that receives the fingerprint files. It is created
      if it does not exist.

  Returns:
    None. The fingerprints are written to disk.
  """
  os.makedirs(fing_db, exist_ok=True)
  wav_names = [x for x in os.listdir(dbr_path) if x.endswith('.wav')]

  for wav_name in wav_names:
    y, sr = librosa.load(os.path.join(dbr_path, wav_name), sr=44100)

    # Force every recording to exactly 30 seconds so that all fingerprints
    # cover the same number of frames.
    clip_samples = sr * 30
    if len(y) < clip_samples:
      y = np.pad(y, (0, clip_samples - len(y)))
    else:
      y = y[:clip_samples]

    spec = np.abs(librosa.stft(y=y, n_fft=2048, hop_length=512, center=False))
    # Each peak is a (row, column) pair, i.e. (frequency bin, frame index).
    peaks = peak_local_max(spec, min_distance=10, threshold_abs=0.05)

    # Record EVERY peak. The frame index must be appended whether or not the
    # frequency bin has been seen before; appending only when the bin is new
    # would keep just the first peak per bin and discard the rest.
    inverted_list = {}
    for freq_bin, frame in peaks:
      # Plain ints keep the pickled file independent of numpy scalar types.
      inverted_list.setdefault(int(freq_bin), []).append(int(frame))

    stem = os.path.splitext(wav_name)[0]
    np.save(os.path.join(fing_db, stem + ".npy"), inverted_list)

In [None]:
def _best_alignment_score(query_peaks, inverted_list, max_shift):
  """Return the largest number of peak pairs that agree on one time offset.

  For every query peak, each database peak in the same frequency bin votes
  for the offset ``database_frame - query_frame``. The score is the highest
  vote count over all offsets in ``[-max_shift, max_shift]``.
  """
  histogram = np.zeros(2 * max_shift + 1, dtype=np.int64)
  for freq_bin, query_frame in query_peaks:
    db_frames = inverted_list.get(int(freq_bin))
    if db_frames is None:
      continue
    offsets = np.asarray(db_frames, dtype=np.int64) - int(query_frame) + max_shift
    # Drop offsets outside the histogram: a negative index would otherwise
    # wrap around silently and vote for an unrelated offset.
    in_range = (offsets >= 0) & (offsets < histogram.size)
    # One vote per (query peak, database peak) pair; np.add.at accumulates
    # correctly even if the same offset appears more than once.
    np.add.at(histogram, offsets[in_range], 1)
  return int(histogram.max())


def audioIdentification(qr_path, fing_db, output_file):
  """Identify each query recording against the stored fingerprints.

  Every ``.wav`` file in ``qr_path`` is loaded at 44100 Hz and reduced to
  spectral peaks with the same STFT and peak-picking settings used to build
  the fingerprints. Each fingerprint in ``fing_db`` is scored by the best
  time-offset agreement between query peaks and database peaks, and the
  (up to) three best-scoring recordings are reported.

  The result is written to ``output_file`` with one line per query:
  the query name followed by the candidate names, each field terminated by
  a tab character.

  Relies on the module-level ``shift_vector`` for the largest frame offset
  considered in either direction.

  Args:
    qr_path: Directory containing the query recordings (``.wav`` files).
    fing_db: Directory containing the ``.npy`` fingerprint files.
    output_file: Path of the text file to (over)write with the results.

  Returns:
    None. The results are written to ``output_file``.
  """
  # Sorted so that ties between equally scored recordings are broken the
  # same way on every run, independent of the directory listing order.
  fingerprint_files = sorted(x for x in os.listdir(fing_db) if x.endswith('.npy'))
  database = []
  for fingerprint_file in fingerprint_files:
    # NOTE: allow_pickle=True unpickles arbitrary objects; only point this at
    # fingerprint files that were produced locally.
    inverted_list = np.load(os.path.join(fing_db, fingerprint_file), allow_pickle=True).item()
    database.append((os.path.splitext(fingerprint_file)[0], inverted_list))

  lines = []
  for q_file in sorted(x for x in os.listdir(qr_path) if x.endswith('.wav')):
    y, sr = librosa.load(os.path.join(qr_path, q_file), sr=44100)
    spec = np.abs(librosa.stft(y=y, n_fft=2048, hop_length=512, center=False))
    peaks = peak_local_max(spec, min_distance=10, threshold_abs=0.05)

    scores = [
        (name, _best_alignment_score(peaks, inverted_list, shift_vector))
        for name, inverted_list in database
    ]
    # list.sort is stable, so equally scored recordings keep their order.
    scores.sort(key=lambda item: item[1], reverse=True)

    query_name = os.path.splitext(os.path.basename(q_file))[0]
    # Slicing copes with databases that hold fewer than three recordings.
    fields = [query_name] + [name for name, _ in scores[:3]]
    lines.append("\t".join(fields) + "\t\n")

  # Write once, after all queries have been processed.
  with open(output_file, "w") as f:
    f.writelines(lines)




In [None]:
# Fingerprint every database recording; one .npy file per recording is
# written into /content/DR_Fingerprint.
# NOTE(review): the input path presumably requires Google Drive to be mounted
# at /content/drive first -- no mount step is visible in this notebook.
fingerprintbuilder(
    dbr_path="/content/drive/MyDrive/Dataset/database_recordings",
    fing_db="/content/DR_Fingerprint",
)

In [None]:
# Match every query recording against the stored fingerprints and write the
# ranked candidates to /content/output.txt.
audioIdentification(
    qr_path="/content/drive/MyDrive/Dataset/query_recordings",
    fing_db="/content/DR_Fingerprint",
    output_file="/content/output.txt",
)

In [None]:
# Score the identification run: a query counts as a hit when the name of its
# source recording (the part of the query name before "-snippet") appears in
# any of the candidates listed for it.
num_matches = 0
num_lines = 0
# Read the same absolute path the identification step wrote to, so the result
# does not depend on the notebook's current working directory.
with open("/content/output.txt", "r") as f:
    for line in f:
        # Fields are tab-separated; splitting on tabs only keeps file names
        # that contain spaces intact. The trailing tab yields an empty field,
        # which is dropped.
        filenames = [field for field in line.rstrip("\n").split("\t") if field]
        if not filenames:
            continue  # ignore blank lines instead of failing on filenames[0]
        ground_truth = filenames[0].split("-snippet")[0]  # extract ground truth filename
        if any(ground_truth in filename for filename in filenames[1:]):
            num_matches += 1
        num_lines += 1

if num_lines:
    accuracy = num_matches / num_lines
    print("Accuracy:", accuracy * 100)
else:
    # Nothing to score: avoid dividing by zero on an empty results file.
    accuracy = float("nan")
    print("Accuracy: no results found in /content/output.txt")


Accuracy: 80.75117370892019
