# MS 2.2- Group 10

**Members**: Niklas Grüner (12217059), Konstantin Unterweger (12222169), Martin Harhammer (12221683)


The following sections describe and implement an attempt to Audio Identification. 

## Imports

In [3]:
# imports
import os, sys
import numpy as np
from numba import jit
import librosa
#from scipy import signal
from scipy import ndimage
from matplotlib import pyplot as plt
import IPython.display as ipd
import time

sys.path.append('..')
import libfmp.b
import libfmp.c2
import libfmp.c6
from IPython.display import clear_output
from collections import defaultdict
import math
import matplotlib.pyplot as plt
import pickle 

import gc


%matplotlib inline

## Utility functions

Here we define all functions that, we later need to compute spectrograms, constellation maps, the hashes and perform the matching

In [4]:
def load_filenames(directory):
    filenames = []
    
    # Iterate through all files in the directory
    for filename in os.listdir(directory):
        # Create the full path
        file_path = os.path.join(directory, filename)
        
        # Check if it is a file (not a directory)
        if os.path.isfile(file_path):
            # Add to the dictionary, using the filename as the key
            filenames.append(file_path)

    return filenames

In [5]:
def compute_spectrogram(fn_wav, Fs=22050, N=2048, H=1024, bin_max=128, frame_max=None, duration=None):
    x, Fs = librosa.load(fn_wav, sr=Fs)
    x_duration = len(x) / Fs
    X = librosa.stft(x, n_fft=N, hop_length=H, win_length=N, window='hann')
    if bin_max is None:
        bin_max = X.shape[0]
    if frame_max is None:
        frame_max = X.shape[0]
    Y = np.abs(X[:bin_max, :frame_max])
    return Y

In [6]:
def compute_constellation_map(Y, dist_freq=7, dist_time=7, thresh=0.01):
    """Compute constellation map (implementation using image processing)

    Notebook: C7/C7S1_AudioIdentification.ipynb

    Args:
        Y (np.ndarray): Spectrogram (magnitude)
        dist_freq (int): Neighborhood parameter for frequency direction (kappa) (Default value = 7)
        dist_time (int): Neighborhood parameter for time direction (tau) (Default value = 7)
        thresh (float): Threshold parameter for minimal peak magnitude (Default value = 0.01)

    Returns:
        Cmap (np.ndarray): Boolean mask for peak structure (same size as Y)
    """
    result = ndimage.maximum_filter(Y, size=[2*dist_freq+1, 2*dist_time+1], mode='constant')
    Cmap = np.logical_and(Y == result, result > thresh)
    return Cmap

In [7]:
from concurrent.futures import ThreadPoolExecutor

def compute_constellation_map_single(args):
    """Compute the constellation map for a single file."""
    filename, dist_freq, dist_time = args
    spectrogram = compute_spectrogram(filename)  # Perform I/O and computation
    constellation_map = compute_constellation_map(spectrogram, dist_freq, dist_time)
    return filename, constellation_map

def compute_constellation_maps(filenames, dist_freq, dist_time):
    """Compute constellation maps using multithreading."""
    # Prepare arguments for each file
    args = [(filename, dist_freq, dist_time) for filename in filenames]
    
    # Use ThreadPoolExecutor for multithreading
    with ThreadPoolExecutor() as executor:
        results = executor.map(compute_constellation_map_single, args)
    
    # Convert results to a dictionary
    Cmaps = dict(results)
    return Cmaps

In [8]:
count = 0
count2 = 0

In [23]:

def compute_hashes_from_spectrogram(filename, dist_freq, dist_time, 
                                    time_min_offset, time_max_offset, 
                                    freq_min_offset, freq_max_offset):
    """
    Compute hashes for a single track directly from its spectrogram.
    
    Args:
        filename (str): Path to the audio file.
        dist_freq (int): Neighborhood parameter for frequency direction.
        dist_time (int): Neighborhood parameter for time direction.
        time_min_offset, time_max_offset, freq_min_offset, freq_max_offset (int): 
            Bounds for the "target zone" around an anchor point.

    Returns:
        list: [(hash, track_name, time_offset), ...]
    """

    global count
    global count2
    spectrogram = compute_spectrogram(filename)
    cmap = compute_constellation_map(spectrogram, dist_freq, dist_time)
    
    # Convert the boolean map to a list of points (freq, time), sorted by time
    point_list = sorted(np.argwhere(cmap).tolist(), key=lambda x: x[1])
    hashes = []
    del cmap
    del spectrogram
    
    for anchor in point_list:
        target_points = get_target_zone_points(
            anchor, 
            point_list,
            time_min_offset=time_min_offset,
            time_max_offset=time_max_offset,
            freq_min_offset=freq_min_offset,
            freq_max_offset=freq_max_offset
        )
        for target_point in target_points:
            h = compute_hash(anchor, target_point)
            hashes.append((h, int(extract_numeric_id(filename)), anchor[1]))
    
    count += 1
    count2 += 1
    if (count % 1000) == 0:
        gc.collect()
        print(f"Processed {count} tracks")

    return hashes




def build_database(
    directory,
    dist_freq=11,
    dist_time=3,
    time_min_offset=5,
    time_max_offset=30,
    freq_min_offset=-15,
    freq_max_offset=15
):
    global count
    count = 0
    """
    Build a fingerprint database directly from spectrograms using multithreading.
    
    Args:
        directory (str): Directory containing audio files.
        dist_freq, dist_time (int): Constellation map neighborhood parameters.
        time_min_offset, time_max_offset, freq_min_offset, freq_max_offset (int): 
            Bounds for the "target zone" around an anchor point.

    Returns:
        database (defaultdict): {hash: [(track_name, time_offset), ...]}
    """
    tracks = load_filenames(directory)

    path_name = "partial_database_" + str(dist_freq) + "#" + str(dist_time) + "#" + str(time_min_offset) + "#" + str(time_max_offset) + "#" + str(freq_min_offset) + "#" + str(freq_max_offset) + ".pkl"

    # Prepare arguments for each file
    args = [
        (filename, dist_freq, dist_time, time_min_offset, time_max_offset, freq_min_offset, freq_max_offset) 
        for filename in tracks
    ]
    
    database = defaultdict(list)
    global count2

    # Use multithreading to compute hashes for all tracks
    with ThreadPoolExecutor() as executor:
        for result in executor.map(lambda x: compute_hashes_from_spectrogram(*x), args):
            for h, track_name, time_offset in result:
                database[h].append((track_name, time_offset))

            if(count2 > 1000):
                count2 = 0
                save_partial_database(database, path_name)
                database.clear()
    
    save_partial_database(database, path_name)
    database.clear()
    return database





In [10]:
def compute_hash(anchor, target):
    """Generate a 32-bit hash."""
    f1, t1 = anchor
    f2, t2 = target
    dt = t2 - t1
    return (f1 & 0x3FF) | ((f2 & 0x3FF) << 10) | ((dt & 0xFFF) << 20)



In [11]:
def get_target_zone_points(anchor, point_list,
                           time_min_offset, time_max_offset,
                           freq_min_offset, freq_max_offset):
    """
    Finds points in 'point_list' that lie within time [t1 + time_min_offset, t1 + time_max_offset]
    and frequency [f1 + freq_min_offset, f1 + freq_max_offset].
    
    anchor: (f1, t1) or (t1, f1) – whichever convention you are using.
    point_list: list of (f, t) or (t, f) – must be sorted by the time coordinate if we want to break early.
    time_min_offset, time_max_offset: how far in time we look relative to anchor's time t1.
    freq_min_offset, freq_max_offset: how far in frequency we look relative to anchor's freq f1.
    """
    f1, t1 = anchor
    
    # Time bounds
    t_min = t1 + time_min_offset
    t_max = t1 + time_max_offset
    
    # Frequency bounds
    f_min = f1 + freq_min_offset
    f_max = f1 + freq_max_offset
    
    target_zone_points = []
    for (f2, t2) in point_list:
        # If the list is sorted by time and t2 > t_max, we can break early.
        if t2 > t_max:
            break
        
        if t_min < t2 <= t_max and f_min <= f2 <= f_max:
            target_zone_points.append((f2, t2))
    
    return target_zone_points


In [12]:
import re
import random

def extract_numeric_id(filename):
    """
    Example: 
      - 'queries/1269810_original.mp3' => '1269810'
      - 'tracks/1269810.mp3'           => '1269810'
    Adjust to your naming conventions as needed.
    """
    match = re.search(r'(\d+)', filename)
    return match.group(1) if match else None

In [13]:
def find_best_match_for_query(
    query_name,
    cmap,
    database,
    time_min_offset=5,
    time_max_offset=30,
    freq_min_offset=-15,
    freq_max_offset=15
):
    """
    Given a query's spectrogram cmap (2D Boolean array),
    find the best matching track in the database of fingerprints.

    Returns:
        best_track: str or None
        best_delta: int or None
        best_count: int
        point_list: list of non-zero (freq,time) points in the query
    """
    # Convert the boolean array to a list of points (freq, time)
    point_list = sorted(np.argwhere(cmap).tolist(), key=lambda x: x[1])

    # Dictionary of track_name -> (offset -> count)
    matches = defaultdict(lambda: defaultdict(int))
    
    # For each point in the query, find matching points in the database
    for anchor in point_list:
        target_points = get_target_zone_points(
            anchor,
            point_list,
            time_min_offset=time_min_offset,
            time_max_offset=time_max_offset,
            freq_min_offset=freq_min_offset,
            freq_max_offset=freq_max_offset
        )

        for target_point in target_points:
            h = compute_hash(anchor, target_point)

            # If our hash is found in the database
            if h in database:
                # database[h] = list of (track_name, track_offset)
                for track_name, track_offset in database[h]:
                    delta_offset = track_offset - anchor[1]
                    matches[track_name][delta_offset] += 1
    
    # Find the best match with the highest count
    best_track = None
    best_delta = None
    best_count = 0
    
    for track_name, offset_counts in matches.items():
        for delta_offset, count in offset_counts.items():
            if count > best_count:
                best_count = count
                best_track = track_name
                best_delta = delta_offset

    return best_track, best_delta, best_count, point_list

In [14]:
def load_all_chunks_from_pickle(filename):
    """Load all chunks from a single pickle file into memory."""
    chunks = []
    with open(filename, "rb") as f:
        while True:
            try:
                chunks.append(pickle.load(f))  # Load each chunk and append to the list
            except EOFError:
                break  # Stop when end of file is reached
    return chunks

In [30]:
def match_queries_with_chunks(
    directory,
    dist_freq=11,
    dist_time=3,
    time_min_offset=5,
    time_max_offset=30,
    freq_min_offset=-15,
    freq_max_offset=15
):
    """
    Match queries against a database loaded incrementally in chunks.

    Args:
        directory (str): Path to the query files.
        database_file (str): Path to the serialized database file.
        time_min_offset, time_max_offset, freq_min_offset, freq_max_offset (int): 
            Bounds for matching.

    Returns:
        results (list): List of query match results.
    """
    database_file = "partial_database_" + str(dist_freq) + "#" + str(dist_time) + "#" + str(time_min_offset) + "#" + str(time_max_offset) + "#" + str(freq_min_offset) + "#" + str(freq_max_offset) + ".pkl"
    queries = load_filenames(directory)  # Load all query filenames
    results = []
    num_correct = 0
    total_queries = len(queries)
    
    cmaps = {}
    
    for query_name in queries:
        _, cmap = compute_constellation_map_single((query_name, 11, 3))
        cmaps[query_name]=cmap

    bestmatches = {}
    for database_chunk in load_partial_database(database_file):
        for query_name in queries:
            start = time.time()
            track, delta, count, points = find_best_match_for_query(
                query_name,
                cmaps[query_name],
                database_chunk,
                time_min_offset=time_min_offset,
                time_max_offset=time_max_offset,
                freq_min_offset=freq_min_offset,
                freq_max_offset=freq_max_offset
            )
            if query_name in bestmatches: 
                if bestmatches[query_name][2] < count:
                    end = time.time()
                    d = (end - start) * 1000
                    d = d + bestmatches[query_name][4]
                    bestmatches[query_name] = track, delta, count, points, d
            else: 
                end = time.time()
                d = (end - start) * 1000
                bestmatches[query_name] = track, delta, count, points, d



    # Print final results
    count3 = 0
    total_time = 0.0  # To accumulate the total time
    time_count = 0  # To count the number of time values

    for query_name in queries:
        track_id, _, _, _, d = bestmatches[query_name]  # Unpack values, d is the time
        is_match = int(extract_numeric_id(query_name)) == track_id  # Check if IDs match

        # Extract numeric time from string (e.g., "1.23s" -> 1.23)
        total_time += d  # Add to total time
        time_count += 1  # Increment the count for time

        if is_match:
            count3 += 1
        # Print query, track ID, match status, and time
        print(f"Query: {query_name}, Track ID: {track_id}, Match: {is_match}, Time: {d}")

    # Calculate the average time
    average_time = total_time / time_count if time_count > 0 else 0

    # Print the final count and average time
    print(f"Count3: {count3}")
    print(f"Average Time: {average_time:.2f}s")

    return bestmatches


In [16]:
def load_partial_database(filename):
    """Load chunks from a pickle file incrementally."""
    with open(filename, "rb") as f:
        while True:
            try:
                yield pickle.load(f)  # Yield one chunk at a time
            except EOFError:
                break  # Stop when end of file is reached

In [17]:

def match_queries(
    directory,
    database,
    time_min_offset=5,
    time_max_offset=30,
    freq_min_offset=-15,
    freq_max_offset=15
):
    """
    - Matches each query in cmaps_Q to the best track in the database.
    :param cmaps_Q: dict {query_name: 2D numpy array (spectrogram boolean map)}
    :param database: dict {hash: [(track_name, track_offset), ...]}
    """

    queries = load_filenames(directory) # load all track filenames
        
    results = []
    num_correct = 0
    
    # Collect queries in a list to iterate consistently
    total_queries = len(queries)

    

    for query_name in queries:
        starttime = time.time()
        
        _, cmap = compute_constellation_map_single((query_name, 11, 3))       

        best_track = None
        best_delta = None
        best_count = 0
        point_list = []
        
        for database_chunk in load_partial_database(database_file):
            track, delta, count, points = find_best_match_for_query(
                query_name,
                cmap,
                database_chunk,
                time_min_offset=time_min_offset,
                time_max_offset=time_max_offset,
                freq_min_offset=freq_min_offset,
                freq_max_offset=freq_max_offset
            )

            # Update the best match if this chunk has a better match
            if count > best_count:
                best_track, best_delta, best_count, point_list = track, delta, count, points


        endtime = time.time()
        

        query_id = extract_numeric_id(query_name)
        track_id = extract_numeric_id(best_track) if best_track else None
        correct = (query_id == track_id)
        if correct:
            num_correct += 1

        # Store the result
        results.append((query_name, best_track, best_delta, best_count, correct, (endtime-starttime)*1000))

    # ----------------------------------------------------------------------
    # Print final results
    # ----------------------------------------------------------------------
    accuracy = num_correct / total_queries if total_queries else 0
    print("\nMATCHING RESULTS:")
    sum_duration = 0
    for qname, tname, offset, count, is_correct, duration in results:
        print(f"Query: {qname} => Best match: {tname}, Time: {duration} ms, Offset: {offset}, Count: {count}, Correct: {is_correct}")
        sum_duration += duration

    print(f"\nCorrect matches: {num_correct}/{total_queries}")
    print(f"Average query time: {sum_duration/len(results)} ms")
    print(f"Accuracy: {accuracy*100:.2f}%")

    return results

In [18]:
def save_partial_database(database, filename):
    """Append partial data to a pickle file."""
    with open(filename, "ab") as f:  # Append mode
        pickle.dump(database, f)


# Task 1

## Define configurations

In [22]:
config_1 = {
    "time_min_offset": 0,
    "time_max_offset": 50,
    "freq_min_offset": -10,
    "freq_max_offset": 30
}

config_2 = {
    "time_min_offset": 0,
    "time_max_offset": 30,
    "freq_min_offset": 0,
    "freq_max_offset": 25
}

config_3 = {
    "time_min_offset": 10,
    "time_max_offset": 40,
    "freq_min_offset": -20,
    "freq_max_offset": 20
}

config_4 = {
    "time_min_offset": 10,
    "time_max_offset": 60,
    "freq_min_offset": -20,
    "freq_max_offset": 20
}



## Build the database

In [20]:
s = time.time()
database_1 = build_database("tracks", **config_1)
e = time.time()
print(e-s)

Processed 1000 tracks
Processed 2000 tracks
Processed 3000 tracks
Processed 4000 tracks
Processed 5000 tracks
Processed 6000 tracks
Processed 7000 tracks
Processed 8000 tracks
Processed 9000 tracks
Processed 10000 tracks
Processed 11000 tracks
Processed 12000 tracks
Processed 13000 tracks
Processed 14000 tracks
Processed 15000 tracks
Processed 16000 tracks
Processed 17000 tracks
Processed 18000 tracks
Processed 19000 tracks
Processed 20000 tracks
Processed 21000 tracks
Processed 22000 tracks
Processed 23000 tracks
Processed 24000 tracks
Processed 25000 tracks
Processed 26000 tracks
Processed 27000 tracks
Processed 28000 tracks
Processed 29000 tracks
Processed 30000 tracks
Processed 31000 tracks
Processed 32000 tracks
Processed 33000 tracks
2438.1496403217316


In [21]:
database_2 = build_database("tracks", **config_2)

Processed 34000 tracks
Processed 35000 tracks
Processed 36000 tracks
Processed 37000 tracks
Processed 38000 tracks
Processed 39000 tracks
Processed 40000 tracks
Processed 41000 tracks
Processed 42000 tracks
Processed 43000 tracks
Processed 44000 tracks
Processed 45000 tracks
Processed 46001 tracks
Processed 47000 tracks
Processed 48000 tracks
Processed 49000 tracks
Processed 50000 tracks
Processed 51000 tracks
Processed 52000 tracks
Processed 53000 tracks
Processed 54000 tracks
Processed 55000 tracks
Processed 56000 tracks
Processed 57000 tracks
Processed 58000 tracks
Processed 59000 tracks
Processed 60000 tracks
Processed 61000 tracks
Processed 62000 tracks
Processed 63000 tracks
Processed 64000 tracks
Processed 65000 tracks
Processed 66000 tracks
Processed 67000 tracks


In [24]:
s = time.time()

database_3 = build_database("tracks", **config_3)
e = time.time()
print(e-s)

Processed 1000 tracks
Processed 2000 tracks
Processed 3000 tracks
Processed 4000 tracks
Processed 5000 tracks
Processed 6000 tracks
Processed 7000 tracks
Processed 8000 tracks
Processed 9000 tracks
Processed 10000 tracks
Processed 11000 tracks
Processed 12000 tracks
Processed 13000 tracks
Processed 14000 tracks
Processed 15000 tracks
Processed 16000 tracks
Processed 17000 tracks
Processed 18000 tracks
Processed 19000 tracks
Processed 20000 tracks
Processed 21000 tracks
Processed 22000 tracks
Processed 23000 tracks
Processed 24000 tracks
Processed 25000 tracks
Processed 26001 tracks
Processed 27000 tracks
Processed 28000 tracks
Processed 29000 tracks
Processed 30000 tracks
Processed 31000 tracks
Processed 32000 tracks
Processed 33000 tracks
2361.0693411827087


In [25]:
database_4 = build_database("tracks", **config_4)

Processed 1000 tracks
Processed 2001 tracks
Processed 3000 tracks
Processed 4000 tracks
Processed 5000 tracks
Processed 6000 tracks
Processed 7000 tracks
Processed 8000 tracks
Processed 9000 tracks
Processed 10000 tracks
Processed 11000 tracks
Processed 12000 tracks
Processed 13000 tracks
Processed 14000 tracks
Processed 15000 tracks
Processed 16000 tracks
Processed 17000 tracks
Processed 18000 tracks
Processed 19000 tracks
Processed 20000 tracks
Processed 21000 tracks
Processed 22000 tracks
Processed 23000 tracks
Processed 24000 tracks
Processed 25000 tracks
Processed 26000 tracks
Processed 27000 tracks
Processed 28000 tracks
Processed 29000 tracks
Processed 30001 tracks
Processed 31000 tracks
Processed 32000 tracks
Processed 33000 tracks


# Task 2: Audio Identification

### Load the Databases into memory (skip if not needed)

In [32]:
s = time.time()
matches_1 = match_queries_with_chunks("queries", **config_1)
e = time.time()
print(e-s)

Query: queries/1192210_noise.mp3, Track ID: 1192210, Match: True, Time: 4899.5232582092285
Query: queries/91810_coding.mp3, Track ID: 91810, Match: True, Time: 60.437679290771484
Query: queries/1084710_original.mp3, Track ID: 1084710, Match: True, Time: 155.40218353271484
Query: queries/242610_mobile.mp3, Track ID: 900733, Match: False, Time: 25.046586990356445
Query: queries/53310_original.mp3, Track ID: 53310, Match: True, Time: 264.6794319152832
Query: queries/1192210_mobile.mp3, Track ID: 1192210, Match: True, Time: 42.215585708618164
Query: queries/1147910_mobile.mp3, Track ID: 1362117, Match: False, Time: 31.372547149658203
Query: queries/242610_noise.mp3, Track ID: 242610, Match: True, Time: 197.79610633850098
Query: queries/152310_mobile.mp3, Track ID: 152310, Match: True, Time: 133.58187675476074
Query: queries/1227910_original.mp3, Track ID: 1227910, Match: True, Time: 219.75135803222656
Query: queries/963810_noise.mp3, Track ID: 963810, Match: True, Time: 480.56936264038086


In [33]:
matches_2 = match_queries_with_chunks("queries", **config_2)

Query: queries/1192210_noise.mp3, Track ID: 1192210, Match: True, Time: 52.24132537841797
Query: queries/91810_coding.mp3, Track ID: 91810, Match: True, Time: 26.519775390625
Query: queries/1084710_original.mp3, Track ID: 1084710, Match: True, Time: 110.30721664428711
Query: queries/242610_mobile.mp3, Track ID: 1262645, Match: False, Time: 23.451566696166992
Query: queries/53310_original.mp3, Track ID: 53310, Match: True, Time: 38.89894485473633
Query: queries/1192210_mobile.mp3, Track ID: 1192210, Match: True, Time: 506.13999366760254
Query: queries/1147910_mobile.mp3, Track ID: 173343, Match: False, Time: 511.0750198364258
Query: queries/242610_noise.mp3, Track ID: 242610, Match: True, Time: 167.97804832458496
Query: queries/152310_mobile.mp3, Track ID: 152310, Match: True, Time: 42.971134185791016
Query: queries/1227910_original.mp3, Track ID: 1227910, Match: True, Time: 715.6801223754883
Query: queries/963810_noise.mp3, Track ID: 963810, Match: True, Time: 219.14100646972656
Query:

In [34]:
matches_3 = match_queries_with_chunks("queries", **config_3)

Query: queries/1192210_noise.mp3, Track ID: 1192210, Match: True, Time: 47.7452278137207
Query: queries/91810_coding.mp3, Track ID: 91810, Match: True, Time: 19.625186920166016
Query: queries/1084710_original.mp3, Track ID: 1084710, Match: True, Time: 246.81901931762695
Query: queries/242610_mobile.mp3, Track ID: 1352901, Match: False, Time: 13.442277908325195
Query: queries/53310_original.mp3, Track ID: 53310, Match: True, Time: 18.624305725097656
Query: queries/1192210_mobile.mp3, Track ID: 1192210, Match: True, Time: 709.6705436706543
Query: queries/1147910_mobile.mp3, Track ID: 1066916, Match: False, Time: 28.91063690185547
Query: queries/242610_noise.mp3, Track ID: 242610, Match: True, Time: 101.84335708618164
Query: queries/152310_mobile.mp3, Track ID: 152310, Match: True, Time: 82.58891105651855
Query: queries/1227910_original.mp3, Track ID: 1227910, Match: True, Time: 145.33519744873047
Query: queries/963810_noise.mp3, Track ID: 963810, Match: True, Time: 131.2096118927002
Quer

In [None]:
matches_4 = match_queries_with_chunks("queries", **config_4)

# Task 3: Scale up

# Task 4: Report