# MS 2.2 - Group 10

**Members**: Niklas Grüner (12217059), Konstantin Unterweger (12222169), Martin Harhammer (12221683)


The following sections describe and implement an approach to audio identification.

# Task 1

In [1]:
# imports
import os, sys
import numpy as np
from numba import jit
import librosa
#from scipy import signal
from scipy import ndimage
from matplotlib import pyplot as plt
import IPython.display as ipd
import time

sys.path.append('..')
import libfmp.b
import libfmp.c2
import libfmp.c6
from IPython.display import clear_output


%matplotlib inline

In [2]:
def define_target_zone(anchor, peaks, time_window, freq_range):
    """Collect the peaks that lie in the target zone of an anchor peak.

    Args:
        anchor: Pair (frequency, time) of the anchor peak
        peaks: Iterable of (frequency, time) pairs to choose targets from
        time_window: Maximum time distance after the anchor (inclusive)
        freq_range: Maximum absolute frequency distance to the anchor (inclusive)

    Returns:
        target_peaks (list): Tuples (frequency, time) of all peaks that occur
            strictly after the anchor and within both limits, in input order
    """
    anchor_freq, anchor_time = anchor
    latest_time = anchor_time + time_window
    zone = []
    for peak_freq, peak_time in peaks:
        # The anchor itself and all earlier peaks are never targets.
        in_time = anchor_time < peak_time <= latest_time
        if in_time and abs(anchor_freq - peak_freq) <= freq_range:
            zone.append((peak_freq, peak_time))
    return zone


In [3]:
def create_hash(anchor, target):
    """Pack an anchor/target peak pair into a single integer hash.

    Layout: the anchor frequency is shifted left by 20 bits, the target
    frequency by 10 bits, and the time difference (target minus anchor)
    fills the lowest bits. No masking is applied, so the fields only stay
    separate while target frequency and time difference fit into 10 bits.

    Args:
        anchor: Pair (frequency, time) of the anchor peak
        target: Pair (frequency, time) of the target peak

    Returns:
        Integer hash value combining the three fields
    """
    anchor_freq, anchor_time = anchor
    target_freq, target_time = target
    packed = anchor_freq << 20
    packed |= target_freq << 10
    packed |= target_time - anchor_time
    return packed

def generate_hashes(peaks, time_window, freq_range):
    """Hash every anchor/target pair of a peak list.

    Each peak serves as anchor once; it is paired with every peak in its
    target zone (see define_target_zone) and each pair is hashed with
    create_hash.

    Args:
        peaks: Sequence of (frequency, time) pairs
        time_window: Maximum time distance between anchor and target
        freq_range: Maximum frequency distance between anchor and target

    Returns:
        hashes (list): Hash values ordered by anchor, then by target
    """
    return [
        create_hash(anchor_peak, target_peak)
        for anchor_peak in peaks
        for target_peak in define_target_zone(anchor_peak, peaks, time_window, freq_range)
    ]


In [4]:
def create_database(peaks_per_track, time_window, freq_range):
    """Build the hash database for a collection of tracks.

    Args:
        peaks_per_track (dict): Maps a track identifier to its list of peaks
        time_window: Maximum time distance between anchor and target
        freq_range: Maximum frequency distance between anchor and target

    Returns:
        database (dict): Maps each track identifier to the list of hashes
            produced by generate_hashes for that track's peaks
    """
    return {
        track_key: generate_hashes(track_peaks, time_window, freq_range)
        for track_key, track_peaks in peaks_per_track.items()
    }


In [5]:
def match_query(query_peaks, database, time_window, freq_range):
    """Rank all database tracks by the number of hashes shared with a query.

    Args:
        query_peaks: Sequence of (frequency, time) peaks of the query
        database (dict): Maps a track identifier to its list of hashes
        time_window: Maximum time distance between anchor and target
        freq_range: Maximum frequency distance between anchor and target

    Returns:
        List of (track identifier, count) tuples sorted by descending count,
        where count is the number of distinct hash values that occur in both
        the query and the track. Tracks with equal counts keep the database
        order. The list is empty if the database is empty.
    """
    # Build the query hash set once; it is the same for every track.
    query_hashes = set(generate_hashes(query_peaks, time_window, freq_range))
    # set.intersection accepts any iterable, so the track hash lists do not
    # have to be copied into temporary sets first.
    matches = {
        track_id: len(query_hashes.intersection(track_hashes))
        for track_id, track_hashes in database.items()
    }
    return sorted(matches.items(), key=lambda x: x[1], reverse=True)


In [6]:
def compute_constellation_map(Y, dist_freq=7, dist_time=7, thresh=0.01):
    """Compute constellation map (implementation using image processing)

    A cell is marked as a peak if it equals the maximum of its rectangular
    neighborhood and that maximum exceeds the threshold.

    Notebook: C7/C7S1_AudioIdentification.ipynb

    Args:
        Y (np.ndarray): Spectrogram (magnitude)
        dist_freq (int): Neighborhood parameter for frequency direction (kappa) (Default value = 7)
        dist_time (int): Neighborhood parameter for time direction (tau) (Default value = 7)
        thresh (float): Threshold parameter for minimal peak magnitude (Default value = 0.01)

    Returns:
        Cmap (np.ndarray): Boolean mask for peak structure (same size as Y)
    """
    neighborhood = (2 * dist_freq + 1, 2 * dist_time + 1)
    local_max = ndimage.maximum_filter(Y, size=neighborhood, mode='constant')
    is_local_max = Y == local_max
    above_thresh = local_max > thresh
    return np.logical_and(is_local_max, above_thresh)

In [7]:
def plot_constellation_map(Cmap, Y=None, xlim=None, ylim=None, title='',
                           xlabel='Time (sample)', ylabel='Frequency (bins)',
                           s=5, color='r', marker='o', figsize=(7, 3), dpi=72):
    """Plot constellation map

    Notebook: C7/C7S1_AudioIdentification.ipynb

    Args:
        Cmap: Constellation map given as boolean mask for peak structure.
            A one-dimensional mask is treated as a single column.
        Y: Spectrogram representation (Default value = None)
        xlim: Limits for x-axis (Default value = None)
        ylim: Limits for y-axis (Default value = None)
        title: Title for plot (Default value = '')
        xlabel: Label for x-axis (Default value = 'Time (sample)')
        ylabel: Label for y-axis (Default value = 'Frequency (bins)')
        s: Size of dots in scatter plot (Default value = 5)
        color: Color used for scatter plot (Default value = 'r')
        marker: Marker for peaks (Default value = 'o')
        figsize: Width, height in inches (Default value = (7, 3))
        dpi: Dots per inch (Default value = 72)

    Returns:
        fig: The created matplotlib figure
        ax: The used axes.
        im: The image plot
    """
    if Cmap.ndim > 1:
        (K, N) = Cmap.shape
    else:
        K = Cmap.shape[0]
        N = 1
    if Y is None:
        # Without a spectrogram, draw the peaks on an empty background.
        Y = np.zeros((K, N))
    fig, ax = plt.subplots(1, 1, figsize=figsize, dpi=dpi)
    im = ax.imshow(Y, origin='lower', aspect='auto', cmap='gray_r', interpolation='nearest')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    # Default limits cover every cell completely (cells are centered on
    # integer coordinates).
    if xlim is None:
        xlim = [-0.5, N - 0.5]
    if ylim is None:
        ylim = [-0.5, K - 0.5]
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    # Reshape to (K, N) so that a 1-D mask also yields (row, column) index
    # pairs; without it the unpacking below fails for 1-D input.
    row_idx, col_idx = np.argwhere(Cmap.reshape(K, N) == 1).T
    # Columns go on the x-axis and rows on the y-axis, matching imshow.
    ax.scatter(col_idx, row_idx, color=color, s=s, marker=marker)
    plt.tight_layout()
    return fig, ax, im

In [8]:
def load_filenames(directory):
    """List the paths of all regular files directly inside a directory.

    Args:
        directory: Path of the directory to scan (not recursive)

    Returns:
        filenames (list): Paths built as os.path.join(directory, entry) for
            every entry that is a file, in the order given by os.listdir;
            subdirectories are skipped
    """
    candidates = (os.path.join(directory, entry) for entry in os.listdir(directory))
    return [path for path in candidates if os.path.isfile(path)]

In [9]:
from concurrent.futures import ThreadPoolExecutor

def compute_constellation_map_single(args):
    """Compute the constellation map of one audio file.

    Takes a single tuple so that it can be used directly with
    executor.map.

    Args:
        args: Tuple (filename, dist_freq, dist_time)

    Returns:
        Tuple (filename, constellation map) for the given file
    """
    filename, dist_freq, dist_time = args
    # Loading the audio and computing its spectrogram happens here.
    magnitude = compute_spectrogram(filename)
    return filename, compute_constellation_map(magnitude, dist_freq, dist_time)

def compute_constellation_maps(filenames, dist_freq, dist_time):
    """Compute the constellation maps of several files in a thread pool.

    Args:
        filenames: Iterable of audio file paths
        dist_freq: Neighborhood parameter for frequency direction
        dist_time: Neighborhood parameter for time direction

    Returns:
        Cmaps (dict): Maps each file path to its constellation map
    """
    # One argument tuple per file, as expected by the single-file worker.
    jobs = [(path, dist_freq, dist_time) for path in filenames]

    with ThreadPoolExecutor() as pool:
        # The worker returns (filename, map) pairs, which dict() consumes.
        return dict(pool.map(compute_constellation_map_single, jobs))

In [10]:
def compute_matching_function(C_D, C_Q, tol_freq=0, tol_time=0):
    """Compute the matching function between two constellation maps.

    NOTE(review): the actual computation is delegated to
    compute_matching_function_numba, which is not defined in the visible
    part of this notebook - confirm it exists before calling this function.

    Args:
        C_D (np.ndarray): Constellation map of the document
        C_Q (np.ndarray): Constellation map of the query; must not have more
            columns than C_D
        tol_freq (int): Tolerance in frequency direction (Default value = 0)
        tol_time (int): Tolerance in time direction (Default value = 0)

    Returns:
        Delta, shift_max: The two values returned by
        compute_matching_function_numba
    """
    num_doc_frames = C_D.shape[1]
    num_query_frames = C_Q.shape[1]
    max_shift = num_doc_frames - num_query_frames
    assert max_shift >= 0, "Query must be shorter than document"

    # Dilate the query map by the tolerances with a maximum filter
    # (done here, outside of the Numba helper).
    tolerance_window = (2 * tol_freq + 1, 2 * tol_time + 1)
    C_est_max = ndimage.maximum_filter(C_Q, size=tolerance_window, mode='constant')

    return compute_matching_function_numba(C_D, C_Q, C_est_max, tol_freq, tol_time)

In [11]:
def compute_spectrogram(fn_wav, Fs=22050, N=2048, H=1024, bin_max=128, frame_max=None, duration=None):
    """Compute a cropped magnitude spectrogram of an audio file.

    Args:
        fn_wav: Path of the audio file, passed to librosa.load
        Fs: Sampling rate used for loading (Default value = 22050)
        N: FFT and window length of the STFT (Default value = 2048)
        H: Hop length of the STFT (Default value = 1024)
        bin_max: Number of rows (first axis of the STFT) to keep;
            None keeps all (Default value = 128)
        frame_max: Number of columns (second axis of the STFT) to keep;
            None keeps all (Default value = None)
        duration: Number of seconds to load from the start of the file;
            None loads the first 30 seconds, which is what this function
            always did before the parameter was honored (Default value = None)

    Returns:
        Y (np.ndarray): Magnitude of the STFT, cropped to
            [:bin_max, :frame_max]
    """
    if duration is None:
        # Keep the previous hard-coded behavior as the default.
        duration = 30
    x, Fs = librosa.load(fn_wav, sr=Fs, duration=duration)
    X = librosa.stft(x, n_fft=N, hop_length=H, win_length=N, window='hann')
    if bin_max is None:
        bin_max = X.shape[0]
    if frame_max is None:
        # Frames are indexed along the second axis; the first axis (used here
        # before) is the bin count and would silently truncate long signals.
        frame_max = X.shape[1]
    return np.abs(X[:bin_max, :frame_max])

In [12]:
# Experiment configurations; each entry carries its parameters together with
# the slots that are filled in by later cells.
configs = [
    {
        # Neighborhood sizes handed to compute_constellation_maps as
        # dist_freq (kappa) and dist_time (tau).
        "kappa": 6,
        "tau": 17,
        # Matching tolerances - presumably meant for
        # compute_matching_function; not used in the cells shown here.
        "tol_freq": 1,
        "tol_time": 2,
        # Constellation maps of the database tracks / the queries.
        "cmaps_D": None,
        "cmaps_Q": None,
        "matches": {},
        "hits": {
            "total": 0,
            "original": 0,
            "noise": 0,
            "coding": 0,
            "mobile": 0,
        },
    },
]

In [13]:
# Collect the paths of all files in the 'tracks' directory.
tracks = load_filenames('tracks')
for config in configs: 
    # Compute the constellation maps of all tracks with this configuration's
    # neighborhood sizes (kappa -> dist_freq, tau -> dist_time) and store the
    # resulting {file path: constellation map} dictionary in the configuration.
    config["cmaps_D"] = compute_constellation_maps(tracks, config["kappa"], config["tau"])

In [18]:
# Maps each track path to the list of its peak coordinates.
peaks = {}

# Iterate over the track constellation maps of the first configuration
for track, boolean_spectrogram in configs[0]["cmaps_D"].items():
    print(f"Processing track: {track}")
    
    # np.argwhere returns one [row, column] index pair per True entry of the
    # mask; rows are the frequency bins and columns the time frames of the
    # spectrogram the mask was computed from.
    track_peaks = np.argwhere(boolean_spectrogram)
    
    # Store the peaks as nested plain Python lists instead of a NumPy array
    peaks[track] = track_peaks.tolist()

# Print the number of peaks found for every track
print("Peaks dictionary:")
for track, track_peaks in peaks.items():
    print(f"{track}: {len(track_peaks)} peaks")

Processing track: tracks/1049710.mp3
Processing track: tracks/1277910.mp3
Processing track: tracks/1419910.mp3
Processing track: tracks/22610.mp3
Processing track: tracks/387610.mp3
Processing track: tracks/625210.mp3
Processing track: tracks/1151410.mp3
Processing track: tracks/1216210.mp3
Processing track: tracks/1083510.mp3
Processing track: tracks/307110.mp3
Processing track: tracks/121310.mp3
Processing track: tracks/1398910.mp3
Processing track: tracks/1271910.mp3
Processing track: tracks/1299810.mp3
Processing track: tracks/316310.mp3
Processing track: tracks/1283010.mp3
Processing track: tracks/661610.mp3
Processing track: tracks/1337210.mp3
Processing track: tracks/1042010.mp3
Processing track: tracks/1074810.mp3
Processing track: tracks/80410.mp3
Processing track: tracks/1116710.mp3
Processing track: tracks/119410.mp3
Processing track: tracks/1329110.mp3
Processing track: tracks/1379110.mp3
Processing track: tracks/81810.mp3
Processing track: tracks/1323010.mp3
Processing tra

In [19]:
# Collect the paths of all files in the 'queries' directory.
queries = load_filenames('queries')
for config in configs: 
    # Compute the query constellation maps with the same neighborhood sizes
    # (kappa, tau) that are used for the tracks of this configuration and
    # store the {file path: constellation map} dictionary in the configuration.
    config["cmaps_Q"] = compute_constellation_maps(queries, config["kappa"], config["tau"])

In [21]:
# Maps each query path to the list of its peak coordinates.
peaksQ = {}

# Iterate over the query constellation maps of the first configuration
# (the printed label says "track", but the items processed here are queries)
for query, boolean_spectrogram_Q in configs[0]["cmaps_Q"].items():
    print(f"Processing track: {query}")
    
    # np.argwhere returns one [row, column] index pair per True entry of the
    # mask; rows are the frequency bins and columns the time frames of the
    # spectrogram the mask was computed from.
    query_peaks = np.argwhere(boolean_spectrogram_Q)
    
    # Store the peaks as nested plain Python lists instead of a NumPy array
    peaksQ[query] = query_peaks.tolist()

# Print the number of peaks found for every query
print("Peaks dictionary:")
for query, query_peaks in peaksQ.items():
    print(f"{query}: {len(query_peaks)} peaks")

Processing track: queries/1192210_noise.mp3
Processing track: queries/91810_coding.mp3
Processing track: queries/1084710_original.mp3
Processing track: queries/242610_mobile.mp3
Processing track: queries/53310_original.mp3
Processing track: queries/1192210_mobile.mp3
Processing track: queries/1147910_mobile.mp3
Processing track: queries/242610_noise.mp3
Processing track: queries/152310_mobile.mp3
Processing track: queries/1227910_original.mp3
Processing track: queries/963810_noise.mp3
Processing track: queries/887210_noise.mp3
Processing track: queries/91810_noise.mp3
Processing track: queries/1084710_coding.mp3
Processing track: queries/1269810_mobile.mp3
Processing track: queries/1390710_mobile.mp3
Processing track: queries/1400510_noise.mp3
Processing track: queries/91810_original.mp3
Processing track: queries/1084710_noise.mp3
Processing track: queries/1249910_original.mp3
Processing track: queries/1192210_coding.mp3
Processing track: queries/262010_coding.mp3
Processing track: que

In [46]:
# Size of the target zone used for hashing: maximum distance between anchor
# and target peak along the time axis and along the frequency axis.
time_window = 30  # Adjust as needed
freq_range = 80  # Adjust as needed

# Step 1: Create Database
database = create_database(peaks, time_window, freq_range)

# Step 2: Match Query
total_hits = 0  # Number of queries whose best match is the correct track

# Markers that are appended to the track name in the query file names.
query_suffixes = ("_original", "_mobile", "_noise", "_coding")

for query_id, query_peaks in peaksQ.items():
    # Match the current query against the database
    matches = match_query(query_peaks, database, time_window, freq_range)

    if matches:  # Empty only if the database contains no tracks
        # Get the top result (highest number of matching hashes)
        top_match = matches[0]  # Matches are sorted by match count in descending order
        track_id, match_count = top_match

        # Compare bare file names instead of stripping hard-coded "queries/"
        # and "tracks/" prefixes, so the check does not depend on the path
        # separator that os.path.join used when the file lists were built.
        query_name = os.path.basename(query_id)
        for suffix in query_suffixes:
            query_name = query_name.replace(suffix, "")

        if query_name == os.path.basename(track_id):
            print(f" {query_id} {track_id}: {match_count} matching hashes")
            total_hits += 1  # Count this query as correctly identified

# Print the total hits at the end
print(f"Total hits: {total_hits}")


 queries/1192210_noise.mp3 tracks/1192210.mp3: 133 matching hashes
 queries/91810_coding.mp3 tracks/91810.mp3: 101 matching hashes
 queries/1084710_original.mp3 tracks/1084710.mp3: 215 matching hashes
 queries/242610_mobile.mp3 tracks/242610.mp3: 15 matching hashes
 queries/53310_original.mp3 tracks/53310.mp3: 10 matching hashes
 queries/1192210_mobile.mp3 tracks/1192210.mp3: 59 matching hashes
 queries/242610_noise.mp3 tracks/242610.mp3: 125 matching hashes
 queries/152310_mobile.mp3 tracks/152310.mp3: 24 matching hashes
 queries/1227910_original.mp3 tracks/1227910.mp3: 124 matching hashes
 queries/963810_noise.mp3 tracks/963810.mp3: 159 matching hashes
 queries/887210_noise.mp3 tracks/887210.mp3: 100 matching hashes
 queries/91810_noise.mp3 tracks/91810.mp3: 85 matching hashes
 queries/1084710_coding.mp3 tracks/1084710.mp3: 116 matching hashes
 queries/1269810_mobile.mp3 tracks/1269810.mp3: 22 matching hashes
 queries/1390710_mobile.mp3 tracks/1390710.mp3: 122 matching hashes
 querie