In [2]:
import os
import numpy as np
from scipy.io import wavfile

In [3]:
def spectral_fourier_transform(data, samples=2048):
    result = []
    for i in range(data.shape[0] // samples + 1):
        start, end = i * samples, (i+1) * samples
        if end > data.shape[-1]:
            break
        result.append(np.fft.fft(data[start:end]))
    return result

In [4]:
def filtering(freq, interval=(100, 5000)):
    legal_freq = freq[(freq >= interval[0]) & (freq <= interval[1])]
    return np.where(freq == legal_freq[0])[0][0], np.where(freq == legal_freq[-1])[0][0]

In [5]:
def noiseprint(transform, rate, interval=(100, 5000), samples=2048, points_per_slice=6):
    result = []
    freq = np.fft.fftfreq(samples, d=1/rate)
    starting_frequency, ending_frequency = filtering(freq, interval)
    slice_space = (interval[1] - interval[0]) / points_per_slice
    sorted_freq = np.sort(freq[starting_frequency:ending_frequency+1])

    for time_slice in transform:
        max_freq = []
        window_start = starting_frequency
        freq_to_fourier_transform = dict()

        for i in range(starting_frequency, ending_frequency+1):
            freq_to_fourier_transform[freq[i]] = time_slice[i]

        for _ in range(points_per_slice):
            window_end = window_start
            while window_end < sorted_freq.shape[-1] and sorted_freq[window_end] <= slice_space + sorted_freq[window_start]:
                window_end += 1
            maximum_fourier_transform = max(np.absolute([freq_to_fourier_transform[sorted_freq[i]] for i in range(window_start, window_end)]))
            for frequency in sorted_freq[window_start:window_end]:
                if maximum_fourier_transform == np.absolute(freq_to_fourier_transform[frequency]):
                    max_freq.append(frequency)
                    break
            window_start = window_end + 1
        result.append(max_freq)
    return result

In [6]:
def similarity(song_spec, clip_spec, points_per_slice=6):    
    song_flat = song_spec.flatten()
    clip_flat = clip_spec.flatten()
    
    sim_window_size = points_per_slice - 1
    score = 0
    for anchor in range(clip_flat.shape[0] - points_per_slice):
        anchor_y = anchor % points_per_slice
        sim_window = clip_flat[anchor: anchor+sim_window_size]
        for song_anchor in range(anchor_y, song_flat.shape[0] - points_per_slice - 1, points_per_slice):
            if clip_flat[anchor] == song_flat[song_anchor]:
                if np.count_nonzero((song_flat[song_anchor:song_anchor+sim_window_size] - sim_window) == 0) >= 4:
                    score += 1
    
    score /= song_flat.shape[0]
    return score


In [37]:
database = dict()

print('Creating Database ...')
for root, _, files in os.walk('data'):
    for wave in files:
        rate, data = wavfile.read(root + '/' + wave)
        fourier_transform = spectral_fourier_transform(data)
        database[wave] = noiseprint(fourier_transform, rate)
print('Created Database Successfully ...')

print('Reading Inputs ...')
for root, _, files in os.walk('clip'):
    for wave in files:
        rate, data = wavfile.read(root + '/' + wave)
        fourier_transform = spectral_fourier_transform(data)
        input_noiseprint = noiseprint(fourier_transform, rate)
        result = ('', -1000)
        for database_wave, wave_noiseprint in database.items():
            s = similarity(np.array(wave_noiseprint), np.array(input_noiseprint))
            if result[1] < s:
                result = (database_wave, s)
        print('{} is similar to {}'.format(wave, result[0]))

Creating Database ...
Created Database Successfully ...
Reading Inputs ...
2.592693002840176 2_love_is_blue.wav 2_love_is_blue.wav
0.13026077975729408 2_love_is_blue.wav 3_chanson_du_toreador.wav
0.5463465014200878 2_love_is_blue.wav 1_prelude.wav
0.7139168603150013 2_love_is_blue.wav 4_el_bimbo.wav
2_love_is_blue.wav is similar to 2_love_is_blue.wav
0.13026077975729408 3_chanson_du_toreador.wav 2_love_is_blue.wav
1.357733023495998 3_chanson_du_toreador.wav 3_chanson_du_toreador.wav
0.2872450296927446 3_chanson_du_toreador.wav 1_prelude.wav
0.15349857991221275 3_chanson_du_toreador.wav 4_el_bimbo.wav
3_chanson_du_toreador.wav is similar to 3_chanson_du_toreador.wav
0.5466046991995869 1_prelude.wav 2_love_is_blue.wav
0.28763232636199326 1_prelude.wav 3_chanson_du_toreador.wav
2.707203718048025 1_prelude.wav 1_prelude.wav
0.2643945262070746 1_prelude.wav 4_el_bimbo.wav
1_prelude.wav is similar to 1_prelude.wav
0.7139168603150013 4_el_bimbo.wav 2_love_is_blue.wav
0.15349857991221275 4_el_