In [1]:
import array
import contextlib
import matplotlib
import numpy as np
import wave
import hashlib
from matplotlib import pyplot, mlab
from scipy.ndimage import maximum_filter
from scipy.ndimage import generate_binary_structure, binary_erosion, iterate_structure

In [2]:
win_size = 4096
win_overlap = 2048

### Function for reading audio file

In [3]:
def read_audio(file):
    
    with contextlib.closing(wave.open(file)) as f:
        params = f.getparams()
        frames = f.readframes(params[3])
    return array.array("h", frames), params

### Function to generate Spectrogram

In [4]:
def spectrogram(signal):
    
    graph = matplotlib.mlab.specgram(signal, NFFT = win_size, Fs = 44100, 
                                     window = matplotlib.mlab.window_hanning, 
                                     noverlap = win_overlap)[0]

    graph = 10 * np.log10(graph)
    graph[graph == -np.inf] = 0
    return graph


### Function to filter the spectrogram 

In [5]:
def filter_s(image, plot = False):
    
    structure = generate_binary_structure(2, 1)
    neighborhood = iterate_structure(structure, 20)
    
    local_max = maximum_filter(image, footprint=neighborhood)==image
    background = (image == 0)
    eroded_background = binary_erosion(background, structure=neighborhood, border_value=1)
    peaks = np.bitwise_xor(local_max,eroded_background)

    # peaks is a 2D mask
    amplitudes = image[peaks].flatten()
    freq, t = np.where(peaks)
    
    # Finding peaks with amplitude greater than 10
    ufpeaks = zip(t, freq, amplitudes)
    fpeaks = [x for x in ufpeaks if x[2] > 10]

    # getting index for time and frequency
    time = [x[0] for x in fpeaks]
    frequency = [x[1] for x in fpeaks]

    # plotting the filtered peaks
    if plot:
        fig, ax = pyplot.subplots()
        ax.imshow(image)
        ax.scatter(time, frequency)
        ax.set_xlabel('Time')
        ax.set_ylabel('Frequency')
        ax.set_title("Spectrogram")
        pyplot.gca().invert_yaxis()
        pyplot.show()

    return list(zip(time, frequency))
    


### Function to create the fingerprint 

In [6]:
def create_fingerprints(peaks):
    
    hashes = []
    
    for i in range(len(peaks)):
        for j in range(1, 15):
            if (i + j) < len(peaks):
                
                t1 = peaks[i][0]
                t2 = peaks[i + j][0]
                f1 = peaks[i][1]
                f2 = peaks[i + j][1]
                delta = t2 - t1

                hash = hashlib.sha1(str(str(f1) + str(f2) + str(delta)).encode('utf-8'))

                hashes.append((hash.hexdigest(), t1))

    return hashes

### Reading the audio files 

In [34]:
song1, params1 = read_audio("C:\\Users\\hp\\Desktop\\Python\\Song recognition system\\Audio1.wav")
song2, params2 = read_audio("C:\\Users\\hp\\Desktop\\Python\\Song recognition system\\Audio3.wav")

### Generating fingerprint for audio 1 

In [35]:
spec1 = spectrogram(song1)
peaks1 = filter_s(spec1)
fp1 = create_fingerprints(peaks1)
fp1

[('3b928622f8479780e1593e188d5315edb7d6f786', 16),
 ('e303b7b8250f3a02cdbd79d73e95658424392d81', 16),
 ('dc8b57eaba544e461fe7c5650f5893605f8ce11f', 16),
 ('e584ae42b8ed52092696bcc75e3c4df2ab188252', 16),
 ('757578fbf23ffa4d748e0800dd7c424a46feb0cc', 16),
 ('5e6b18be1a1965de8c0a0584f8a562752e6d97ee', 16),
 ('efe629421443bb85b08ff60cf37affa2b3b448df', 16),
 ('7c8da4c128f59e06d632d9399c683eef58bf29a7', 16),
 ('a11717eeea81bf773b343cf698955d21f8fe3b7c', 16),
 ('f72eac7bf25ba6d14078fde862cd752731fb2303', 16),
 ('8ff04a560c6d9293caf967d4495bb9a551861f3e', 16),
 ('4bacef4f0792e65ce5b1e5a6c92442ba172d9871', 16),
 ('4abd266684d2612dff8a659969252abc9a25ea28', 16),
 ('0a46055d0d4442a765c05ba6aeba22d5428ca8b5', 16),
 ('b7553c6080f116f0115b1f080e03484b98292d8b', 114),
 ('899776728673ef5edc1b9050b3a4c8469bc5b740', 114),
 ('8a25788ac0f9439c596d51e0bb6d1c4dccdc707a', 114),
 ('6c361bd9fd7939910f17c16f4f2d3bac84213e83', 114),
 ('3977fd8d4fabf170200aa09e29777aecd20b7a01', 114),
 ('72c30aa4f3578a1cd4c183a

### Generating fingerprint for audio 2 

In [36]:
spec2 = spectrogram(song2)
peaks2 = filter_s(spec2)
fp2 = create_fingerprints(peaks2)
fp2

[('61e47d3be6d949a2b96a3435be947d475db17cc6', 44),
 ('c5d35e00f86a0e6e66ca471030e349e4080547c2', 44),
 ('a58074e2f2ccb40b47412f9c3ee1340a5de5f962', 44),
 ('f4168f0ad0b9d7316fa87dfc6bcc4a7c2d2c7b06', 44),
 ('935e2bbc722426e027f499ac0eaf6bbf366519d8', 44),
 ('3cd13696e67cb4cabf530481ab64f04baa9a00b3', 44),
 ('a2b441f98dcf46cdf232b4df196be73eb562cbcb', 44),
 ('3531f96e5d0b3284793c42c78ffed31bd87313a7', 44),
 ('9b28da9677bf08b25eeb29c2a85969d18554d1bd', 44),
 ('9e405ce3c70b3d947b665fb2ff42bbd31b061b88', 44),
 ('b2b29dab77b69c37a463259dfc77c220771c3c15', 44),
 ('556f9020cf223a210a82613e0becaac6cf8140c7', 44),
 ('f95b3af4442ac0b013c08ee762e107e57f5bb9ac', 44),
 ('62d8ce55e03d104a9914dd19f5cb6b3bd7e8ed94', 44),
 ('683cd03cdaff7f01c978e66970b8e11e4e99fa27', 72),
 ('a26ce7033b778d9a86b014bbc39cc7c92230295f', 72),
 ('6143052bda4f09984213401c85770eb2b68493e4', 72),
 ('814184187e8473b688ddb80b298b777a5094a4d7', 72),
 ('25bc7d00b295e720655bf79872e52eb98a60039d', 72),
 ('4bc950b076834082e35cb85a7b8e

### Function to find the first match hashcode 

In [37]:
def firstmatch(X, Y):   
    m = len(X)
    n = len(Y)
    a=-1
    b=-1
    
    for i in range(0, m-1 ):
        for j in range(0, n-1 ):
                if (X[i][0] == Y[j][0]):
                    return i,j
    return a,b

### Function to compare the occurence of time offset 

In [38]:
a,b= firstmatch(fp1,fp2)

m=len(fp1)
n=len(fp2)

def change(X, j,size):
    res=[]
    if(a==-1): 
        return res;
    m=len(X)
    temp=1
    for i in range(j, size-2):
        if (X[i][1]==X[i+1][1]):
            res.append(temp)
        else:
            res.append(temp)
            temp= temp+1
            
    if(X[size-2]==X[size-3]):
        res.append(temp)
    else:
        res.append(temp+1)
        temp= temp+1
        
    if(X[size-1]==X[size-2]):
        res.append(temp)
    else:
        res.append(temp+1)
        temp= temp+1
    return res

a,b,m,n

(-1, -1, 2807, 875)

### Function to find out the longest common substring 

In [39]:
def lcs(X, Y):
 
    m = len(X)
    n = len(Y)
    
    res=[]
  
    # storing the length of the longest common substring
    result = 0
  
    # storing the ending point of the substring in X
    end = 0
    
    length = [[0 for j in range(m)]
                 for i in range(2)]
  
    currRow = 0
    
    # For a particular value of i and j,
    # length[currRow][j] stores length
    # of longest common substring in
    # string X[0..i] and Y[0..j].
    
    for i in range(0, m):
        for j in range(0, n):
            if (i == 0 or j == 0):
                length[currRow][j] = 0
             
            elif (X[i - 1] == Y[j - 1]):
                length[currRow][j] = length[1 - currRow][j - 1] + 1
                 
                if (length[currRow][j] > result):
                    result = length[currRow][j]
                    end = i - 1
            else:
                length[currRow][j] = 0
  
        # Interchanging the current row and the previous row
        currRow = 1 - currRow
        
  
    # When no common substring, print -1.
    if (result == 0):
        return "-1"
    
    lcstring = X[end - result + 1 : end + 1]
    return lcstring

### Comparing the time offset frequency

In [40]:
fpt1 =[]
fpt1= change(fp1,a,m) 

fpt2 =[]
fpt2= change(fp2,b,n) 

In [41]:
fpt1

[]

In [42]:
fpt2

[]

### Finding out the longest common substring 

In [43]:
lcsarray = lcs(fpt1,fpt2)
lcsarray

'-1'

In [44]:
len(fp1), len(fp2),len (fpt1),len(fpt2)

(2807, 875, 0, 0)

In [45]:
matches= len(lcsarray)
matches

2

### Finding out the match percentage 

In [46]:
match_percentage = matches/len(fp2) * 100
match_percentage

0.2285714285714286