In [1]:
import wave
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
from scipy import signal
import seaborn as sns
from scipy.io import wavfile
import pylab
import matplotlib.mlab as ml
from scipy.fftpack import fft

%matplotlib tk

In [23]:
# Fingerprinting defaults.
# NOTE(review): none of these names is referenced in the visible cells; the
# meanings given below are inferred from the names only -- confirm before use.
DEFAULT_FS = 44100  # presumably the expected sampling rate in Hz -- TODO confirm
DEFAULT_WINDOW_SIZE = 4096  # same value as the NFFT handed to ml.specgram in song.spectrogram
DEFAULT_OVERLAP_RATIO = 0.5  # same ratio as the window overlap used in song.spectrogram
DEFAULT_FAN_VALUE = 15  # presumably how many points get paired with each anchor -- TODO confirm
DEFAULT_AMP_MIN = 10  # presumably a minimum peak amplitude -- TODO confirm

def graph_spectrogram(sound_info, frame_rate, output_path='spectrogram.png'):
    """Draw a spectrogram of ``sound_info`` and save it as an image file.

    Parameters
    ----------
    sound_info : sequence of samples
        Forwarded unchanged to ``pylab.specgram``.
    frame_rate : number
        Sampling frequency, forwarded to ``pylab.specgram`` as ``Fs``.
    output_path : str, optional
        Where the figure is written. Defaults to ``'spectrogram.png'``, the
        file name that used to be hard-coded, so existing callers are
        unaffected.
    """
    # A fresh 19x12 inch figure with a single full-size axes.
    pylab.figure(num=None, figsize=(19, 12))
    pylab.subplot(111)
    pylab.specgram(sound_info, Fs=frame_rate)
    pylab.savefig(output_path)

"""
Function that converts a byte string into a numpy array
"""
def _wav2array(nchannels, sampwidth, data):
    num_samples, remainder = divmod(len(data), sampwidth * nchannels)
    if remainder > 0:
        raise ValueError('The length of data is not a multiple of '
                         'sampwidth * num_channels.')
    if sampwidth > 4:
        raise ValueError("sampwidth must not be greater than 4.")

    if sampwidth == 3:
        a = np.empty((num_samples, nchannels, 4), dtype=np.uint8)
        raw_bytes = np.fromstring(data, dtype=np.uint8)
        a[:, :, :sampwidth] = raw_bytes.reshape(-1, nchannels, sampwidth)
        a[:, :, sampwidth:] = (a[:, :, sampwidth - 1:sampwidth] >> 7) * 255
        result = a.view('<i4').reshape(a.shape[:-1])
    else:
        dt_char = 'u' if sampwidth == 1 else 'i'
        a = np.fromstring(data, dtype='<%s%d' % (dt_char, sampwidth))
        result = a.reshape(-1, nchannels)
    return result

"""
Function to convert stereo to mono
"""

def stereo2mono(audiodata, nchannels):
    """Down-mix multi-channel audio to a single channel by averaging.

    Parameters
    ----------
    audiodata : array-like
        Samples of shape ``(num_samples, num_channels)``; a 1-D array is
        treated as already mono.
    nchannels : int
        Kept for interface compatibility. The divisor is taken from the
        array's own channel axis, so it cannot disagree with the data.

    Returns
    -------
    numpy.ndarray
        1-D integer array holding the per-sample channel average, truncated
        toward zero by the integer cast.
    """
    samples = np.asarray(audiodata, dtype=float)
    if samples.ndim == 1:
        return samples.astype(int)
    # Averaging over the real channel count fixes the previous hard-coded
    # "/ 2", which halved mono input and mis-scaled anything beyond stereo.
    return samples.mean(axis=1).astype(int)

"""
Class containing details of the wav file that has been read.
Sample use:
    song_x = song("abc.wav")
"""
class song:
    """A WAV file decoded to mono samples, plus its spectral peak summary.

    Attributes set by ``__init__``: ``title``, ``rate``, ``nchannels``,
    ``sampwidth``, ``nframes``, ``data`` (raw frame bytes) and ``array``
    (mono samples). ``spectrogram()`` adds ``specgram``, ``frequencies`` and
    ``times``; ``find_key()`` adds ``bands``.

    Sample use:
        song_x = song("abc.wav")
        song_x.find_key()
    """

    # Frequency-bin boundaries of the bands searched for a peak in every time
    # frame: [0, 10), [10, 20), [20, 40), [40, 80), [80, 160), [160, 510).
    BAND_EDGES = (0, 10, 20, 40, 80, 160, 510)

    def __init__(self, file):
        """Read ``file`` (a path to a WAV file) and decode it to mono samples."""
        wav = wave.open(file)
        try:
            self.title = file.split("/")[-1]
            self.rate = wav.getframerate()
            self.nchannels = wav.getnchannels()
            self.sampwidth = wav.getsampwidth()
            self.nframes = wav.getnframes()
            self.data = wav.readframes(self.nframes)
        finally:
            # Close the handle even when reading the header or frames fails.
            wav.close()
        self.array = stereo2mono(
            _wav2array(self.nchannels, self.sampwidth, self.data),
            self.nchannels)

    def spectrogram(self, window_size=4096, overlap_ratio=0.5):
        """Compute the log-scaled magnitude spectrogram of ``self.array``.

        Parameters
        ----------
        window_size : int, optional
            Passed to ``ml.specgram`` as ``NFFT`` (default 4096, the value
            that used to be hard-coded).
        overlap_ratio : float, optional
            Fraction of ``window_size`` used as ``noverlap`` (default 0.5).

        Stores the results in ``self.specgram`` (frequency bins along the
        first axis, time frames along the second), ``self.frequencies`` and
        ``self.times``.
        """
        self.specgram, self.frequencies, self.times = ml.specgram(
            self.array,
            Fs=self.rate,
            NFFT=window_size,
            window=ml.window_hanning,
            noverlap=int(window_size * overlap_ratio),
            mode='magnitude')
        # log10(0) yields -inf; those cells are overwritten just below, so the
        # divide-by-zero warning would only be noise.
        with np.errstate(divide='ignore'):
            self.specgram = 10 * np.log10(self.specgram)
        self.specgram[self.specgram == -np.inf] = 0

    def find_key(self, threshold_factor=1.2):
        """Select the strongest spectral peaks of every time frame.

        For each time frame the loudest bin of every band in ``BAND_EDGES``
        is located. A peak is kept when its amplitude exceeds
        ``threshold_factor`` times the mean amplitude of all band peaks.

        Parameters
        ----------
        threshold_factor : float, optional
            Multiplier applied to the mean peak amplitude (default 1.2).

        The result is stored in ``self.bands`` as a list of
        ``(index, [[frequency, time], ...])`` tuples, where ``index`` counts
        only the frames that kept at least one peak.
        """
        self.spectrogram()
        edges = self.BAND_EDGES

        peaks_per_frame = []
        for frame_idx, column in enumerate(self.specgram.transpose()):
            frame_peaks = []
            for low, high in zip(edges[:-1], edges[1:]):
                band = column[low:high]
                if len(band) == 0:
                    # Spectrum has fewer bins than this band needs; skip it
                    # instead of failing on an empty slice.
                    continue
                # argmax returns the first maximum, the same bin that
                # list.index(max(...)) would pick.
                offset = int(np.argmax(band))
                frame_peaks.append(
                    (band[offset],
                     [self.frequencies[low + offset], self.times[frame_idx]]))
            peaks_per_frame.append(frame_peaks)

        amplitudes = [amp for frame in peaks_per_frame for amp, _ in frame]
        if not amplitudes:
            # No frames (or no usable bands): nothing can pass a threshold.
            self.bands = []
            return
        threshold = threshold_factor * np.mean(amplitudes)

        kept = []
        for frame in peaks_per_frame:
            strong = [point for amp, point in frame if amp > threshold]
            if strong:
                # len(kept) is the running index over frames that survived.
                kept.append((len(kept), strong))
        self.bands = kept

In [24]:
# Decode the track into a `song` object; the path is relative to the
# notebook's working directory. The commented line is an alternative input.
total = song("Ricky Martin - Livin La Vida Loca.wav")
#total = song("./Songs_Wav/Total_Breakdown.wav")



In [25]:
# Compute the spectrogram and store the above-threshold band peaks in total.bands.
total.find_key()

In [26]:
# from cmath import *
# def fft(x, N):
#     if N==1: 
#         return x
#     even = fft([x[k] for k in range(0,N,2)], int(N/2))
#     odd = fft([x[k] for k in range(1,N,2)], int(N/2))
 
#     M = int(N/2)
#     l = [ even[k] + exp(-2j*pi*k/N)*odd[k] for k in range(M) ]
#     r = [ even[k] - exp(-2j*pi*k/N)*odd[k] for k in range(M) ]
 
#     return l+r

In [27]:
# Show the (kept-frame index, [[frequency, time], ...]) tuples built by find_key().
total.bands

[(0, [[775.1953125, 0.37151927437641724]]),
 (1,
  [[764.4287109375, 0.4179591836734694], [1593.45703125, 0.4179591836734694]]),
 (2, [[775.1953125, 0.46439909297052157]]),
 (3, [[785.9619140625, 0.5572789115646258]]),
 (4, [[775.1953125, 0.603718820861678]]),
 (5, [[775.1953125, 0.6501587301587302]]),
 (6, [[796.728515625, 0.6965986394557823]]),
 (7, [[785.9619140625, 0.7430385487528345]]),
 (8, [[829.0283203125, 0.7894784580498866]]),
 (9, [[796.728515625, 0.8359183673469388]]),
 (10, [[796.728515625, 0.8823582766439909]]),
 (11, [[839.794921875, 0.9287981859410431]]),
 (12, [[807.4951171875, 0.9752380952380952]]),
 (13, [[839.794921875, 1.0216780045351475]]),
 (14, [[796.728515625, 1.1145578231292517]]),
 (15, [[775.1953125, 1.1609977324263039]]),
 (16, [[796.728515625, 1.207437641723356]]),
 (17, [[807.4951171875, 1.253877551020408]]),
 (18, [[785.9619140625, 1.3003174603174603]]),
 (19, [[796.728515625, 1.3467573696145125]]),
 (20, [[796.728515625, 1.3931972789115645]]),
 (21, [[7

In [43]:
# Flatten total.bands: drop the frame index of every entry and concatenate the
# per-frame peak lists into one list of [frequency, time] points.
total.new_bands = [
    point
    for entry in total.bands
    for point in entry[1]
]

In [46]:
# Build the address table: every point from index 3 onwards whose five-point
# target zone fits in the list is paired with the anchor three positions
# before it. Key: (anchor frequency, target frequency, time offset from the
# anchor). Value: [anchor time, song id]. A repeated key overwrites the
# earlier entry.
sond_id = 0
addresses = {}
points = total.new_bands
for i in range(3, len(points) - 4):
    anchor_point = points[i - 3]
    anchor_freq = anchor_point[0]
    anchor_time = anchor_point[1]
    for target in points[i:i + 5]:
        key = (anchor_freq, target[0], target[1] - anchor_time)
        addresses[key] = [anchor_time, sond_id]

In [45]:
# Inspect the anchor variable left behind by the address-building loop.
# NOTE(review): this cell's execution count (45) is lower than the loop
# cell's (46), so the displayed value may come from an earlier run -- confirm
# with Restart & Run All.
anchor_point

[775.1953125, 0.37151927437641724]

In [48]:
# Number of distinct (anchor frequency, target frequency, time offset) keys stored.
len(addresses)

3272