In [102]:
import wave
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
from scipy import signal
import seaborn as sns
from scipy.io import wavfile
import pylab
import matplotlib.mlab as ml
from scipy.fftpack import fft

%matplotlib tk

In [127]:
# Default analysis parameters.
# NOTE(review): DEFAULT_FS, DEFAULT_FAN_VALUE and DEFAULT_AMP_MIN are not
# referenced anywhere in the visible notebook — confirm their intended use.
DEFAULT_FS = 44100  # presumably the sampling rate in Hz — TODO confirm
DEFAULT_WINDOW_SIZE = 4096  # same value as the NFFT given to ml.specgram in song.spectrogram
DEFAULT_OVERLAP_RATIO = 0.5  # same fraction as the noverlap ratio in song.spectrogram
DEFAULT_FAN_VALUE = 15  # purpose not shown in this notebook
DEFAULT_AMP_MIN = 10  # purpose not shown in this notebook

def graph_spectrogram(sound_info, frame_rate, out_path='spectrogram.png'):
    """Draw a spectrogram of ``sound_info`` and save it as an image file.

    Parameters
    ----------
    sound_info : array-like
        Samples passed unchanged to ``pylab.specgram``.
    frame_rate : int or float
        Sampling frequency, passed to ``pylab.specgram`` as ``Fs``.
    out_path : str, optional
        Where the figure is written. Defaults to ``'spectrogram.png'``,
        the file name that used to be hard-coded here.
    """
    pylab.figure(num=None, figsize=(19, 12))
    pylab.subplot(111)
    pylab.specgram(sound_info, Fs=frame_rate)
    pylab.savefig(out_path)

"""
Function that converts a byte string into a numpy array
"""
def _wav2array(nchannels, sampwidth, data):
    num_samples, remainder = divmod(len(data), sampwidth * nchannels)
    if remainder > 0:
        raise ValueError('The length of data is not a multiple of '
                         'sampwidth * num_channels.')
    if sampwidth > 4:
        raise ValueError("sampwidth must not be greater than 4.")

    if sampwidth == 3:
        a = np.empty((num_samples, nchannels, 4), dtype=np.uint8)
        raw_bytes = np.fromstring(data, dtype=np.uint8)
        a[:, :, :sampwidth] = raw_bytes.reshape(-1, nchannels, sampwidth)
        a[:, :, sampwidth:] = (a[:, :, sampwidth - 1:sampwidth] >> 7) * 255
        result = a.view('<i4').reshape(a.shape[:-1])
    else:
        dt_char = 'u' if sampwidth == 1 else 'i'
        a = np.fromstring(data, dtype='<%s%d' % (dt_char, sampwidth))
        result = a.reshape(-1, nchannels)
    return result

"""
Function to convert stereo to mono
"""

def stereo2mono(audiodata, nchannels):
    """Mix multi-channel audio down to a single channel.

    Parameters
    ----------
    audiodata : array-like
        Samples shaped ``(num_frames, num_channels)``; a 1-D array is
        treated as already being a single channel.
    nchannels : int
        Kept for backward compatibility with existing callers. The channel
        count actually used is read from ``audiodata`` itself.

    Returns
    -------
    numpy.ndarray
        1-D integer array with the per-frame average of all channels,
        truncated toward zero by the final integer cast.
    """
    samples = np.asarray(audiodata, dtype=float)
    if samples.ndim == 1:
        # Already one channel: nothing to average.
        return samples.astype(int)
    # Divide by the real channel count so that mono input is not halved and
    # inputs with more than two channels are averaged correctly.
    mixed = samples.sum(axis=1) / samples.shape[1]
    return mixed.astype(int)

"""
Class containing details of the wav file that has been read.
Sample use:
    song_x = song("abc.wav")
"""
class song:
    """Details of a WAV file that has been read, plus its mono samples.

    Sample use:
        song_x = song("abc.wav")

    Attributes set by ``__init__``:
        title      last "/"-separated component of the given path
        rate       frame rate reported by the wave reader
        nchannels  number of channels reported by the wave reader
        sampwidth  sample width in bytes reported by the wave reader
        nframes    number of frames reported by the wave reader
        data       raw frame bytes
        array      samples decoded by _wav2array and mixed by stereo2mono

    Attributes set by ``spectrogram()``:
        specgram, frequencies, times
    """

    def __init__(self, file):
        """Read every frame of the WAV file at path ``file`` and decode it."""
        self.title = file.split("/")[-1]
        # The context manager closes the file even when reading fails.
        with wave.open(file, 'rb') as wav:
            self.rate = wav.getframerate()
            self.nchannels = wav.getnchannels()
            self.sampwidth = wav.getsampwidth()
            self.nframes = wav.getnframes()
            self.data = wav.readframes(self.nframes)
        # Decoding only needs the bytes, so it runs after the file is closed.
        self.array = stereo2mono(
            _wav2array(self.nchannels, self.sampwidth, self.data),
            self.nchannels)

    def spectrogram(self, nfft=None, overlap_ratio=None):
        """Compute the spectrogram of ``self.array`` and display it.

        Parameters
        ----------
        nfft : int, optional
            FFT window length passed to ``ml.specgram`` as ``NFFT``.
            Defaults to ``DEFAULT_WINDOW_SIZE``.
        overlap_ratio : float, optional
            Fraction of ``nfft`` by which consecutive windows overlap.
            Defaults to ``DEFAULT_OVERLAP_RATIO``.

        Sets ``self.specgram``, ``self.frequencies`` and ``self.times``,
        then shows the scaled, clamped spectrogram in a new figure.
        """
        if nfft is None:
            nfft = DEFAULT_WINDOW_SIZE
        if overlap_ratio is None:
            overlap_ratio = DEFAULT_OVERLAP_RATIO

        self.specgram, self.frequencies, self.times = ml.specgram(
            self.array,
            Fs=self.rate,
            NFFT=nfft,
            window=ml.window_hanning,
            noverlap=int(nfft * overlap_ratio))

        # log10(0) yields -inf; those cells are reset to 0 just below, so
        # the divide-by-zero warning would only be noise.
        with np.errstate(divide='ignore'):
            self.specgram = 10 * np.log10(self.specgram)
        self.specgram[self.specgram == -np.inf] = 0

        # NOTE(review): exponentiating decibel values and clamping to
        # [1e17, 1e19] looks like an ad-hoc contrast stretch; it is kept
        # unchanged so the rendered image stays the same — confirm intent.
        self.specgram = (1 / 20) * np.exp(self.specgram)
        self.specgram = np.clip(self.specgram, 1e17, 1e19)

        fig, ax = plt.subplots()
        ax.imshow(self.specgram)
        ax.set_xlabel('Time')
        ax.set_ylabel('Frequency')
        ax.set_title("Spectrogram of " + self.title)
        # Put row 0 of the array at the bottom of the plot.
        ax.invert_yaxis()
        plt.show()

In [128]:
# Load the two sample tracks. Each song() call reads the whole WAV file and
# stores its decoded, channel-mixed samples in `.array`.
ricky = song("./Songs_Wav/Ricky Martin - Livin La Vida Loca.wav")
total = song("./Songs_Wav/Total_Breakdown.wav")



In [129]:
# ricky.spectrogram()  # disabled; uncomment to also plot the first track

In [130]:
# Compute and plot the spectrogram; this also sets total.specgram,
# total.frequencies and total.times, which later cells read.
total.spectrogram()

In [71]:
# FFT over the entire mono sample array (one complex bin per input sample).
total_fft = fft(total.array)

In [72]:
# Keep only the first half of the FFT bins. The slice is taken from a fresh
# FFT of total.array so that re-running this cell is idempotent; slicing
# total_fft by its own length would halve it again on every re-run.
total_fft = fft(total.array)[:total.array.shape[0] // 2]

In [73]:
# Inspect the truncated spectrum.
total_fft

array([ 1.88920807e+08+0.00000000e+00j, -6.42702782e+07-1.35310323e+07j,
        2.99235363e+06+2.25322233e+07j, ...,
       -3.61464246e+04+2.64443156e+04j,  5.37419201e+03-9.95322661e+02j,
       -1.17335520e+04-2.91908034e+04j])

In [32]:
# Scratch arithmetic (evaluates to 1.0); its purpose is not evident from the notebook.
44100/44100

1.0

In [22]:
# Twice the truncated FFT length, i.e. the input sample count rounded down to an even number.
total_fft.shape[0]*2

6127872

In [29]:
# Magnitude of each complex FFT bin.
np.abs(total_fft)

array([1.88920807e+08, 6.56792014e+07, 2.27300521e+07, ...,
       4.47868936e+04, 5.46558387e+03, 3.14607572e+04])

In [96]:
# Frequency axis returned by ml.specgram; only available after total.spectrogram() has run.
total.frequencies

array([0.00000000e+00, 1.07666016e+01, 2.15332031e+01, ...,
       2.20284668e+04, 2.20392334e+04, 2.20500000e+04])

In [114]:
# Raise every spectrogram value below the threshold up to the threshold,
# modifying total.specgram in place.
# NOTE(review): song.spectrogram() as written already floors the array at a
# much larger value, so after a fresh run this cell changes nothing — confirm
# whether it is still needed.
total.specgram[total.specgram<100000000000] = 100000000000

In [87]:
# Largest value in the first spectrogram row (`[:1][0]` selects row 0) after
# applying the same (1/20)*exp(...) scaling that song.spectrogram() uses.
# NOTE(review): spectrogram() as written already applies that scaling and
# clamps the result, so the recorded output presumably came from an earlier
# state in which total.specgram still held decibel values — confirm.
max((1/20)*np.exp(total.specgram[:1][0]))

9.996097629606325e+19

In [108]:
# Shape of the spectrogram array: (frequency bins, time segments) as returned by ml.specgram.
total.specgram.shape

(2049, 2991)

In [109]:
# Total number of spectrogram cells: the product of the two dimensions shown above.
2049*2991

6128559