In [8]:
import numpy
import scipy
import librosa
from scipy.fftpack import dct
import os


# Directory whose entries are listed and loaded as audio by the cells below.
# NOTE(review): hard-coded absolute path on a local drive -- adjust it before
# running the notebook on another machine.
dirname = 'D:/Documents/Final Year Project/Wav files/Survey 7/'

In [3]:
def _track_number(filename):
    """Return the integer that prefixes `filename`, or infinity if there is none.

    Only the leading run of digits is used, so digits that appear later in the
    title (for example "7.Song 2.wav") do not change the ordering.
    """
    prefix = []
    for ch in filename.lstrip():
        if not ch.isdecimal():
            break
        prefix.append(ch)
    return int(''.join(prefix)) if prefix else float('inf')


# Keep only .wav entries so stray files in the directory (e.g. desktop.ini) are
# never handed to the audio loader, then order them by their numeric prefix.
# Files without a numeric prefix sort last; the name itself breaks ties.
mylist = sorted(
    (f for f in os.listdir(dirname) if f.lower().endswith('.wav')),
    key=lambda f: (_track_number(f), f),
)
mfcc_list = []  # one tuple of averaged MFCCs per song is appended by the MFCC loop

In [4]:
# Display the file names in the order in which they will be processed.
mylist

['1. Yaarukkum Sollaama.wav',
 '2.Unna Paartha Naeram.wav',
 '3.Melala Vedikudhu.wav',
 '4.Gangster.wav',
 '5.Madurai_Ponnu.wav',
 '6.Yedho_Mayakkam.wav',
 '7.Yealae Yealae Dosthu Da.wav',
 '8. Ennai Saaithaalae.wav',
 '9. Vaan Engum Nee Minna.wav',
 '10.Boomi Enna Suthudhe.wav',
 '11.Local Boys.wav',
 '12.Velicha Poove Va.wav',
 '13.Mokkamanusha.wav',
 '14.Oru_Porambokku.wav',
 '15.Konjum_Kili.wav',
 '16.Aaga naga.wav',
 '17.Amali Thumali.wav',
 '18.Gala gala.wav',
 '19.Ayayayo Aanthamey.wav',
 '20.Sollitaley Ava Kaadhala.wav',
 '21.Naani Koni.wav',
 '22.Theeyae Theeyae.wav',
 '23.Kaal Mulaitha Poovae.wav',
 '24.Jal Jal Jal Oosai.wav',
 '25.Yenna Solla.wav',
 '26.Vilayadu Mankatha.wav',
 '27. Vaada Bin Laada.wav',
 '28.Machi Open The Bottle.wav',
 '29.Balle Lakka.wav',
 '30.Kaadhal Yen Kaadhal.wav',
 '31.Putham Puthu.wav',
 '32. Kadhal Ennulle.wav',
 '33.Pistah.wav',
 '34.Chillena.wav',
 '35.Imaye Imaye.wav',
 '36.Oday Oday.wav',
 '37..Azhagho Azhaghu.wav',
 '38.Adi Raangi.wav',
 '39.

In [5]:
# ---- Analysis parameters ----

# Audio
sample_rate = 22050                     # samples per second used to build the filterbank
signal = []                             # placeholder; rebound to each file's samples in the MFCC loop

# Framing (durations are in seconds and converted to samples in the MFCC loop)
pre_emphasis = 0.97                     # weight of the previous sample in the pre-emphasis step
frame_size, frame_stride = 0.025, 0.01  # frame length and hop between frame starts

# Spectrum / cepstrum sizes
NFFT, nfilt, num_ceps = 512, 40, 13     # FFT length, number of filters, coefficients kept

## Constructing the Mel filterbank

In [6]:
# Build `nfilt` triangular filters whose edges are equally spaced on the Mel
# scale between 0 Hz and half the sample rate. Each row of `fbank` is one
# filter; each column is one bin of an NFFT-point real FFT.
low_freq_mel = 0
high_freq_mel = 2595 * numpy.log10(1 + (sample_rate / 2) / 700)      # Hz -> Mel
mel_points = numpy.linspace(low_freq_mel, high_freq_mel, nfilt + 2)  # nfilt filters need nfilt + 2 edges
hz_points = 700 * (10 ** (mel_points / 2595) - 1)                    # Mel -> Hz
# NOTE: `bin` shadows the Python builtin of the same name; the name is kept so
# that any other cell referring to it keeps working.
bin = numpy.floor((NFFT + 1) * hz_points / sample_rate)

fbank = numpy.zeros((nfilt, int(numpy.floor(NFFT / 2 + 1))))
# Walk over consecutive (left, centre, right) edge triples, one per filter.
for row, (left, centre, right) in enumerate(zip(bin[:-2], bin[1:-1], bin[2:])):
    lo, mid, hi = int(left), int(centre), int(right)
    # Rising slope from the left edge up to (but excluding) the centre bin.
    fbank[row, lo:mid] = (numpy.arange(lo, mid) - left) / (centre - left)
    # Falling slope from the centre bin down to (but excluding) the right edge.
    fbank[row, mid:hi] = (right - numpy.arange(mid, hi)) / (right - centre)

## Compute MFCCs for every song in the list

In [9]:
%%time
# Start from an empty result list so that re-running this cell does not append
# a second copy of every song's features.
mfcc_list = []

# These values depend only on the configuration, not on the individual file,
# so they are computed once instead of once per song.
frame_length = int(round(frame_size * sample_rate))   # samples per frame
frame_step = int(round(frame_stride * sample_rate))   # samples between frame starts
window = numpy.hamming(frame_length)
cep_lifter = 22

for i in mylist:
    # Read 30 s of audio starting 20 s into the file. `sr` is passed explicitly
    # so the samples are at the rate the Mel filterbank (`fbank`) was built
    # for, rather than relying on librosa's default happening to match.
    signal, sample_rate = librosa.load(os.path.join(dirname, i), sr=sample_rate, offset=20, duration=30)
    if len(signal) == 0:
        # Fail with the file name instead of an opaque IndexError on signal[0].
        raise ValueError("No audio available after the 20 s offset in file: " + i)

    # Pre-emphasis: y[t] = x[t] - pre_emphasis * x[t - 1]; the first sample is kept as is.
    emphasized_signal = numpy.append(signal[0], signal[1:] - pre_emphasis * signal[:-1])
    signal_length = len(emphasized_signal)

    # Number of frames; clamped so that there is always at least one frame
    # (the unclamped formula gives 0 when the clip is exactly one frame long).
    num_frames = int(numpy.ceil(float(numpy.abs(signal_length - frame_length)) / frame_step))
    num_frames = max(1, num_frames)

    # Zero-pad the tail so that every frame has exactly frame_length samples.
    pad_signal_length = num_frames * frame_step + frame_length
    z = numpy.zeros((pad_signal_length - signal_length))
    pad_signal = numpy.append(emphasized_signal, z)

    # indices[r, c] = r * frame_step + c, i.e. row r selects the samples of frame r.
    indices = numpy.tile(numpy.arange(0, frame_length), (num_frames, 1)) + numpy.tile(numpy.arange(0, num_frames * frame_step, frame_step), (frame_length, 1)).T
    frames = pad_signal[indices.astype(numpy.int32, copy=False)]
    frames *= window

    mag_frames = numpy.absolute(numpy.fft.rfft(frames, NFFT))  # Magnitude of the FFT
    pow_frames = ((1.0 / NFFT) * ((mag_frames) ** 2))  # Power Spectrum
    filter_banks = numpy.dot(pow_frames, fbank.T)
    filter_banks = numpy.where(filter_banks == 0, numpy.finfo(float).eps, filter_banks)  # Avoid log10(0)
    # NOTE(review): a power quantity is conventionally converted to dB with
    # 10 * log10; the factor 20 is kept so that existing results do not change.
    filter_banks = 20 * numpy.log10(filter_banks)  # dB

    # Drop DCT coefficient 0 and keep the next num_ceps coefficients (indices 1..num_ceps).
    mfcc = dct(filter_banks, type=2, axis=1, norm='ortho')[:, 1 : (num_ceps + 1)]

    # Sinusoidal liftering of the cepstral coefficients.
    (nframes, ncoeff) = mfcc.shape
    n = numpy.arange(ncoeff)
    lift = 1 + (cep_lifter / 2) * numpy.sin(numpy.pi * n / cep_lifter)
    mfcc *= lift

    # One feature vector per song: each coefficient averaged over all frames.
    avg_mf = numpy.mean(mfcc, axis=0)
    mfcc_list.append(tuple(avg_mf))

Wall time: 8min 40s


In [20]:
# Scale the whole feature matrix (one row per song) by a single factor: the
# norm of the full matrix. With no `axis` argument numpy.linalg.norm treats the
# 2-D input as one unit (Frobenius norm), so rows are NOT normalised individually.
norm_mfcc = numpy.divide(mfcc_list, numpy.linalg.norm(mfcc_list))
norm_mfcc

array([[  4.96646871e-04,   6.64354385e-03,  -3.48481644e-02, ...,
         -2.26240203e-02,  -2.04498437e-02,  -8.90722019e-03],
       [ -3.81806007e-03,  -1.08525255e-02,  -3.19801097e-02, ...,
         -5.82960825e-03,  -2.14654830e-02,  -8.07250403e-03],
       [ -1.31387061e-02,  -1.14949978e-03,  -7.39691503e-03, ...,
         -2.10410782e-02,  -1.17217784e-02,  -1.34686547e-02],
       ..., 
       [ -7.25354790e-03,  -4.12069179e-03,  -2.06653727e-03, ...,
         -4.63689167e-03,  -7.39832716e-03,  -1.35942178e-02],
       [ -6.84076317e-03,  -1.50544463e-02,   6.75730111e-03, ...,
         -3.07053342e-04,  -1.42279894e-02,  -3.92615753e-03],
       [ -8.75178269e-03,  -1.79277674e-02,  -1.35302729e-05, ...,
          2.26101661e-03,   3.79991494e-04,   8.51915380e-03]])

In [23]:
import csv

# Save the normalised features: one CSV row per song, one column per coefficient.
# newline='' lets the csv module control line endings itself.
with open("output_mfcc.csv", "w", newline='') as csv_file:
    csv.writer(csv_file).writerows(norm_mfcc)