In [2]:
import json

import numpy as np
import librosa, librosa.display
import matplotlib.pyplot as plt

# Path of the JSON file the extracted MFCCs are written to.
mfcc_path = "mfcc.json"

# Default (width, height) passed to plt.figure for every plot in this notebook.
FIG_SIZE = (15,10)

# Audio clip analysed by the cells below.
file_path = "genres/blues/blues.00000.wav"

# Load the audio file with librosa, resampled to 22050 Hz.
# Fix: the original passed the undefined name `file`; the path variable
# declared just above is `file_path`.
signal, sample_rate = librosa.load(file_path, sr=22050)

In [None]:

# WAVEFORM
# Plot the raw signal amplitude against time.
plt.figure(figsize=FIG_SIZE)
# `waveplot` was removed in librosa 0.10 in favour of `waveshow`; use
# whichever function this installation provides. The sample rate is passed
# by keyword because newer librosa releases make it keyword-only.
_plot_waveform = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot
_plot_waveform(signal, sr=sample_rate, alpha=0.4)
plt.xlabel("Time (s)")
plt.ylabel("Amplitude")
plt.title("Waveform")

In [None]:
# FFT -> power spectrum
# Discrete Fourier transform of the whole signal (complex coefficients).
fft = np.fft.fft(signal)

# Magnitude of each complex coefficient.
spectrum = np.abs(fft)

# Frequency of every FFT bin. Bin k sits at k * sample_rate / N, so the axis
# must stop one bin short of sample_rate: endpoint=False gives exactly that
# spacing (the default endpoint=True would stretch every frequency by N/(N-1)).
f = np.linspace(0, sample_rate, len(spectrum), endpoint=False)

# Keep only the first half of the bins (the half below sample_rate / 2).
left_spectrum = spectrum[:len(spectrum) // 2]
left_f = f[:len(spectrum) // 2]

# Plot the spectrum.
# NOTE: the plotted values are magnitudes (|FFT|), not squared power, even
# though the title below says "Power spectrum".
plt.figure(figsize=FIG_SIZE)
plt.plot(left_f, left_spectrum, alpha=0.4)
plt.xlabel("Frequency")
plt.ylabel("Magnitude")
plt.title("Power spectrum")


In [None]:
# STFT -> spectrogram
# Analysis parameters, both expressed in samples.
n_fft = 2048       # length of each analysis window
hop_length = 512   # step between successive windows

# The same two quantities converted to seconds, for the reader's benefit.
hop_length_duration = hop_length / sample_rate
n_fft_duration = n_fft / sample_rate

print("STFT hop length duration is: {}s".format(hop_length_duration))
print("STFT window duration is: {}s".format(n_fft_duration))

# Short-time Fourier transform of the signal (complex-valued).
stft = librosa.stft(signal, hop_length=hop_length, n_fft=n_fft)

# Keep only the magnitude of each complex STFT value.
spectrogram = np.abs(stft)

# Draw the magnitude spectrogram with a colorbar.
plt.figure(figsize=FIG_SIZE)
librosa.display.specshow(
    spectrogram,
    hop_length=hop_length,
    sr=sample_rate,
)
plt.xlabel("Time")
plt.ylabel("Frequency")
plt.colorbar()
plt.title("Spectrogram")

In [None]:
# Convert the magnitude spectrogram to a decibel scale.
log_spectrogram = librosa.amplitude_to_db(spectrogram)

# Draw the dB-scaled spectrogram; the colorbar ticks are formatted in dB.
plt.figure(figsize=FIG_SIZE)
librosa.display.specshow(
    log_spectrogram,
    hop_length=hop_length,
    sr=sample_rate,
)
plt.xlabel("Time")
plt.ylabel("Frequency")
plt.colorbar(format="%+2.0f dB")
plt.title("Spectrogram (dB)")

In [7]:
# MFCCs
# Extract 13 MFCCs using the same window and hop as the STFT cell.
# The signal and sample rate are passed by keyword: librosa >= 0.10 makes
# them keyword-only, and the keyword form also works on older releases.
MFCCs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_fft=n_fft, hop_length=hop_length, n_mfcc=13)


print(MFCCs)
# # display MFCCs
# plt.figure(figsize=FIG_SIZE)
# librosa.display.specshow(MFCCs, sr=sample_rate, hop_length=hop_length)
# plt.xlabel("Time")
# plt.ylabel("MFCC coefficients")
# plt.colorbar()
# plt.title("MFCCs")

# # show plots
# plt.show()



[[-211.48465    -208.94481    -193.90889    ... -109.999146
   -86.84641     -79.03764   ]
 [  99.022964    101.24678     102.24396    ...  150.07935
   138.89685     140.24707   ]
 [ -10.634699     -9.346699      1.9154348  ...  -50.795135
   -36.613983    -31.443943  ]
 ...
 [  -2.1509948    -3.7085578    -9.185015   ...  -12.147305
    -9.283383    -11.596716  ]
 [  -2.5450842     0.64133793   -2.1272187  ...    6.2527533
     2.7007575     1.2497692 ]
 [  -2.3836365    -1.6919363    -3.8449044  ...    4.9566736
    -2.6966052    -7.690168  ]]


In [None]:
# (intentionally empty: this cell held a stray "=" that raised a SyntaxError)

In [None]:
def save_mfcc(file_path, mfcc_path, num_mfcc = 13, n_fft=2048, hop_length=512, sample_rate=22050):
    """Extract MFCCs from one audio file and write them to a JSON file.

    Args:
        file_path: Path of the audio file to analyse.
        mfcc_path: Path of the JSON file to write; it is opened in "w" mode,
            so an existing file is overwritten.
        num_mfcc: Number of MFCC coefficients to extract.
        n_fft: FFT window size, in samples.
        hop_length: Step between successive windows, in samples.
        sample_rate: Sample rate requested from librosa.load when reading
            the file.

    Returns:
        None. The JSON file holds {"mfcc": [matrix]}, where matrix is the
        transposed MFCC array converted to nested lists.
    """
    # Load the audio named by the argument. The original body read the
    # notebook-level `signal` / `sample_rate` globals and ignored file_path.
    audio, sample_rate = librosa.load(file_path, sr=sample_rate)

    # Keyword arguments work on every librosa version (positional y/sr were
    # dropped in 0.10). The original also passed the undefined name `n_mfcc`
    # here instead of the `num_mfcc` parameter.
    mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate, n_fft=n_fft, hop_length=hop_length, n_mfcc=num_mfcc)
    # Transpose before serialising, as the original did.
    mfcc = mfcc.T

    data = {
        "mfcc": [mfcc.tolist()],
    }

    # This write used to sit outside the function, where `data` does not
    # exist, and passed the misspelled handle `fo` to json.dump.
    with open(mfcc_path, "w") as fp:
        json.dump(data, fp, indent=4)
    
    
    
