The following cell loads in all the libraries that we previously installed; this makes it so that we can access the functions in these libraries instead of having to code these capabilities from scratch.

In [1]:
import csv
import cv2
import numpy as np
import librosa
import librosa.display
import wave
import os
import matplotlib.pyplot as plt
import multiprocessing 
import pickle

The next cell defines the folder paths to both the source folder where the audio is stored and the folder where the generated images should go.

In [2]:
SOURCE_FOLDER_TRAIN= '../AudioData/Cats-Vs-Dogs/'
OUTPUT_FOLDER_TRAIN = '../GeneratedData/Cats-Vs-Dogs/'

The following cell defines the filename where the Spectrum Variables should be read from. This will load in a file that tells this script how to compute the spectrogram.

In [4]:
#Loading in the Spectrogram variables
FileName =input("Please type filename without the file ending here => ")
SpectrumVariables = pickle.load(open( FileName+'.SpecVar', "rb" ) )
SpectrumVariables

Please type filename without the file ending here => Standard


{'RESOLUTION': 224,
 'SAMPLE_RATE': 48000,
 'N_FFT': 3000,
 'HOP_LENGTH': 512,
 'FMIN': 0,
 'FMAX': 24000,
 'N_MELS': 224,
 'POWER': 2.1}

This defines the function that creates logarithmic spectragrams of the audio file designated in the `src_path` into the folder in the `dst_path`.

In [5]:
def log_mel_spec_tfm(src_path, dst_path):
    #print(src_path)
    fname=src_path.split('/')[-1];
    x=[]
    sample_rate=0
    try:
        channels ,sample_rate_in = librosa.load(src_path,mono=False)
    except:
        print(fname+" Could not be computed!")
        return
    if(channels.ndim==1):
        channels= channels.reshape((1,(len(channels))))
    channelcounter=0
    for audio_data in channels:
        channelcounter+=1
        
        RESOLUTION=SpectrumVariables["RESOLUTION"] 
        
        N_FFT=SpectrumVariables["N_FFT"]
        HOP_LENGTH= SpectrumVariables["HOP_LENGTH"]
        FMIN=SpectrumVariables["FMIN"]
        FMAX=SpectrumVariables["FMAX"]
        N_MELS=SpectrumVariables["N_MELS"]
        POWER=SpectrumVariables["POWER"]
        #audio_data = librosa.resample(x, sample_rate_in, SAMPLE_RATE)
        mel_spec_power = librosa.feature.melspectrogram(audio_data, sr=sample_rate_in, n_fft=N_FFT, 
                                                        hop_length=HOP_LENGTH, 
                                                        n_mels=N_MELS, power=POWER,
                                                       fmin=FMIN,fmax=FMAX)
        mel_spec_db = librosa.power_to_db(mel_spec_power, ref=np.max)
        #pickle.dump(mel_spec_db, open( './lastSpect.pickle', "wb" ))
        image = mel_spec_db; # convert to float
        image -= image.min() # ensure the minimal value is 0.0
        image /= image.max() # maximum value in image is now 1.0
        image*=256;
        img = image.astype(np.uint8)
        colorPic = cv2.applyColorMap(img, cv2.COLORMAP_BONE)
        #cv2.imshow('dst_rt', colorPic)
        #cv2.waitKey(0)
        count=0
        for i in range(int(np.floor(colorPic.shape[1]/RESOLUTION))):
            count+=1
            startx=RESOLUTION*i
            stopx=RESOLUTION*(i+1)
            OutputImage = cv2.resize(colorPic[:,startx:stopx,:],(RESOLUTION,RESOLUTION))
            plt.imsave(os.path.join(dst_path,(fname.replace(".wav",'-').replace(".m4a",'-').replace(".mp3",'-') +str(i)+'-ch-'+str(channelcounter)+ '.png')), OutputImage) 
        if(count==0):
            print(src_path)
            #print("WARNING: => Jumped A file. If you see this often, increas sampleRate or Hop length.")

The following folder is a wrapper function for the previous function, going through all the audio files in `IN_FOLDER`.

In [6]:
def ComputeSpectrograms(IN_FOLDER,OUT_FOLDER):
    print("I will print the file path to files that are too short to fit into one full picture.")
    SourceFoldersLabels = [f.path for f in os.scandir(IN_FOLDER) if f.is_dir()]
    for path in SourceFoldersLabels:
        FileList = [f.path for f in os.scandir(path) if f.is_file() and (f.name.endswith(".wav") or f.name.endswith(".m4a") or f.name.endswith(".mp3"))]
        Label = path.split('/')[-1]
        outFolder = os.path.join(OUT_FOLDER,Label)
        if not os.path.exists(outFolder):
            os.makedirs(outFolder)
        with multiprocessing.Pool(12) as p:
            p.starmap(log_mel_spec_tfm, [(f,outFolder) for f in FileList])
        print("Finished this class. Going to the next")
    print("Done generating spectra!")  

This moves the Spectrum Variables file into the output folder so that we can correspond the images with labels.

In [7]:
ComputeSpectrograms(SOURCE_FOLDER_TRAIN,OUTPUT_FOLDER_TRAIN)
pickle.dump(SpectrumVariables, open(os.path.join(OUTPUT_FOLDER_TRAIN,'Main.SpecVar'), "wb" ))
print("Notebook Done")

I will print the file path to files that are too short to fit into one full picture.




../AudioData/Cats-Vs-Dogs/Dogs/231762__davidmenke__fx-dog-barking.wav




../AudioData/Cats-Vs-Dogs/Dogs/231762__davidmenke__fx-dog-barking.wav
../AudioData/Cats-Vs-Dogs/Dogs/163459__littlebigsounds__lbs-fx-dog-small-alert-bark001.wav
../AudioData/Cats-Vs-Dogs/Dogs/163459__littlebigsounds__lbs-fx-dog-small-alert-bark001.wav




../AudioData/Cats-Vs-Dogs/Dogs/347763__apolloaiello__dog-barking.wav
../AudioData/Cats-Vs-Dogs/Dogs/347763__apolloaiello__dog-barking.wav


Process ForkPoolWorker-1:
Traceback (most recent call last):
  File "/Users/dg/anaconda3/envs/Workshop/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/dg/anaconda3/envs/Workshop/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/dg/anaconda3/envs/Workshop/lib/python3.6/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/Users/dg/anaconda3/envs/Workshop/lib/python3.6/multiprocessing/pool.py", line 47, in starmapstar
    return list(itertools.starmap(args[0], args[1]))
  File "<ipython-input-5-17a779f1e6a1>", line 29, in log_mel_spec_tfm
    fmin=FMIN,fmax=FMAX)
  File "/Users/dg/anaconda3/envs/Workshop/lib/python3.6/site-packages/librosa/feature/spectral.py", line 1793, in melspectrogram
    pad_mode=pad_mode)
  File "/Users/dg/anaconda3/envs/Workshop/lib/python3.6/site-packages/librosa/core/spectrum.py", line 1838, in _spectro

24965__www-bonson-ca__bigdogbarking-02.wav Could not be computed!


KeyboardInterrupt: 