In [1]:
# Importing all the important libraries

import glob
import os
import librosa
import re
import numpy as np
librosa.util.MAX_MEM_BLOCK = 262144/4
import librosa.display
import numpy as np
from scipy import misc
from scipy import signal
from sklearn.model_selection import train_test_split

# Loading all the sound files

In [2]:
def chunkIt(seq, num):
    """Extracts chunks from the audio"""
    avg = num
    out = []
    last = 0.0
    while last < len(seq):
        if last+avg < len(seq):
            out.append(seq[int(last):int(last + avg)])
        else:
            out.append(seq[int(len(seq) - avg):int(len(seq))])
        last += avg
    return out

def load_sounds(dataSetDir, input_sr, chunk_size=5):
    """
    Returns the sounds in a time-series format
    The name of the files should be in the ESC-50-audio dataset format
    input_sr is the sampling rate and should be set to default.
    Also returns the names and the labels of files
    """
    
    sounds = []
    names = []
    labels = []
    
    j = 0
    datasetSize = len(os.listdir(dataSetDir))
    
    print("Number of audio samples to be converted : " + str(datasetSize))
    
    for file in os.listdir(dataSetDir):
        
        sound,sr = librosa.load(dataSetDir+"//"+file,sr = input_sr)
        #print(len(sound))
        new_sounds = chunkIt(sound, input_sr*chunksize)
        for i in range(len(new_sounds)):
            sounds.append(new_sounds[i])
            names.append(file[:-4]+str(i))

            label = re.findall(r'\d+\.wav$',file)
            label = int((label[0][:-4]))

            labels.append(label)

            j += 1
            if j % 100000 == 99:
                print(str(j+1) + " audio samples extracted: Progress = " + 
                      str((j+1)/datasetSize * 100) +  "%" )

    return sounds,names,labels

# Enter the relative dataset Directory and the sampling rate

In [3]:
dataSetDir = "InsectSoundDataset//All"
sr = 8000
chunksize = 3

# Load the audio signals in a time series format

In [4]:
soundSet,nameSet, labelSet = load_sounds(dataSetDir, sr,chunksize)
print(len(soundSet))

Number of audio samples to be converted : 74
100 audio samples extracted: Progress = 135.13513513513513%
405


In [5]:
def spectogram_feature_extractor(sound, name, sr, frameLength):
    """
    Extracts the spectogram features from the sound file,
    converts them into a 256 X 256 image and saves them into a
    appropriate folder.
    
    sound is the audio signal in time series format
    name is the name of the original audio file so
    the spectogram image can be saved properly.
    sr is the sampling rates at which the audio files were sampled.
    framelength is the length of each frame considered for obtained the spectogram
    """
    
    frameSpan = int(frameLength*sr) 
    
    tot_seg = sr * 5 * 2 // frameSpan
    nperseg = 10*sr//tot_seg
    
    # Obtaining the spectogram
    f, t, Sxx = signal.spectrogram(sound, sr, nperseg = nperseg,
                                   noverlap = nperseg//2, nfft = max(256,nperseg))
    
    # Converting the spectogram into a 256 x 256 b/w image"
    SxxNew = misc.imresize(np.flip(Sxx,0),(256,256))
    
    # Saving the spectogram in the appropriate directory
    dirName = "ins" + str(sr/1e3) + "KHz_framelength" + str(frameLength*1e3)  + "chunk"+ str(chunksize)
    misc.imsave(dirName + "//" + name + 'spect'  +'.png',SxxNew)
    
    return Sxx

def dataset_spectogram_feature_extractor(soundSet,nameSet,sr,frameLength, chunksize=5):
    """
    Given a list of sound signals and their corresponding names, 
    extracts the spectograms in an image form and saves them in an 
    appropriate folder.
    
    sr is the sampling rates at which the audio files were sampled.
    framelength is the length of each frame considered for obtained the spectogram
    """
    featureSet  = []
    for i in range(len(soundSet)):
        spectogram_feature_extractor(soundSet[i],nameSet[i], sr, frameLength)
    print("Spectogram image features have been extracted and saved in the folder " + 
         "ins" +  str(sr/1e3) + "KHz_framelength" + str(frameLength*1e3)  + "chunk"+ str(chunksize) + ".")
    return  

In [6]:
# Extraction with frame length 20 ms
frameLength = 20 * 1e-3
if (os.path.isdir("ins" + str(sr/1e3) + "KHz_framelength" + str(frameLength*1e3)  + "chunk"+ str(chunksize))  != True):
    os.mkdir("ins" + str(sr/1e3) + "KHz_framelength" + str(frameLength*1e3)  + "chunk"+ str(chunksize))
features = dataset_spectogram_feature_extractor(np.array(soundSet), 
                                                nameSet, sr, frameLength)

Spectogram image features have been extracted and saved in the folder ins8.0KHz_framelength20.0chunk5.


In [7]:
# Extraction with frame length 30 ms
frameLength = 30 * 1e-3
if (os.path.isdir("ins" + str(sr/1e3) + "KHz_framelength" + str(frameLength*1e3)  + "chunk"+ str(chunksize)) != True):
    os.mkdir("ins" + str(sr/1e3) + "KHz_framelength" + str(frameLength*1e3)  + "chunk"+ str(chunksize))
features = dataset_spectogram_feature_extractor(np.array(soundSet), 
                                                nameSet, sr, frameLength)

Spectogram image features have been extracted and saved in the folder ins8.0KHz_framelength30.0chunk5.


In [8]:
# Extraction with frame length 40 ms
frameLength = 40 * 1e-3
if (os.path.isdir("ins" + str(sr/1e3) + "KHz_framelength" + str(frameLength*1e3)  + "chunk"+ str(chunksize)) != True):
    os.mkdir("ins" + str(sr/1e3) + "KHz_framelength" + str(frameLength*1e3)  + "chunk"+ str(chunksize))
features = dataset_spectogram_feature_extractor(np.array(soundSet), 
                                                nameSet, sr, frameLength)

Spectogram image features have been extracted and saved in the folder ins8.0KHz_framelength40.0chunk5.


In [9]:
# Extraction with frame length 50 ms
frameLength = 50 * 1e-3
if (os.path.isdir("ins" + str(sr/1e3) + "KHz_framelength" + str(frameLength*1e3)  + "chunk"+ str(chunksize)) != True):
    os.mkdir("ins" + str(sr/1e3) + "KHz_framelength" + str(frameLength*1e3)  + "chunk"+ str(chunksize))
features = dataset_spectogram_feature_extractor(np.array(soundSet), 
                                     nameSet, sr, frameLength)

Spectogram image features have been extracted and saved in the folder ins8.0KHz_framelength50.0chunk5.
