In [1]:
import pandas as pd
import numpy as np
import scipy
import soundfile as sf

from scipy.io import wavfile
import librosa
import librosa.display
import os

In [2]:
def halfSecondSplit(samples, samplerate):
    """Split an audio signal into consecutive, non-overlapping 0.5 second fragments.

    Parameters
    ----------
    samples : array-like
        Audio samples. A 1-D (single channel) signal is expected; the
        fragment boundaries are computed along the first axis.
    samplerate : int
        Number of samples per second of audio.

    Returns
    -------
    numpy.ndarray
        2-D array with one fragment per row and ``samplerate // 2`` samples
        per fragment. Trailing samples that do not fill a whole fragment are
        discarded, so input shorter than half a second yields zero rows.

    Raises
    ------
    ValueError
        If ``samplerate`` is too small for a fragment to contain a sample.
    """
    samples = np.asarray(samples)

    # Integer arithmetic throughout: with a float fragment length (odd sample
    # rates) the truncation and the reshape dimensions disagree and fail.
    halfSecondSamples = int(samplerate) // 2
    if halfSecondSamples < 1:
        raise ValueError(
            "samplerate must be at least 2 samples per second, got %r" % (samplerate,)
        )

    # Disregard the trailing part that is shorter than half a second.
    fragmentCount = len(samples) // halfSecondSamples
    usable = samples[:fragmentCount * halfSecondSamples]

    return np.reshape(usable, (usable.size // halfSecondSamples, halfSecondSamples))

In [3]:
def loadAudio(path):
    """Read an audio file and split it into 0.5 second fragments.

    Parameters
    ----------
    path : str
        Location of the audio file, passed to ``soundfile.read``.

    Returns
    -------
    tuple
        ``(halfseconds, sr)`` where ``halfseconds`` is the 2-D array produced
        by ``halfSecondSplit`` (one fragment per row) and ``sr`` is the sample
        rate reported by ``soundfile.read``.
    """
    audiodata, sr = sf.read(path)

    # soundfile returns a (frames, channels) array for multi-channel files.
    # Splitting that directly would interleave the channels inside every
    # fragment, so average the channels down to a single mono signal first.
    if audiodata.ndim > 1:
        audiodata = audiodata.mean(axis=1)

    halfseconds = halfSecondSplit(audiodata, sr)
    return halfseconds, sr

In [4]:
def convertToSpectro(data, samplerate):
    """Compute a mel spectrogram of an audio fragment with librosa.

    Parameters
    ----------
    data : numpy.ndarray
        Audio samples of the fragment.
    samplerate : int
        Sample rate of ``data`` in samples per second.

    Returns
    -------
    The value returned by ``librosa.feature.melspectrogram``, computed with a
    Hann window, an FFT size of a quarter second and a hop of an eighth of a
    second.
    """
    # The signal must be passed as the keyword ``y``: recent librosa releases
    # reject positional arguments here, and ``convertToMFCC`` already uses it.
    Spectro = librosa.feature.melspectrogram(
        y=data,
        sr=samplerate,
        window=scipy.signal.windows.hann,
        n_fft=int(samplerate / 4),
        hop_length=int(samplerate / 8),
    )
    return Spectro

In [5]:
def convertToMFCC(data, samplerate):
    """Compute 40 MFCC coefficients for an audio fragment with librosa.

    ``data`` holds the fragment's samples and ``samplerate`` its sample rate;
    the result of ``librosa.feature.mfcc`` is returned unchanged.
    """
    mfccOptions = {"y": data, "sr": samplerate, "n_mfcc": 40}
    return librosa.feature.mfcc(**mfccOptions)

In [6]:
def wavsToData(audiofiledir, featureExtractor=None):
    """Turn every ``.wav`` file in a directory into flattened feature vectors.

    Each file is loaded with ``loadAudio``, which yields half-second
    fragments; every fragment is passed through the feature extractor and the
    result is flattened to 1-D with ``flattenArray``.

    Parameters
    ----------
    audiofiledir : str
        Directory containing the audio files. A trailing path separator is
        accepted but not required.
    featureExtractor : callable, optional
        Function called as ``featureExtractor(fragment, samplerate)``.
        Defaults to ``convertToMFCC``; ``convertToSpectro`` has the same
        signature and can be passed instead.

    Returns
    -------
    list
        One flattened feature vector per half-second fragment, ordered by
        file name and then by position within the file.
    """
    if featureExtractor is None:
        featureExtractor = convertToMFCC

    audioFragments = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the resulting dataset reproducible between runs.
    for filename in sorted(os.listdir(audiofiledir)):
        if not filename.endswith(".wav"):
            continue
        halfSecondSamples, sr = loadAudio(os.path.join(audiofiledir, filename))
        audioFragments.extend(
            flattenArray(featureExtractor(halfSecond, sr))
            for halfSecond in halfSecondSamples
        )
    return audioFragments

In [7]:
def saveFragmentsToNP(data, path='./Output/Output.npy'):
    """Save the feature vectors to a NumPy ``.npy`` file.

    Parameters
    ----------
    data : array-like
        Feature vectors to store; handed to ``numpy.save`` as-is.
    path : str, optional
        Destination file. Defaults to ``./Output/Output.npy``.
    """
    # numpy.save does not create missing directories, so make sure the
    # destination folder exists before writing.
    outputDir = os.path.dirname(path)
    if outputDir:
        os.makedirs(outputDir, exist_ok=True)
    np.save(path, data)

In [8]:
def flattenArray(data):
    """Return the contents of ``data`` as a new one-dimensional NumPy array.

    ``data`` may be any array-like; it is copied into an array and collapsed
    in row-major order.
    """
    return np.array(data).flatten()

In [12]:
def main():
    """Extract features from the .wav files under ./Dataset/ and save them.

    The fragments come from ``wavsToData`` and are written out by
    ``saveFragmentsToNP``.
    """
    datasetDir = "./Dataset/"  # the directory path ends with '/'
    saveFragmentsToNP(wavsToData(datasetDir))


main()

In [13]:
# Sanity check: reload the saved feature array and print its shape.
print(np.load('./Output/Output.npy').shape)

(2300, 1760)
