In [1]:
import tensorflow as tf
import tensorflow_io as tfio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [2]:
def load_audio_16k(filename, mono=False):
    """Read an audio file as float32 and resample it to 16 kHz.

    Args:
        filename: Path of the audio file, passed straight to
            ``tfio.audio.AudioIOTensor``.
        mono: When true, only the first channel (index 0 on the second axis)
            is kept before resampling; the channels are not mixed together.

    Returns:
        The resampled audio tensor. With ``mono=False`` every channel of the
        decoded tensor is kept.
    """
    source = tfio.audio.AudioIOTensor(filename, dtype=tf.float32)

    # The resampler is given the file's native rate as an int64 value.
    native_rate = tf.cast(source.rate, dtype=tf.int64)

    samples = source.to_tensor()
    # assumes the decoded tensor is laid out as [samples, channels] — TODO confirm
    samples = samples[:, 0] if mono else samples

    return tfio.audio.resample(samples, rate_in=native_rate, rate_out=16000)

In [3]:
def preprocess(file_path, label):
    """Cut one stereo clip into 30 fixed-length windows and return their dB mel spectrograms.

    The clip is loaded at 16 kHz with both channels. Fifteen windows of
    48000 samples are taken from channel 0 and fifteen from channel 1; the
    two channels use different start offsets (multiples of ``N // 16`` and odd
    multiples of ``N // 33``, where ``N`` is the clip length in samples).

    Args:
        file_path: Path of the audio file to load.
        label: Numeric label of the clip; it is repeated once per window.

    Returns:
        A ``(spectrograms, labels)`` tuple. ``spectrograms`` is the dB-scaled
        mel spectrogram of every window with a trailing axis of size 1 added
        (expected shape ``(30, frames, 100, 1)`` — TODO confirm), and
        ``labels`` is a ``(30, 1)`` float32 NumPy array filled with ``label``.

    Raises:
        ValueError: If the decoded audio does not have at least two channels,
            or if it is too short for every window to fit inside it.
    """
    # Every window covers the sample range [window_start, window_stop) plus an offset.
    window_start, window_stop = 10000, 58000
    windows_per_channel = 15

    audio = load_audio_16k(file_path)

    # Using two channels: fail early with a readable message instead of an
    # opaque slicing error when the file is not (at least) stereo.
    if audio.shape.rank != 2 or audio.shape[1] < 2:
        raise ValueError(
            f"preprocess needs audio with at least two channels, "
            f"got shape {tuple(audio.shape)} for {file_path!r}"
        )

    num_samples = int(audio.shape[0])
    step1 = num_samples // 16
    step2 = num_samples // 33

    steps1 = np.arange(windows_per_channel) * step1
    steps2 = np.arange(start=1, stop=2 * windows_per_channel, step=2) * step2

    # tf.gather only raises on out-of-range indices on CPU; on GPU it fills the
    # output with zeros, so a clip that is too short would silently produce
    # garbage windows. Check the largest index that will be requested.
    last_index = window_stop - 1 + max(int(steps1[-1]), int(steps2[-1]))
    if last_index >= num_samples:
        raise ValueError(
            f"audio {file_path!r} has {num_samples} samples at 16 kHz, "
            f"but the windows need at least {last_index + 1}"
        )

    # np.full keeps the labels float32 whatever numeric type `label` has.
    labels = np.full((2 * windows_per_channel, 1), label, dtype=np.float32)

    # Column vector of in-window positions; adding a row of offsets broadcasts
    # to one column of sample indices per window.
    audioWindow = np.arange(window_start, window_stop).reshape(-1, 1)
    audioWindow1 = audioWindow + steps1
    audioWindow2 = audioWindow + steps2

    # Gather per channel, join the windows side by side, then transpose so
    # that each row holds one window.
    audio = tf.transpose(
        tf.concat(
            [tf.gather(audio[:, 0], audioWindow1), tf.gather(audio[:, 1], audioWindow2)],
            1,
        )
    )

    # NOTE(review): nfft (512) is smaller than window (960). If the underlying
    # FFT crops each frame to nfft samples, part of every window is discarded —
    # confirm this is intended before changing it, since it alters the features.
    spectrograms = tfio.audio.spectrogram(audio, nfft=512, window=960, stride=240)

    spectrograms = tfio.audio.melscale(spectrograms, rate=16000, mels=100, fmin=0, fmax=4000)
    spectrograms = tfio.audio.dbscale(spectrograms, top_db=70)

    # Add a trailing axis of size 1 (presumably the channel axis for a 2-D
    # convolutional model — verify against the consumer).
    spectrograms = tf.expand_dims(spectrograms, axis=3)

    return spectrograms, labels

In [4]:
def getDataPaths(*path):
    """Build a dataset of the audio files found directly under a directory.

    Args:
        *path: Path components; they are joined with ``os.path.join`` to form
            the directory to search.

    Returns:
        A ``(dataRelativePath, dataPaths)`` tuple: the joined directory path
        and a ``tf.data.Dataset`` of the file names matching any of the audio
        glob patterns, listed with shuffling disabled.
    """
    dataRelativePath = os.path.join(*path)

    # One glob pattern per audio extension, appended to the directory path.
    globSuffixes = ('/*.flac', '/*.wav', '/*.ogg', '/*.mp3', '/*.mp4a')
    filePatterns = [dataRelativePath + suffix for suffix in globSuffixes]

    dataPaths = tf.data.Dataset.list_files(filePatterns, shuffle=False, seed=1234)
    return dataRelativePath, dataPaths

In [5]:
def getLabels(dataRelativePath, dataPaths):
    """Extract the numeric label encoded at the start of each file name.

    File names are expected to look like ``<number>_<anything>``; the text
    before the first underscore of the base name is converted to ``float``.

    Args:
        dataRelativePath: Directory the files were listed from. It is kept for
            interface compatibility; the label is taken from the base name of
            each path, so the result no longer depends on the exact length of
            this string (trailing separators, normalised paths, ...).
        dataPaths: Iterable of elements whose ``.numpy()`` returns the file
            path as UTF-8 encoded bytes (e.g. an eagerly iterated
            ``tf.data.Dataset`` of file names).

    Returns:
        A list of floats, one per element of ``dataPaths``, in iteration order.

    Raises:
        ValueError: If a file name contains no underscore, or if the text
            before the underscore is not a valid number.
    """
    labels = []
    for elem in dataPaths:
        fileStringPath = elem.numpy().decode('utf-8')
        # Work on the base name so the directory part cannot shift the slice.
        fileName = os.path.basename(fileStringPath)
        prefix, underscore, _ = fileName.partition('_')
        if not underscore:
            raise ValueError(
                f"cannot extract a label from {fileStringPath!r}: "
                "expected a file name of the form '<number>_<name>'"
            )
        labels.append(float(prefix))

    return labels

In [6]:
def getData(*args):
    """Create a dataset of ``(file path, label)`` pairs for a directory.

    Args:
        *args: Path components forwarded unchanged to ``getDataPaths``.

    Returns:
        A ``tf.data.Dataset`` that zips the listed file paths with the labels
        that ``getLabels`` derives from them.
    """
    directory, pathDataset = getDataPaths(*args)
    labelDataset = tf.data.Dataset.from_tensor_slices(getLabels(directory, pathDataset))
    return tf.data.Dataset.zip((pathDataset, labelDataset))

In [9]:
# Dataset of (file path, label) pairs for the audio files matched under 'Extraction2'.
data = getData('Extraction2')

In [10]:
# Run preprocess on every (path, label) pair. The path is printed before the
# call, so if a call fails the last printed path is the file being processed.
for pathD, labelD in data.as_numpy_iterator():
    print(pathD)
    # `valid` is overwritten on each pass, so only the last file's result is kept.
    valid = preprocess(pathD, labelD)

b'Extraction2\\105_GimmexGimme.mp3'
b'Extraction2\\108_Cutlery.mp3'
b'Extraction2\\110_Blessing.mp3'
b'Extraction2\\115_Ready steady.mp3'
b'Extraction2\\124_Cinema.mp3'
b'Extraction2\\127_Happy synthesizer.mp3'
b'Extraction2\\128_Dreamin chuchu.mp3'
b'Extraction2\\128_Miku.mp3'
b'Extraction2\\128_Next nest.mp3'
b'Extraction2\\130_Ifuudoudou.mp3'
b'Extraction2\\130_Traffic jam.mp3'
b'Extraction2\\130_Yoru ni kakeru.mp3'
b'Extraction2\\132_Once upon a dream.mp3'
b'Extraction2\\134_Otome kaibou.mp3'
b'Extraction2\\138_Mirai.mp3'
b'Extraction2\\140_Echo.mp3'
b'Extraction2\\140_Forward.mp3'
b'Extraction2\\140_Hibikase.mp3'
b'Extraction2\\140_RAD dogs.mp3'
b'Extraction2\\143_Oki ni mesu mama.mp3'
b'Extraction2\\145_Junky night town orchestra.mp3'
b'Extraction2\\149_Dramaturgy.mp3'
b'Extraction2\\150_On the rocks.mp3'
b'Extraction2\\150_Roki.mp3'
b'Extraction2\\150_Tell your world.mp3'
b'Extraction2\\152_Color of drops.mp3'
b'Extraction2\\152_Venom.mp3'
b'Extraction2\\153_Darling dance.mp3'
b