In [88]:
import glob
import os
import random
import tensorflow as tf
import numpy as np

In [89]:
#declare global variables
TRAIN_SET_DIR = os.path.join('..', 'archive','TRAIN_V2','data_out_2')
RANDOM_SEED = 42
VALIDATION_RATE = 0.1
SAMPLING_RATE = 22050
BATCH_SIZE = 32
EPOCHS = 100

In [90]:
#create array to store all the training song paths
song_paths = []
song_genre_nums = []

# loop over each music genre
for genre_num in os.listdir(TRAIN_SET_DIR):
    
    # in each music genre, add the paths of all the music to the array
    for song_path in os.listdir(os.path.join(TRAIN_SET_DIR, genre_num)):
        song_paths.append(os.path.join(TRAIN_SET_DIR, genre_num, song_path))
        song_genre_nums.append(genre_num)

In [91]:
# suffle both arrays with the same shuffle seed
r = np.random.seed(RANDOM_SEED)
random.shuffle(song_paths)
r = np.random.seed(RANDOM_SEED)
random.shuffle(song_genre_nums)

In [92]:
# split both into training and testing sets
num_songs = len(song_genre_nums)
num_validation = int(num_songs * VALIDATION_RATE)

print("number of validations")
print(num_validation)
print('number of trainings')
print(num_songs - num_validation)

validation_genres = song_genre_nums[-num_validation :]
validation_paths = song_paths[-num_validation :]

training_genres = song_genre_nums[: -num_validation]
training_paths = song_paths[: -num_validation]

number of validations
1990
number of trainings
17919


In [126]:
# some useful funcs

# create tf dataset from the array of paths and flags
def create_audio_dataset(paths, genre_nums):

    paths = tf.data.Dataset.from_tensor_slices(paths)
    samples = paths.map(path_to_audio_samples)

    # debug:
    # print("Number of paths: {}".format(len(paths)))
    # print("samples: {}".format(samples.cardinality()))

    flags = tf.data.Dataset.from_tensor_slices(genre_nums)
    return tf.data.Dataset.zip((samples, flags))
    
# given an audio file path, return audio samples
def path_to_audio_samples(path):
    # print(path.shape())
    audio = tf.io.read_file(path)
    audio_tensor, _ = tf.audio.decode_wav(audio, 1, SAMPLING_RATE)

    if audio_tensor is not None:
        return audio_tensor
    else:
        return tf.zeros([SAMPLING_RATE, 1])

def audio_to_fft(audio_data):
    # call fft on first element, keep second element as is
    audio = tf.squeeze(audio_data)
    fft = tf.signal.fft(
        tf.complex(real = audio, imag = tf.zeros_like(audio))
    )
    
    fft = tf.expand_dims(fft, axis = -1)

    shape = tf.shape(audio)
    print("audio shape: {}".format(shape))

    # Thanks a lot for this one, chatGPT!!!
    # This can probably introduce some issues
    # we are basically assuming the length of the tracks that we aren't sure of
    length = shape[0] // 2 if shape[0] is not None else shape[-1]

    return tf.abs(fft[:, : length , :])

validation_dataset = create_audio_dataset(validation_paths, validation_genres).shuffle(RANDOM_SEED).batch(BATCH_SIZE)
training_dataset = create_audio_dataset(training_paths, training_genres).shuffle(RANDOM_SEED).batch(BATCH_SIZE)

print("validation dataset size: {}".format(validation_dataset.cardinality()))
print("training dataset size: {}".format(training_dataset.cardinality()))

# validation_dataset.shuffle(RANDOM_SEED).batch(BATCH_SIZE)
# training_dataset.shuffle(RANDOM_SEED).batch(BATCH_SIZE)

validation_dataset.map(lambda x, y: (audio_to_fft(x), y), num_parallel_calls=tf.data.AUTOTUNE)
training_dataset.map(lambda x, y: (audio_to_fft(x), y), num_parallel_calls=tf.data.AUTOTUNE)


validation dataset size: 63
training dataset size: 560
audio shape: Tensor("Shape:0", shape=(None,), dtype=int32)
audio shape: Tensor("Shape:0", shape=(None,), dtype=int32)


<ParallelMapDataset shapes: (<unknown>, (None,)), types: (tf.float32, tf.string)>

In [None]:
# call prefetch on both datasets
validation_dataset = validation_dataset.prefetch(tf.data.AUTOTUNE)
training_dataset = training_dataset.prefetch(tf.data.AUTOTUNE)