# This notebook demonstrates how to load a custom dataset from disk and feed it to a TensorFlow (Keras) model

In [1]:
from wav_2_spectr_utils import *

import tensorflow as tf
import numpy as np
import random

2022-08-09 18:53:32.441319: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-08-09 18:53:32.441441: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Ref: https://medium.com/analytics-vidhya/write-your-own-custom-data-generator-for-tensorflow-keras-1252b64e41c3
class CustomDataGen(tf.keras.utils.Sequence):
    """Keras ``Sequence`` serving batches of arrays loaded from ``.npy`` files.

    Every item is an ``(X, y)`` tuple of NumPy arrays: ``X`` stacks the arrays
    returned by ``np.load`` for the files of the batch, and ``y`` holds, for
    each file, the index of its parent directory name in ``output_classes``.

    Only complete batches are served: ``len(self)`` is
    ``len(files_list) // batch_size``, so a trailing partial batch is dropped.
    """

    def __init__(self, files_list,
                 output_classes,
                 batch_size=32,
                 input_size=(99, 43, 1),
                 shuffle=True):
        """Store the dataset description.

        Args:
            files_list: paths of the ``.npy`` files; the name of the directory
                containing each file must be one of ``output_classes``.
            output_classes: sequence of class names; the position of a name in
                this sequence is used as the integer label.
            batch_size: number of files per batch (must be >= 1).
            input_size: stored on the instance only; no method of this class
                reads it.
            shuffle: when true, the file order is reshuffled at each epoch end.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1.
        """
        # Keras 3 expects subclasses to initialise the base class; on older
        # TensorFlow versions this call is a harmless no-op.
        super().__init__()

        if batch_size < 1:
            raise ValueError("batch_size must be >= 1, got %r" % (batch_size,))

        self.output_classes = output_classes
        # Work on a private copy so that shuffling in on_epoch_end() never
        # reorders the list object owned by the caller.
        self.files_list = list(files_list)
        self.batch_size = batch_size
        self.input_size = input_size
        self.shuffle = shuffle

        self.n = len(self.files_list)


    def on_epoch_end(self):
        """Reshuffle the file order in place when ``shuffle`` is enabled."""
        # Uses the global `random` state; seed it beforehand for reproducible runs.
        if self.shuffle:
            random.shuffle(self.files_list)

    def __get_data(self, file):
        """Load one file and return ``(array, label_index)``.

        Raises:
            ValueError: if the parent directory name is not in ``output_classes``.
        """
        # data/yes/yes_0_3.npy --> ['yes', 'yes_0_3.npy'] --> 'yes'
        y_str = file.split('/')[-2:][0]
        y = self.output_classes.index(y_str)
        return (np.load(file), y)

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(X, y)`` NumPy arrays."""
        # `index` counts batches, not samples: batch i covers the files
        # [i * batch_size, (i + 1) * batch_size). Slicing from `index` itself
        # would make consecutive batches overlap almost entirely.
        start = index * self.batch_size
        batch_files = self.files_list[start : start + self.batch_size]

        X = []
        y = []
        for file in batch_files:
            spectr, label = self.__get_data(file)
            X.append(spectr)
            y.append(label)

        return (np.array(X), np.array(y))

    def __len__(self):
        """Number of complete batches per epoch (a partial last batch is dropped)."""
        return self.n // self.batch_size

In [3]:
# Directory handed to get_spectr_files() together with each class name.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
CLASSES_DATASET_PATH = "/home/Radhi/Desktop/TinyML_Book/voice-controlled-robot/model/speech_data_classes_npy"

# Class names; CustomDataGen uses the position of a name in this list as the
# integer label, so the order matters.
model_classes = ['yes', 'off', 'left', 'right']
# Keep the _invalid one always last
model_classes.append('_invalid')

In [4]:
print("--- Collecting .npy files ---")
# Accumulate, class by class (in model_classes order), whatever
# get_spectr_files() yields for that class under CLASSES_DATASET_PATH.
files_list = []
for word in model_classes:
    files_list.extend(get_spectr_files(CLASSES_DATASET_PATH, word))

--- Collecting .npy files ---


In [5]:
# Batch generator over every collected file; labels are indices into model_classes.
traingen = CustomDataGen(
    files_list=files_list,
    output_classes=model_classes,
    batch_size=32,
    input_size=(99, 43, 1),
)

In [10]:
# Sanity check: fetch the first batch and show the shapes of inputs and labels.
first_batch = traingen[0]
img, label = first_batch
print(img.shape, label.shape)

(32, 99, 43, 1) (32,)
