Overview

Author: stephankokkas

Modified by: rohit

This notebook defines a pipeline that takes an input directory of audio files and converts them to images using mel-spectrogram transformation and preprocessing techniques.

In [None]:
# disable warnings to tidy up output
import warnings
warnings.filterwarnings("ignore")

# some basic libraries 
from platform import python_version
#import pandas as pd
#import seaborn as sns
import numpy as np
import os
import random
from os import listdir
from os.path import isfile, join
import shutil
import torch
from IPython.display import Audio
import pandas as pd
import os
import argparse
import logmmse
from glob import glob
from tqdm import tqdm
import librosa
import soundfile as sf
#import noisereduce as nr
import matplotlib.pyplot as plt

# tensorflow support
import tensorflow as tf
#import tensorflow_transform as tft
import tensorflow_io as tfio
#from tensorflow.contrib.framework.python.ops import audio_ops

# scipy
import scipy
from pydub import AudioSegment, effects

# turn off tensorflow warnings
tf.get_logger().setLevel('ERROR')

# turn off absl warnings
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)

from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

# print system information
print('Python Version     : ', python_version())
print('TensorFlow Version : ', tf.__version__)

Python Version     :  3.9.16
TensorFlow Version :  2.12.0


In [None]:
# Reproducibility setup, adapted from the Keras FAQ:
# https://keras.io/getting_started/faq/#how-can-i-obtain-reproducible-results-using-keras-during-development
#
# Each library keeps its own random number generator, so each one is seeded
# separately to start from a well-defined state.

# Core Python generator.
random.seed(123)

# NumPy global generator.
np.random.seed(123)

# TensorFlow backend generator. For further details, see:
# https://www.tensorflow.org/api_docs/python/tf/random/set_seed
tf.random.set_seed(1234)

In [None]:
def enforce_memory_limit(mem_mb):
  """Cap the memory TensorFlow may use on the first visible GPU.

  Does nothing when TensorFlow reports no GPU. A RuntimeError raised while
  applying the limit is printed rather than propagated.

  Args:
    mem_mb: Value forwarded as ``memory_limit`` of the virtual device
      configuration (the confirmation message reports it in MB).
  """
  detected_gpus = tf.config.experimental.list_physical_devices('GPU')
  if not detected_gpus:
    return

  try:
    virtual_device = tf.config.experimental.VirtualDeviceConfiguration(memory_limit=mem_mb)
    tf.config.experimental.set_virtual_device_configuration(detected_gpus[0], [virtual_device])
    print(f"vram limit set to {mem_mb}MB")
  except RuntimeError as e:
    print(e)

# enforce max 5GB memory on GPU for this notebook
enforce_memory_limit(5120)

# 1. Preprocessing Pipeline



## **Removing silence & static noise from data** 

**You have to change the paths of your directories here**


First, we get audio files of all formats from `/content/drive/MyDrive/birdclef2022`, then we use libraries to remove silence and noise from each audio file, and save the results to `/content/OUTPUT_cleaned`.



In [None]:
def envelope(y, rate, threshold):
    """Build a per-sample mask of the parts of a signal loud enough to keep.

    The absolute value of the signal is smoothed with a centred rolling
    maximum spanning ``rate / 20`` samples. A sample is kept when that
    rolling maximum is strictly greater than ``threshold``.

    Args:
        y: One-dimensional sequence of audio samples.
        rate: Sample rate of ``y``; it determines the rolling window length.
        threshold: Amplitude the rolling maximum must exceed for a sample to
            be kept.

    Returns:
        A list with one bool per sample of ``y``; True marks a kept sample.
    """
    # int(rate / 20) is 0 for rates below 20, which pandas rejects when
    # min_periods=1, so the window is clamped to at least one sample.
    window = max(1, int(rate / 20))
    rolling_peak = pd.Series(y).abs().rolling(window=window,
                                              min_periods=1,
                                              center=True).max()
    # Vectorised comparison instead of a Python-level loop over every sample.
    return (rolling_peak > threshold).tolist()



def process_audio_file(src_path, dst_path, threshold, sr=16000):
    """Denoise one audio file, drop its quiet samples and write the result.

    Args:
        src_path: Path of the audio file to read.
        dst_path: Path the processed audio is written to.
        threshold: Amplitude threshold forwarded to ``envelope``.
        sr: Sample rate requested from ``librosa.load``.

    Returns:
        A ``(samples, rate)`` tuple: the samples that survived the mask and
        the sample rate reported by ``librosa.load``.
    """
    signal, rate = librosa.load(src_path, sr=sr)

    # Noise reduction first, so the mask is computed on the cleaned signal.
    denoised = logmmse.logmmse(signal, rate, output_file=None)
    keep_mask = envelope(denoised, rate, threshold)
    kept_samples = denoised[keep_mask]

    sf.write(dst_path, kept_samples, rate, subtype='VORBIS')
    return kept_samples, rate


def plot_audio_comparison(original_audio, processed_audio, rate):
    """Draw the original and processed waveforms as two stacked panels.

    The figure is created but not explicitly shown by this function.

    Args:
        original_audio: Samples plotted in the top panel.
        processed_audio: Samples plotted in the bottom panel.
        rate: Accepted but not used by the plotting code.
    """
    plt.figure(figsize=(12, 6))

    panels = (
        (1, original_audio, 'Original Audio'),
        (2, processed_audio, 'Processed Audio'),
    )
    for position, waveform, heading in panels:
        plt.subplot(2, 1, position)
        plt.plot(waveform)
        plt.title(heading)

    plt.tight_layout()

def remove_silence(src_root, dst_root, threshold):
    """Clean every ``.ogg`` file under ``src_root`` and mirror it into ``dst_root``.

    Each file found recursively under ``src_root`` is passed through
    ``process_audio_file`` and written to the same relative location under
    ``dst_root``. A before/after waveform comparison is plotted for the first
    file only.

    Args:
        src_root: Directory searched recursively for ``.ogg`` files.
        dst_root: Directory that receives the processed files; created if
            missing.
        threshold: Silence threshold forwarded to ``process_audio_file``.
    """
    os.makedirs(dst_root, exist_ok=True)

    ogg_paths = glob('{}/**/*.ogg'.format(src_root), recursive=True)

    for index, src_path in enumerate(tqdm(ogg_paths)):
        relative_path = os.path.relpath(src_path, src_root)
        dst_path = os.path.join(dst_root, relative_path)
        os.makedirs(os.path.dirname(dst_path), exist_ok=True)

        # The untouched audio is only needed for the single comparison plot,
        # so it is decoded for the first file alone rather than for every
        # file. It is read before processing in case dst_path overwrites
        # src_path.
        is_first = index == 0
        if is_first:
            original_audio, rate = librosa.load(src_path, sr=16000)

        processed_audio, _ = process_audio_file(src_path, dst_path, threshold)

        if is_first:
            plot_audio_comparison(original_audio, processed_audio, rate)

# Entry point of this cell: choose the folders and threshold, then run the cleaning pass.
if __name__ == '__main__':
  src_root = 'birdclef2022'  # Directory searched recursively for .ogg recordings
  dst_root = 'OUTPUT_cleaned' #Directory to save processed bird audio files
  threshold = 0.01 #Threshold for detecting silence in the audio

  # Denoise and strip quiet samples from every recording found under src_root.
  remove_silence(src_root, dst_root, threshold)    

**In the next step, we delete the duplicates, trim the clips, and then split them into test, train and validation data.**

  This pipeline will go through a root directory and find all the audio files that exist and are of an accepted format. Then, depending on the params set, it will normalise, trim and split the data. Please ensure you specify `self.DATASET_PATH` with the directory of the data, and `self._SET_OUTPUT_DIR` with the location you want the output files to end up in.

**Make sure to change your paths according to your setup**

Here, we get the cleaned data from `'/content/drive/MyDrive/OUTPUT_cleaned'` and save everything to `'/content/'`.


In [None]:
class raw_file_pre_processing():
    """Turn a directory of labelled audio files into FLAC clips split by set.

    The expected layout is one sub-directory per label under ``DATASET_PATH``.
    The intended call order is ``get_raw_file_paths`` ->
    ``audio_preprocessing`` -> ``train_test_split_fun``. Clips are first
    exported to ``OUTPUT_raw_flac`` and then moved into ``TRAIN_raw_flac``,
    ``VALIDATION_raw_flac`` and ``TEST_raw_flac`` under the output directory.
    """

    def __init__(self, dataset_path=None, output_dir=None) -> None:
        """Configure the pipeline and (by default) clear previous output folders.

        Args:
            dataset_path: Root directory holding one sub-directory per label.
                Defaults to the path previously hard-coded in this class.
            output_dir: Directory in which the OUTPUT/TRAIN/TEST/VALIDATION
                folders are created. Defaults to the previously hard-coded
                ``'/content/'``.
        """
        self.CLIP_LENGTH   = 5000   # clip length in milliseconds used when trimming
        self.BITRATE = "160k"       # bitrate string handed to pydub's export()
        self.labels = []
        self.raw_dirs = {}          # label -> list of source file paths
        self.dataset = pd.DataFrame(columns=['Label', 'FileName', 'FileType', 'Directory'])
        self.TARGET_FORMAT = 'flac'
        self.ACCEPTED_FORMAT = ['.mp3', '.flac', '.aiff', '.mp4', '.m4a', '.wav', '.ogg']
        self.CLEAN_DIR_ = True
        self.TRAIN_SPLIT      = 0.80
        # TEST_SPLIT is not read by the code: the test set is whatever remains
        # after the train and validation slices.
        self.TEST_SPLIT       = 0.01
        self.VALIDATION_SPLIT = 0.19
        # label -> flat list of file stems; replaced by [train, vali, test]
        # lists once audio_preprocessing() has run.
        self.split_file_names = {}

        if dataset_path is None:
            dataset_path = '/content/drive/MyDrive/OUTPUT_cleaned'
        if output_dir is None:
            output_dir = '/content/'

        # normpath handles both separator styles of the host OS and, unlike the
        # previous string splitting, leaves relative paths relative and also
        # works for paths that contain no separator at all.
        self.DATASET_PATH = os.path.normpath(dataset_path)

        # Kept exactly as supplied: later pipelines receive this attribute.
        self._SET_OUTPUT_DIR = output_dir

        base_dir = os.path.normpath(self._SET_OUTPUT_DIR)
        self.OUTPUT_DIR = os.path.join(base_dir, 'OUTPUT_raw_flac')
        self.TRAIN_DIR = os.path.join(base_dir, 'TRAIN_raw_flac')
        self.TEST_DIR = os.path.join(base_dir, 'TEST_raw_flac')
        self.VALIDATION_DIR = os.path.join(base_dir, 'VALIDATION_raw_flac')

        if self.CLEAN_DIR_:
            self.clean_dir()
        else:
            os.makedirs(self.OUTPUT_DIR, exist_ok=True)

    def clean_dir(self) -> None:
        """Delete the OUTPUT, TRAIN, TEST and VALIDATION folders if they exist."""
        for directory in (self.OUTPUT_DIR, self.TRAIN_DIR, self.TEST_DIR, self.VALIDATION_DIR):
            if os.path.exists(directory):
                shutil.rmtree(directory)

    @staticmethod
    def _file_stem(path):
        """Return the file name up to its first '.', with underscores removed."""
        return os.path.basename(str(path)).split('.')[0].replace('_', '')

    def _split_names(self, names):
        """Slice ``names`` in order into ``[train, validation, test]`` lists."""
        total = len(names)
        train_end = int(total * self.TRAIN_SPLIT)
        vali_end = int(total * (self.TRAIN_SPLIT + self.VALIDATION_SPLIT))
        return [names[:train_end], names[train_end:vali_end], names[vali_end:]]

    def handle_duplicate_files(self) -> bool:
        """Remove files whose stem occurs more than once inside a label folder.

        Only the immediate sub-directories of ``DATASET_PATH`` are inspected.
        The stem is the file name up to its first '.'.

        NOTE(review): every file sharing a duplicated stem is deleted (no copy
        is kept), as before - confirm this is the intended policy.

        Returns:
            True on success, False if a file-system error occurred.
        """
        try:
            _, class_dirs, _ = next(os.walk(self.DATASET_PATH), (None, [], []))
            for _class_ in class_dirs:
                class_path = os.path.join(self.DATASET_PATH, _class_)
                class_files = [f for f in os.listdir(class_path)
                               if os.path.isfile(os.path.join(class_path, f))]

                stem_counts = {}
                for file_name in class_files:
                    stem = file_name.split('.')[0]
                    stem_counts[stem] = stem_counts.get(stem, 0) + 1
                _set_ = {stem for stem, count in stem_counts.items() if count > 1}

                if len(_set_) > 0:
                    print(f'Class {_class_} has the following duplicates: {_set_}. Will remove dupliactes...')
                    for file_name in class_files:
                        # Exact stem comparison: a substring test would also
                        # delete e.g. 'XC12.ogg' when 'XC1' is duplicated.
                        if file_name.split('.')[0] in _set_:
                            os.remove(os.path.join(class_path, file_name))
            return True
        except OSError as e:
            print(e)
            print('Terminating...')
            return False

    def get_raw_file_paths(self):
        """Collect the accepted audio files of every leaf directory.

        Fills ``raw_dirs`` (label -> file paths) and ``split_file_names``
        (label -> file stems), where the label is the leaf directory name.
        Nothing is collected when duplicate handling reports a failure.
        """
        if not self.handle_duplicate_files():
            return

        print(f'Looking for files... acceptable formats include: {self.ACCEPTED_FORMAT}')
        accepted = {ext.lower() for ext in self.ACCEPTED_FORMAT}
        for root, dirs, files in os.walk(self.DATASET_PATH):
            if dirs:
                continue  # only leaf directories hold a label's recordings

            tmp_lable = os.path.split(root)[-1]
            tmp_file_dir = []
            tmp_filename_ = []
            # Sorted so the order-based split does not depend on the order in
            # which the file system happens to list the files.
            for file in sorted(files):
                # Compare the real extension; a substring test would accept
                # names such as 'clip.ogg.bak'.
                if os.path.splitext(str(file))[1].lower() in accepted:
                    tmp_file_dir.append(os.path.join(root, file))
                    tmp_filename_.append(self._file_stem(file))

            self.raw_dirs.update({tmp_lable: tmp_file_dir})
            self.split_file_names.update({tmp_lable: tmp_filename_})

        for key in self.raw_dirs:
            print(f'FOUND: {key} -> {len(self.raw_dirs[key])}')

    def audio_preprocessing(self, TRIM_AUDIO:bool = False,
                                  NORM_AUDIO:bool = False):
        """Convert every collected file to the target format.

        Also replaces each ``split_file_names`` entry with its
        ``[train, validation, test]`` lists and appends one row per exported
        file to ``self.dataset``. A file that fails to convert is reported and
        skipped.

        Args:
            TRIM_AUDIO: Cut each file into ``CLIP_LENGTH`` ms clips, padding
                the last clip with silence; otherwise export the whole file.
            NORM_AUDIO: Normalise the audio before exporting.
        """
        print('\nConvering audio files....\n')
        os.makedirs(self.OUTPUT_DIR, exist_ok=True)

        for key, item in list(self.split_file_names.items()):
            self.split_file_names[key] = self._split_names(item)

        # Rows are gathered in a list and added to the frame once at the end;
        # concatenating inside the loop copies the whole frame per clip.
        records = []
        for key, item in self.raw_dirs.items():
            print(f'Converting {key} data ->> ...')
            tmp_dir_key = os.path.join(self.OUTPUT_DIR, key)
            os.makedirs(tmp_dir_key, exist_ok=True)

            for source_path in item:
                try:
                    tmp_file_name = self._file_stem(source_path)
                    source_format = os.path.splitext(source_path)[1][1:].lower()
                    raw_sound = AudioSegment.from_file(source_path, format=source_format)

                    if NORM_AUDIO:
                        raw_sound = effects.normalize(raw_sound)

                    if TRIM_AUDIO:
                        exports = []
                        for count_sample, start in enumerate(range(0, len(raw_sound), self.CLIP_LENGTH)):
                            sample = raw_sound[start:start + self.CLIP_LENGTH]
                            # pad clips shorter than CLIP_LENGTH with trailing silence
                            if len(sample) < self.CLIP_LENGTH:
                                sample = sample + AudioSegment.silent(duration=(self.CLIP_LENGTH - len(sample)))
                            file_name = tmp_file_name + '_raw_trim_sample_' + str(count_sample) + '.' + self.TARGET_FORMAT
                            exports.append((file_name, sample))
                    else:
                        exports = [(tmp_file_name + '_raw_' + '.' + self.TARGET_FORMAT, raw_sound)]

                    for file_name, segment in exports:
                        tmp_raw_new_dir = os.path.join(tmp_dir_key, file_name)
                        segment.export(tmp_raw_new_dir, format=self.TARGET_FORMAT, bitrate=self.BITRATE, parameters = [])
                        records.append({"Label": key,
                                        "FileName": file_name,
                                        "FileType": self.TARGET_FORMAT,
                                        "Directory": tmp_raw_new_dir})
                except Exception as e:
                    # Best effort: report the failing file and carry on.
                    print(f'Failed to convert {source_path}:')
                    print(e)

        new_rows = pd.DataFrame(records, columns=['Label', 'FileName', 'FileType', 'Directory'])
        if self.dataset.empty:
            self.dataset = new_rows
        else:
            self.dataset = pd.concat([self.dataset, new_rows], ignore_index=True)

    def train_test_split_fun(self):
        """Move the exported clips into the TRAIN/VALIDATION/TEST folders.

        Each clip goes to the set whose list (for the clip's own label)
        contains the clip's file stem. ``OUTPUT_DIR`` is deleted afterwards,
        together with any clip that was not assigned to a set.
        """
        print(f'\nSplitting data into sub-directories Train, Test and Validation...')

        # Same order as the [train, vali, test] lists in split_file_names.
        split_dirs = (self.TRAIN_DIR, self.VALIDATION_DIR, self.TEST_DIR)
        for split_dir in split_dirs:
            os.makedirs(split_dir, exist_ok=True)

        for label in self.dataset['Label'].dropna().unique():
            for split_dir in split_dirs:
                os.makedirs(os.path.join(split_dir, label), exist_ok=True)

        for row in self.dataset.itertuples(index=False):
            # Look only at the lists of the row's own label. Searching every
            # label moved a file twice (and failed) when two labels shared a
            # stem.
            splits = self.split_file_names.get(row.Label)
            if splits is None:
                continue

            stem = str(row.FileName).split('_')[0]
            for split_dir, names in zip(split_dirs, splits):
                if stem in names:
                    # Destination is built from its parts instead of
                    # string-replacing 'OUTPUT_raw_flac' in the whole path.
                    os.replace(str(row.Directory),
                               os.path.join(split_dir, row.Label, str(row.FileName)))
                    break

        if os.path.exists(self.OUTPUT_DIR):
            shutil.rmtree(self.OUTPUT_DIR)

In [None]:
# Build the raw-audio pipeline. Its constructor also deletes any existing
# OUTPUT/TRAIN/TEST/VALIDATION folders in the output location.
data_preprocessing_pipeline = raw_file_pre_processing()

# Remove duplicated recordings, then collect the accepted audio files per label.
data_preprocessing_pipeline.get_raw_file_paths()


print('\nThis next process will take approx 20 mins for the current bird dataset depending on the speed of your computer')
# Normalise each file, cut it into fixed-length clips and export them in the target format.
data_preprocessing_pipeline.audio_preprocessing(TRIM_AUDIO=True, NORM_AUDIO=True)
# Move the exported clips into the train / validation / test folders.
data_preprocessing_pipeline.train_test_split_fun()

Looking for files... acceptable formats include: ['.mp3', '.flac', '.aiff', '.mp4', '.m4a', '.wav', '.ogg']
FOUND: sheowl -> 128
FOUND: wiltur -> 76
FOUND: brant -> 135
FOUND: jabwar -> 78
FOUND: spodov -> 107

This next process will take approx 20 mins for the current bird dataset depending on the speed of your computer

Convering audio files....

Converting sheowl data ->> ...
Decoding failed. ffmpeg returned error code: 1

Output from ffmpeg/avlib:

ffmpeg version 4.2.7-0ubuntu0.1 Copyright (c) 2000-2022 the FFmpeg developers
  built with gcc 9 (Ubuntu 9.4.0-1ubuntu1~20.04.1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-avresample --disable-filter=resample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec

## Melspectrogram Pipeline

This pipeline will get all the new flac files, convert them to tfio tensors, convert those to spectrograms, and then convert the spectrograms to mel-spectrograms. It will then save the tensors as `.pt` files, which can be read back later.




In [None]:
class mel_spectrogram_pipeline():
    """Convert the pre-processed FLAC clips into (mel-)spectrogram tensor files.

    Clips are read from the TRAIN/TEST/VALIDATION folders found directly under
    the given output directory, and the tensors are written to
    ``OUTPUT_tensors/<set>/<label>/`` under the same directory.
    """

    _MISSING_DIRS_MESSAGE = 'Cant find any directories with pre-processed data. Looking for OUTPUT or TRAIN, TEST, and VAIDATION'

    def __init__(self, output_directory) -> None:
        """Store the locations and the spectrogram parameters.

        Args:
            output_directory: Directory the previous pipeline wrote its
                TRAIN/TEST/VALIDATION folders into.
        """
        self.target_dir = ''
        self.labels = []
        self.augmented_dirs = {}   # '<label>~<set>' -> list of clip paths
        self.OUTPUT_DIR = {}       # set name -> directory holding that set
        self.ACCEPTED_FORMAT = '.flac'

        # do not change, this will be brought across from the previous pipeline
        self._SET_OUTPUT_DIR = output_directory
        # Set unconditionally: both separator branches did the same thing, and
        # a path without any separator used to leave these attributes missing.
        self.DATASET_PATH = self._SET_OUTPUT_DIR
        self.TENSOR_OUTPUT_DIR = os.path.join(self.DATASET_PATH, 'OUTPUT_tensors')

        self.NFFT = 512
        self.WINDOW = 512
        self.STRIDE = 512
        self.SAMPLE_RATE = int(44100/2)
        self.MELS = 128
        self.FMIN = 0
        self.FMAX = int(self.SAMPLE_RATE)/2
        self.TOP_DB = 80

    def clean_dirs(self) -> None:
        """Delete the tensor output directory if it exists."""
        print('Cleaning tensor directory')
        if os.path.exists(self.TENSOR_OUTPUT_DIR):
            shutil.rmtree(self.TENSOR_OUTPUT_DIR)

    def get_output_dir(self) -> None:
        """Locate the folders holding pre-processed clips.

        Looks only at the immediate sub-directories of ``DATASET_PATH`` and
        records either the TRAIN/TEST/VALIDATION trio or a single ``OUTPUT``
        folder in ``self.OUTPUT_DIR``.

        NOTE(review): the fallback looks for a folder named exactly 'OUTPUT',
        as before - confirm which pipeline is expected to produce it.

        Raises:
            ValueError: If neither layout is found (including when
                ``DATASET_PATH`` does not exist).
        """
        print('Finding all pre-processed files')
        root, sub_dirs, _ = next(os.walk(self.DATASET_PATH), (self.DATASET_PATH, [], []))

        if "TRAIN_raw_flac" not in sub_dirs:
            if "OUTPUT" not in sub_dirs:
                raise ValueError(self._MISSING_DIRS_MESSAGE)
            self.OUTPUT_DIR.update({"OUTPUT": os.path.join(root, "OUTPUT")})
        elif "TEST_raw_flac" in sub_dirs and "VALIDATION_raw_flac" in sub_dirs:
            self.OUTPUT_DIR.update({"TRAIN": os.path.join(root, "TRAIN_raw_flac")})
            self.OUTPUT_DIR.update({"TEST": os.path.join(root, "TEST_raw_flac")})
            self.OUTPUT_DIR.update({"VALIDATION": os.path.join(root, "VALIDATION_raw_flac")})

        if not self.OUTPUT_DIR:
            raise ValueError(self._MISSING_DIRS_MESSAGE)

        print(f'\nFound the following directories {self.OUTPUT_DIR}\n')

    def get_preprocessed_files(self) -> None:
        """Collect the clip paths of every leaf directory of every set.

        Fills ``self.augmented_dirs`` with ``'<label>~<set>'`` keys, where the
        label is the leaf directory name.
        """
        self.get_output_dir()

        for key, item in self.OUTPUT_DIR.items():
            for root, dirs, files in os.walk(item):
                if dirs:
                    continue  # only leaf directories hold clips
                tmp_lable = str(os.path.split(root)[-1]) + "~" + key
                # Match the extension itself so 'clip.flac.tmp' is not picked up.
                tmp_file_dir = [os.path.join(root, file) for file in files
                                if str(file).endswith(self.ACCEPTED_FORMAT)]
                self.augmented_dirs.update({tmp_lable: tmp_file_dir})

        for key in self.augmented_dirs:
            print(f'FOUND: {key} -> {len(self.augmented_dirs[key])}')

    def _export_tensor(self, tensor, path, TORCH_EXPORT, TFIO_EXPORT) -> None:
        """Write ``tensor`` to ``path`` with each of the enabled exporters."""
        if TORCH_EXPORT:
            torch.save(tensor, path)
        if TFIO_EXPORT:
            tf.io.write_file(path, tf.io.serialize_tensor(tensor))

    def generate_mel_spectrograms(self, MEL_SPECTRO:bool = False,
                                        SHOW_PLOT:bool = False,
                                        FREQ_MASK:bool = False,
                                        TIME_MASK:bool = False,
                                        TORCH_EXPORT:bool = False,
                                        TFIO_EXPORT:bool = False) -> None:
        """Create and save spectrogram tensors for every collected clip.

        Args:
            MEL_SPECTRO: Compute the mel-spectrogram; nothing is exported
                unless this is set.
            SHOW_PLOT: Plot the log-spectrogram (and log mel-spectrogram).
            FREQ_MASK: Also export a frequency-masked copy (not for the TEST
                or VALIDATION sets).
            TIME_MASK: Also export a time-masked copy (not for the TEST or
                VALIDATION sets).
            TORCH_EXPORT: Save tensors with ``torch.save``.
            TFIO_EXPORT: Save tensors as serialized TensorFlow tensors.
        """
        print(f'\nGenerating tensors... \n')
        for key, item in self.augmented_dirs.items():
            if not item:
                continue

            tmp_label, tmp_set = key.rsplit('~', 1)
            # Created once per label/set instead of being re-checked per file.
            label_dir = os.path.join(self.TENSOR_OUTPUT_DIR, tmp_set, tmp_label)
            os.makedirs(label_dir, exist_ok=True)

            # Masking is augmentation, so held-out sets are skipped. The set
            # key is used because a substring test on the file path misfires
            # when a parent folder or file name contains 'TEST'.
            augment = tmp_set not in ("TEST", "VALIDATION")

            for clip_path in item:
                file_contents = tf.io.read_file(clip_path)
                try:
                    tmp_audio_t = tfio.audio.decode_flac(input=file_contents, dtype=tf.int16)
                except Exception:
                    # Retry with 32-bit samples when 16-bit decoding fails.
                    tmp_audio_t = tfio.audio.decode_flac(input=file_contents, dtype=tf.int32)

                tmp_audio_t = tf.cast(tmp_audio_t, tf.float32)

                tmp_audio_t = tfio.audio.resample(tmp_audio_t, tfio.audio.AudioIOTensor(clip_path)._rate.numpy(), self.SAMPLE_RATE)

                # Convert to spectrogram (first channel only)
                spectrogram = tfio.audio.spectrogram(
                    tmp_audio_t[:, 0], nfft=self.NFFT, window=self.WINDOW, stride=self.STRIDE)

                if SHOW_PLOT:
                    plt.figure()
                    plt.imshow(tf.math.log(spectrogram).numpy())

                if not MEL_SPECTRO:
                    continue

                # Convert to mel-spectrogram
                mel_spectrogram = tfio.audio.melscale(
                    spectrogram, rate=self.SAMPLE_RATE, mels=self.MELS, fmin=self.FMIN, fmax=self.FMAX)

                if SHOW_PLOT:
                    plt.figure()
                    plt.imshow(tf.math.log(mel_spectrogram).numpy())

                out_prefix = str(os.path.join(label_dir, os.path.split(clip_path)[-1].split('.')[0]))

                self._export_tensor(mel_spectrogram, out_prefix + '_raw_mel_spectrogram.pt',
                                    TORCH_EXPORT, TFIO_EXPORT)

                if FREQ_MASK and augment:
                    freq_mask = tfio.audio.freq_mask(mel_spectrogram, param=10)
                    # The masked tensor is written; the TFIO branch used to
                    # serialize the unmasked mel_spectrogram here.
                    self._export_tensor(freq_mask, out_prefix + '_freq_mask_mel_spectrogram.pt',
                                        TORCH_EXPORT, TFIO_EXPORT)

                if TIME_MASK and augment:
                    time_mask = tfio.audio.time_mask(mel_spectrogram, param=10)
                    self._export_tensor(time_mask, out_prefix + '_time_mask_mel_spectrogram.pt',
                                        TORCH_EXPORT, TFIO_EXPORT)

        print("\nTensors complete.\n")

In [None]:
# Reuse the output location of the pre-processing pipeline created in the earlier cell.
data_spectro_pipeline = mel_spectrogram_pipeline(data_preprocessing_pipeline._SET_OUTPUT_DIR)

# Remove tensors from a previous run, then collect the pre-processed clips per label and set.
data_spectro_pipeline.clean_dirs()
data_spectro_pipeline.get_preprocessed_files()

print('\nThis process will take approx 5-10 mins to complete depending on the power of your PC')
# Export mel-spectrograms plus frequency- and time-masked variants as serialized TensorFlow tensors.
data_spectro_pipeline.generate_mel_spectrograms(MEL_SPECTRO=True, SHOW_PLOT=False, FREQ_MASK=True, TIME_MASK=True, TORCH_EXPORT=False, TFIO_EXPORT=True)

Cleaning tensor directory
Finding all pre-processed files

Found the following directories {'TRAIN': '/content/TRAIN_raw_flac', 'TEST': '/content/TEST_raw_flac', 'VALIDATION': '/content/VALIDATION_raw_flac'}

FOUND: spodov~TRAIN -> 560
FOUND: jabwar~TRAIN -> 493
FOUND: sheowl~TRAIN -> 376
FOUND: brant~TRAIN -> 680
FOUND: wiltur~TRAIN -> 734
FOUND: spodov~TEST -> 36
FOUND: jabwar~TEST -> 12
FOUND: sheowl~TEST -> 5
FOUND: brant~TEST -> 26
FOUND: wiltur~TEST -> 2
FOUND: spodov~VALIDATION -> 161
FOUND: jabwar~VALIDATION -> 101
FOUND: sheowl~VALIDATION -> 62
FOUND: brant~VALIDATION -> 143
FOUND: wiltur~VALIDATION -> 121

This process will take approx 5-10 mins to complete depending on the power of your PC

Generating tensors... 


Tensors complete.



## Pipeline to load data into memory for model training

This pipeline will load all the tensors into a train, test, and validation data structure and prepare it for inputs into a model for training

In [None]:
class train_test_vali_pipeline():
    """Load the exported tensor files into train, test and validation frames.

    Tensors are read from ``OUTPUT_tensors/<set>/<label>/`` under the given
    directory. Each loaded file becomes one row with a ``Label`` and a
    ``Tensor`` column in ``train_data``, ``test_data`` or ``vali_data``.
    """

    # File-name markers of the three kinds of exported tensors.
    _RAW_MARKER = '_raw_mel'
    _FREQ_MARKER = '_freq_mask'
    _TIME_MARKER = '_time_mask'

    def __init__(self, output_directory) -> None:
        """Store the locations and create the three empty result frames.

        Args:
            output_directory: Directory that contains ``OUTPUT_tensors``.
        """
        self.DATASET_PATH  = output_directory
        # Set unconditionally: both separator branches did the same thing, and
        # a path without any separator used to leave this as None.
        self.TENSOR_OUTPUT_DIR = os.path.join(self.DATASET_PATH, 'OUTPUT_tensors')

        self.VALID_FILES = False

        self.PATHS = []
        # Three independent frames rather than one object shared by three names.
        self.train_data = pd.DataFrame(columns=['Label', 'Tensor'])
        self.test_data = pd.DataFrame(columns=['Label', 'Tensor'])
        self.vali_data = pd.DataFrame(columns=['Label', 'Tensor'])

    def check_valid_dirs(self) -> None:
        """Verify that the tensor folder holds train, test and validation sets.

        Records the immediate sub-directories of ``TENSOR_OUTPUT_DIR`` in
        ``self.PATHS``; the list is rebuilt on every call.

        Raises:
            ValueError: If a TRAIN, TEST or VALIDATION folder is missing
                (including when ``TENSOR_OUTPUT_DIR`` does not exist).
        """
        print('Checking to find train, test and vali directories inside tensors folder...')
        _, set_dirs, _ = next(os.walk(self.TENSOR_OUTPUT_DIR), (None, [], []))

        # Rebuilt, not appended to, so calling this twice does not make
        # load_data() read every file twice.
        self.PATHS = [os.path.join(self.TENSOR_OUTPUT_DIR, name) for name in set_dirs]

        required = ("TEST", "TRAIN", "VALIDATION")
        if not all(any(tag in name for name in set_dirs) for tag in required):
            raise ValueError('Cannot find folders from previous pipline which include train, test and validation directories')

        self.VALID_FILES = True
        print('PASS')

    @staticmethod
    def _split_of(path):
        """Return 'train', 'test' or 'vali' for a set directory, else None."""
        # Decided from the directory's own name: testing the full path would
        # misfile everything when a parent folder contains e.g. 'TRAIN'.
        set_name = os.path.basename(os.path.normpath(str(path)))
        if "TRAIN" in set_name:
            return 'train'
        if "TEST" in set_name:
            return 'test'
        if "VALIDATION" in set_name:
            return 'vali'
        return None

    @staticmethod
    def _read_tensor(file_path, TORCH_LOAD, TFIO_LOAD):
        """Read one tensor file; the TensorFlow loader wins if both are set."""
        if TFIO_LOAD:
            return tf.io.parse_tensor(tf.io.read_file(file_path), tf.float32)
        return torch.load(file_path)

    def load_data(self, LOAD_RAW:bool = False,
                        LOAD_FREQ:bool = False,
                        LOAD_TIME:bool = False,
                        TO_CSV:bool = False,
                        TORCH_LOAD:bool = False,
                        TFIO_LOAD:bool = False) -> None:
        """Load the selected tensor files into the three result frames.

        Args:
            LOAD_RAW: Load the unmasked mel-spectrogram files.
            LOAD_FREQ: Load the frequency-masked files.
            LOAD_TIME: Load the time-masked files.
            TO_CSV: Also write the three frames as CSV files into
                ``DATASET_PATH``.
            TORCH_LOAD: Read files with ``torch.load``.
            TFIO_LOAD: Read files as serialized TensorFlow float32 tensors.

        Raises:
            ValueError: If files were requested but no loader was selected.
        """
        if (LOAD_RAW or LOAD_FREQ or LOAD_TIME) and not (TORCH_LOAD or TFIO_LOAD):
            # Previously this ended in a NameError, or silently reused the
            # tensor loaded for the previous file.
            raise ValueError('load_data needs TORCH_LOAD=True or TFIO_LOAD=True to read tensor files')

        wanted_markers = [marker for marker, enabled in ((self._RAW_MARKER, LOAD_RAW),
                                                         (self._FREQ_MARKER, LOAD_FREQ),
                                                         (self._TIME_MARKER, LOAD_TIME)) if enabled]

        # One-row frames are gathered per set and concatenated once at the
        # end; concatenating per file copies the whole frame every time.
        pending = {'train': [], 'test': [], 'vali': []}

        for path in self.PATHS:
            split = self._split_of(path)
            if split is None:
                continue

            _, label_dirs, _ = next(os.walk(path), (None, [], []))
            for tmp_label in label_dirs:
                label_path = os.path.join(path, tmp_label)
                for file in os.listdir(label_path):
                    file_path = os.path.join(label_path, file)
                    if not os.path.isfile(file_path):
                        continue
                    if not any(marker in str(file) for marker in wanted_markers):
                        continue

                    tmp_data = self._read_tensor(file_path, TORCH_LOAD, TFIO_LOAD)
                    new_row = pd.Series({"Label": tmp_label,
                                         "Tensor": tmp_data})
                    pending[split].append(new_row.to_frame().T)

        if pending['train']:
            self.train_data = self._extend(self.train_data, pending['train'])
        if pending['test']:
            self.test_data = self._extend(self.test_data, pending['test'])
        if pending['vali']:
            self.vali_data = self._extend(self.vali_data, pending['vali'])

        if TO_CSV:
            self.train_data.to_csv(os.path.join(self.DATASET_PATH, 'train_df.csv'))
            self.test_data.to_csv(os.path.join(self.DATASET_PATH, 'test_df.csv'))
            self.vali_data.to_csv(os.path.join(self.DATASET_PATH, 'vali_df.csv'))

    @staticmethod
    def _extend(existing, new_frames):
        """Return ``existing`` followed by ``new_frames`` with a fresh index."""
        frames = ([] if existing.empty else [existing]) + list(new_frames)
        return pd.concat(frames, ignore_index=True)

In [None]:
# Point the loader at the same output location the earlier pipelines used.
model_data_pipeline = train_test_vali_pipeline(data_preprocessing_pipeline._SET_OUTPUT_DIR)

# Fail early if the tensor folder lacks a train, test or validation directory.
model_data_pipeline.check_valid_dirs()

print('This process will take approx 1 minute')
# Load the raw, frequency-masked and time-masked tensors using the TensorFlow loader.
model_data_pipeline.load_data(LOAD_RAW=True, LOAD_FREQ=True, LOAD_TIME=True, TO_CSV=False, TORCH_LOAD=False, TFIO_LOAD=True)

print("Finished")
# Each frame has one row per loaded tensor file and two columns (Label, Tensor).
print(f'Train Shape: {model_data_pipeline.train_data.shape}, Test Shape: {model_data_pipeline.test_data.shape}, Validation Shape: {model_data_pipeline.vali_data.shape}') 

Checking to find train, test and vali directories inside tensors folder...
PASS
This process will take approx 1 minute
Finished
Train Shape: (8529, 2), Test Shape: (81, 2), Validation Shape: (588, 2)


# **So far we have built our pipeline and generated OUTPUT_tensors**
The above code takes time to process:
