In [1]:
import librosa
from typing import List, Union, Optional, Tuple
import re
import os
import soundfile as sf
from collections.abc import Collection

#### soundfile package documentation
- https://python-soundfile.readthedocs.io/en/0.11.0/

#### soundfile supported file formats
- http://www.mega-nerd.com/libsndfile/#Features

#### soundfile supported file format extensions
- WAV (.wav): Microsoft Waveform Audio File Format.
- AIFF / AIFC (.aiff, .aif): Audio Interchange File Format, used mainly by Apple.
- AU / SND (.au, .snd): Sun/DEC/NeXT Audio File Format.
- RAW (.raw): Headerless raw audio data.
- PAF (.paf): Ensoniq PARIS Audio File Format.
- SVX (.svx): Amiga IFF / SVX8 / SV16 audio format.
- NIST/Sphere (.nist, .sphere): NIST SPHERE format, used mainly in speech research.
- VOC (.voc): Creative Labs Audio File.
- IRCAM (.sf): Berkeley/IRCAM/CARL Sound Format.
- W64 (.w64): Sony Wave64 format, designed for very large files.
- MAT4 / MAT5 (.mat): MATLAB file formats (V4.0 and V5.0).
- PVF (.pvf): Portable Voice Format.
- XI (.xi): FastTracker 2 Extended Instrument format.
- HTK (.htk): Hidden Markov Model Toolkit format, used in speech recognition.
- CAF (.caf): Core Audio Format, developed by Apple.
- SD2 (.sd2): Sound Designer II format, used by Digidesign.
- FLAC (.flac): Free Lossless Audio Codec.
- OGG/Vorbis (.ogg): Ogg Vorbis compressed audio.
- Ogg/Opus (.opus): Ogg Opus, another compressed audio format.
- MP3 (.mp3): MPEG Audio Layer III, supported in recent versions.

In [5]:
mydict = {
    'k1':"something",
    'k2':"something_else",
    'k3':"something_else_else",
    'k4':"something_else_else_else"
}

print('k1' in mydict.keys())
print('k1' in set(mydict.keys()) )

True
True


In [27]:
class Audio:
    
    # Utility Funcitons
    def _make_str_set(item: Optional[Union[str, Tuple[str, ...], List[str], set[str]]]) -> set:
        
        if isinstance(item, str):
            item = {item}
        elif isinstance(item, Collection) and all(isinstance(i, str) for i in item) and not isinstance(item, set):
            item = set(item)
        else:
            item = {}
            
        return item
    
    def __init__(self, paths: Union[str, Tuple[str, ...], List[str], set[str]], extensions: Optional[Union[str, Tuple[str, ...], List[str], set[str]]] = None, exclude: Optional[Union[str, Tuple[str, ...], List[str], set[str]]] = None, recursive: Optional[bool] = False) -> None:
        """
        Creates a new audio object.
        
        This function can initialize a new empty object or load specific files based on the include path(s),
        exclude regex pattern(s), and the recursive directory search flag.

        Parameters
        ----------
            paths: Mandatory parameter. File/directory paths to search.
            extensions: File extensions to include. Default includes all supported file extensions.
            exclude: Regex patterns for which matching paths are excluded.
            recursive: Whether to search directories recursively. Defaults to False.
        Examples
        --------
            load 2 files and search a directory recursively while ignoring filenames containing "file1" or "file3" or "file7":
            
            >>> myaudio = Audio(path=["../audiofile1.wav", "../audiofile2.wav", "../audio_directory"],
                                extension: ['wav','mp3'],
                                exclude=[r'.*file[137].*],
                                recursive=True) 
        """
        
        self.samples = {}
        if(paths):
            self.fetch(paths, extensions, exclude, recursive)
    
    ## add the ability to specify extension type, default can be all or .wav...
    def fetch(self, paths: Union[str, Tuple[str, ...], List[str], set[str]], extensions: Optional[Union[str, Tuple[str, ...], List[str], set[str]]] = None, exclude: Optional[Union[str, Tuple[str, ...], List[str], set[str]]] = None, recursive: Optional[bool] = False, append: Optional[bool] = True) -> bool:
        """
        Loads audio files. This function allows loading specific audio files or directories with optional exclusion patterns and a recursive search option.
        It can either append files to an existing audio_samples dictionary or overwrite it.

        Parametrs
        ---------
            paths: Mandatory parameter. File/directory paths to search.
            extensions: File extensions to include. Default includes all supported file extensions.
            exclude: Regex patterns for which matching paths are excluded.
            recursive: Whether to search directories recursively. Defaults to False.
            append: True to add new files to the existing dictionary, False to overwrite. Defaluts to True.

        Returns
        -------
            bool: True if all files loaded successfully, False otherwise.
        
        Examples
        --------
            load 2 files and search a directory recursively while ignoring filenames containing "file1" or "file3" or "file7":
            
            >>> myaudio = Audio(include=["../audiofile1.wav", "../audiofile2.wav", "../audio_directory"],
                                exclude=[r'.*file[137].*],
                                recursive=True)
        """
        
        # Ensuring consistent path parameter formatting
        paths = self._make_str_set(paths)
        if not paths:
            print("Please enter a source path string or a collection of source path strings. Returning...")
            return False
        
        # handling different extension input cases
        if extensions is None:
            # defaults to all supported file formats
            extensions = {"wav", "aiff", "aif", "au", "snd", "raw", "paf", "svx", "nist", "sphere", "voc", "sf", "w64", "mat", "pvf", "xi", "htk", "caf", "sd2", "flac", "ogg", "opus", "mp3"}  # Supported audio formats
        else:
            extensions = self._make_str_set(extensions)
            if not extensions:
                print("Please provide supported soundfile extension string or collection of supported soundfile extension strings. Returning...")
                return False
            
        # Ensuring consistent exclude parameter formatting
        if exclude is None:
            exclude = {}
        else:
            exclude = self._make_str_set(exclude)
            if not exclude:
                print("Please ensure only regex strings or collecitons of regex strings are used as the exclude patterns. Returning...")
                return False
        
        # will store all file paths
        audio_files = []
        
        for p in paths:
            if os.path.isdir(p):
                audio_files.extend(self._fetch_from_directory(p, extensions, exclude, recursive))
            elif os.path.isfile(p):
                ext = p.split('.')[-1]
                if '.' in p and ext in extensions:
                    audio_files.append(p)
            else:
                print(f"{p} does not exist, is not a supported soundfile format or is not a regular file/directory.")
        
        # Clearing samples dictionary if append = False
        if(not append):
            self.samples.clear()
        
        # check for existence before loading
        for file in audio_files:
            if file in self.samples.keys():
                continue
            else:
                try:
                    sample, sr = librosa.load(file)
                    fullpath = os.path.abspath(file)
                    file = file.split('/')[-1]
                    self.samples[file] = {
                        "fullpath": fullpath,
                        "sample": sample,
                        "sr": sr
                    }
                except Exception as e:
                    print(f"Error loading {file}: {e}.")
                    return False
        
        return True
    
    def _fetch_from_directory(self, directory: str, extensions: Optional[Union[str, Tuple[str, ...], List[str], set[str]]] = None, exclude: Optional[Union[str, Tuple[str, ...], List[str], set[str]]] = None, recursive: Optional[bool] = False) -> List[str]:
        """
        Fetch files from a directory based on include and exclude patterns and recursive search flag.

        Parameters
        ----------
            directory: The directory from which audio files will be fetched.
            extensions: File extensions to include. Default includes all supported file extensions.
            exclude: Regex patterns for which matching paths are excluded.
            recursive: Whether to search directories recursively. Defaults to False.

        Returns
        -------
            List[str]: List of files meeting criteria.
        """
        
        directory = directory if directory.endswith('/') else (directory + '/')
        extensions = ["wav", "aiff", "aif", "au", "snd", "raw", "paf", "svx", "nist", "sphere", "voc", "sf", "w64", "mat", "pvf", "xi", "htk", "caf", "sd2", "flac", "ogg", "opus", "mp3"]  # Supported audio formats
        
        audio_files = []
        
        iteration = 0
        for root, dirs, files in os.walk(directory):
            for file in files:
                file_path = os.path.join(root, file)
                if any(file_path.endswith(ext) for ext in extensions) and not self._is_excluded(file_path, exclude):
                    audio_files.append(file_path)

            iteration += 1            

            if not recursive and iteration > 0:
                break

            # Exclude directories early
            dirs[:] = [d for d in dirs if not self._is_excluded(os.path.join(root, d), exclude)]            
                    
        return audio_files
   
    def _is_excluded(self, file_path: str, exclude: Tuple[re.Pattern, ...]) -> bool:
        """
        Check if a file path matches any of the excluded regex patterns.

        Parameters
        ----------
            file_path (str): The file path to check.
            exclude (Tuple[re.Pattern, ...]): Tuple of regex patterns to exclude.
        
        Returns
        -------
            bool: Boolean indicating whether or not the file path is excluded.
        """
        
        for e in exclude:
            if(e.search(file_path)):
                return True
        return False
    
    ## add a log
    def write(self, output_path: str) -> bool:
        """
        Writes current state of audio samples to a specified directory. Automatically places output in a new "output" folder or overwrites existing files.

        Parameters
        ----------
            output_path (str): Path of directory in which to write currently loaded audio files.
            in_place (bool, optional): True to overwrite existing files, False to place output in new folder. Defaults to False.
        
        Returns
        -------
            bool: Boolean indicating whether or not all file(s) were written successfully.
        """
        
        if not os.path.isdir(output_path):
            print(f"{output_path} is either not a directory or doesn't exist.")
            return False
        
        output_path = output_path+"output/" if output_path.endswith('/') else output_path+'/output/'
        if not os.path.isdir(output_path):
            os.mkdir(output_path)

        for file in self.samples.keys():
            sf.write(os.path.join(output_path,file), self.samples[file][0], self.samples[file][1])
            

    def print(self) -> None:
        """
        Displays the current state of the samples dictionary. Shows file path as key and list containing details of loaded sample array
        and sample rate as value.
        """
        count = 0
        print("{")
        if len(self.samples.keys()) != 0:
            for key in self.samples.keys():
                if (count == (len(self.samples.keys())-1) ):
                    print(f"{key} : [ (dtype={self.samples[key][0].dtype}, shape=|{self.samples[key][0].shape}), sample_rate={self.samples[key][1]} ]")
                else:
                    print(f"{key} : [ (dtype={self.samples[key][0].dtype}, shape={self.samples[key][0].shape}), sample_rate={self.samples[key][1]} ],")
                count += 1
        print("}")

In [28]:
class Noise:
    
    def __init__(self) -> None:
        pass

In [29]:
myaudio = Audio()

In [30]:
myaudio.

{
../temp/actor1/03-01-01-01-01-01-01.wav : [ (dtype=float32, shape=(72838,)), sample_rate=22050 ],
../temp/actor1/03-01-02-02-02-01-01.wav : [ (dtype=float32, shape=(92703,)), sample_rate=22050 ],
../temp/actor1/03-01-04-02-01-02-01.wav : [ (dtype=float32, shape=|(81667,)), sample_rate=22050 ]
}


In [31]:
myaudio.fetch("../temp/actor11", recursive=True, append=False)

True

In [32]:
myaudio.print()

{
../temp/actor11/03-01-01-01-02-02-11.wav : [ (dtype=float32, shape=(69895,)), sample_rate=22050 ],
../temp/actor11/03-01-03-01-02-01-11.wav : [ (dtype=float32, shape=|(69895,)), sample_rate=22050 ]
}


In [33]:
myaudio.write("../temp/")

LibsndfileError: Error opening '../temp/output/../temp/actor11/03-01-01-01-02-02-11.wav': System error.