In [1]:
import pandas as pd
import os
import time
import matplotlib.pyplot as plt

# Parallel processing packages
# from functools import partial
from tqdm import tqdm
from concurrent import futures

from maad import sound, features
from maad.util import date_parser
import multiprocessing as mp

In [2]:
!pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.2-py2.py3-none-any.whl (249 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.0/250.0 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.2
[0m

In [3]:
def single_file_processing (audio_path,
                            date) :
    """
    Compute the temporal and spectral alpha indices of a single audio file.

    Parameters
    ----------
    audio_path : string
        full path to the audio file (.wav) to process.
        The full path is in the dataframe given by the function date_parser
    date : datetime
        date of recording of the audio file.
        The date is in the dataframe given by the function date_parser

    Returns
    -------
    df_indices : dataframe
        Dataframe containing all the temporal and spectral indices, as well as
        the audio path ('file' column) and the recording date ('Date' column).
        An empty dataframe is returned when the file could not be processed;
        the failure is reported on standard output.

    Notes
    -----
    The gain and sensitivity of the recorder are read from the module-level
    constants G and S, which must be defined before this function is called.
    """

    try :
        # Load the left channel of the recording (with detrending) and get the
        # sampling frequency fs
        wave, fs = sound.load(filename=audio_path,
                              channel='left',
                              detrend=True,
                              verbose=False)

        # =====================================================================
        #                 Computation in the time domain
        # =====================================================================

        # compute all the audio indices and store them into a DataFrame
        # dB_threshold and rejectDuration are used to select audio events.
        df_audio_ind = features.all_temporal_alpha_indices(
                                    wave, fs,
                                    gain = G, sensibility = S,
                                    dB_threshold = 3, rejectDuration = 0.01,
                                    verbose = False, display = False)

        # =====================================================================
        #                 Computation in the frequency domain
        # =====================================================================

        # Compute the Power Spectrogram Density (PSD) : Sxx_power
        Sxx_power, tn, fn, ext = sound.spectrogram (
                                        wave, fs, window='hann',
                                        nperseg = 1024, noverlap=1024//2,
                                        verbose = False, display = False,
                                        savefig = None)

        # compute all the spectral indices and store them into a DataFrame
        # flim_low, flim_mid, flim_hi correspond to the frequency limits in Hz
        # that are required to compute some indices (i.e. NDSI)
        # if R_compatible is set to 'soundecology', then the outputs are similar
        # to the soundecology R package.
        # mask_param1 and mask_param2 are two parameters to find the regions of
        # interest (ROIs). These parameters need to be adapted to the dataset in
        # order to select ROIs
        df_spec_ind, _ = features.all_spectral_alpha_indices(
                                                Sxx_power,
                                                tn,fn,
                                                flim_low = [0,1500],
                                                flim_mid = [1500,8000],
                                                flim_hi  = [8000,20000],
                                                gain = G, sensitivity = S,
                                                verbose = False,
                                                R_compatible = 'soundecology',
                                                mask_param1 = 6,
                                                mask_param2=0.5,
                                                display = False)

        # =====================================================================
        #                 Create a dataframe
        # =====================================================================

        # put the temporal and spectral indices side by side
        df_indices = pd.concat([df_audio_ind,
                                df_spec_ind], axis=1)

        # add date and audio_path as the two first columns
        df_indices.insert(0, 'Date', date)
        df_indices.insert(1, 'file', audio_path)

    except Exception as err:
        # Only ordinary errors are handled here, so that KeyboardInterrupt and
        # SystemExit still stop the run. The file is skipped (empty output) but
        # the reason is reported instead of being silently discarded.
        print(f"single_file_processing: skipping {audio_path!r} "
              f"({type(err).__name__}: {err})")
        df_indices = pd.DataFrame()

    return df_indices

In [4]:
# Names of the spectral index columns (44 entries).
SPECTRAL_FEATURES = [
    "MEANf", "VARf", "SKEWf", "KURTf", "NBPEAKS", "LEQf",
    "ENRf", "BGNf", "SNRf", "Hf",
    "EAS", "ECU", "ECV", "EPS", "EPS_KURT", "EPS_SKEW", "ACI",
    "NDSI", "rBA", "AnthroEnergy", "BioEnergy", "BI", "ROU",
    "ADI", "AEI", "LFC", "MFC", "HFC",
    "ACTspFract", "ACTspCount", "ACTspMean",
    "EVNspFract", "EVNspMean", "EVNspCount",
    "TFSD", "H_Havrda", "H_Renyi", "H_pairedShannon", "H_gamma",
    "H_GiniSimpson", "RAOQ",
    "AGI", "ROItotal", "ROIcover",
]

# Names of the temporal index columns (16 entries).
TEMPORAL_FEATURES = [
    "ZCR", "MEANt", "VARt", "SKEWt", "KURTt",
    "LEQt", "BGNt", "SNRt", "MED", "Ht",
    "ACTtFraction", "ACTtCount", "ACTtMean",
    "EVNtFraction", "EVNtMean", "EVNtCount",
]

# Parameters of the audio recorder. They are not mandatory, but they allow
# computing the sound pressure level of the audio file (dB SPL) as a
# sonometer would do.
# Microphone sensitivity: -35 dBV (SM4) / -18 dBV (Audiomoth)
S = -35
# Amplification gain: 26 dB (SM4 preamplifier) + 16 dB
# NOTE(review): the origin of the extra 16 dB is not stated here - confirm
# against the recorder settings.
G = 26 + 16

In [None]:
if __name__ == '__main__':  # Multiprocessing should be declared under the main entry point
    # The "fork" start method is necessary for macOS. It is the default method
    # on Linux. force=True keeps this cell re-runnable: without it, a second
    # call raises RuntimeError because the start method has already been set.
    mp.set_start_method("fork", force=True)

    # Build the table of audio files found in "Dataset2" (SM4 file naming)
    df = date_parser("Dataset2", dateformat='SM4', verbose=True)

    # Date is used as index. Reset the index in order to get back Date as column
    df = df.reset_index()

    # The files to process are distributed over the worker processes depending
    # on their availability. This speeds up the computation.

    # Empty dataframe that will receive the indices of every audio file
    df_indices = pd.DataFrame()

    # Number of CPUs used for the calculation (one worker process per CPU).
    nb_cpu = os.cpu_count()

    print(df_indices)
    print(nb_cpu)

    tic = time.perf_counter()
    print(tic)

    # Per-file results are collected in a list and concatenated once:
    # concatenating and rewriting the Excel file after every audio file
    # costs quadratic time on large datasets.
    frames = []
    try:
        # Multi-CPU processing
        with futures.ProcessPoolExecutor(max_workers=nb_cpu) as pool:
            # give the function to map on several CPUs as well as its
            # arguments as lists
            for df_indices_temp in pool.map(
                single_file_processing,
                df["file"].to_list(),
                df["Date"].to_list()
            ):
                print("done")
                # files that failed come back as empty dataframes: skip them
                if not df_indices_temp.empty:
                    frames.append(df_indices_temp)
    finally:
        # Executed even if the run is interrupted, so the indices computed so
        # far are not lost. Nothing is written when no file succeeded, which
        # avoids overwriting a previous export with an empty one.
        if frames:
            df_indices = pd.concat(frames)
            # save to Excel
            df_indices.to_excel("Indices.xlsx")
        print(f"Elapsed time: {time.perf_counter() - tic:.1f} s")

F02_20230420_154500.WAV
F02_20230420_180000.WAV
F02_20230423_200000.WAV
F02_20230424_180000.WAV
F02_20230425_160000.WAV
F02_20230425_234500.WAV
F02_20230428_194500.WAV
F02_20230429_104500.WAV
F02_20230429_120000.WAV
F02_20230429_154500.WAV
F02_20230429_214500.WAV
H10_20230419_054500.WAV
H10_20230419_070000.WAV
H10_20230420_041500.WAV
H10_20230420_163000.WAV
H10_20230421_060000.WAV
H10_20230422_010000.WAV
H10_20230422_183000.WAV
H10_20230423_174500.WAV
H10_20230425_094500.WAV
H10_20230425_150000.WAV
Empty DataFrame
Columns: []
Index: []
8
1048.139183092


In [None]:
# Export the indices, then end the cell with the expression to display:
# the notebook only renders the value of the last expression of a cell, so a
# bare `df_indices` placed before the export shows nothing.
df_indices.to_excel("Indices.xlsx")
df_indices.head()