# SENSITIVITY ANALYSES
------------------------

## Import Libraries

In [1]:
github_dir = "/home/kiran/Documents/github/CCAS_ML"

# add path to local functions
import os
os.chdir(github_dir)

# import all the params for this model
from example_params import *
is_forked = True # is going to need to go into the params file

# import own functions
import preprocess.preprocess_functions as pre
import postprocess.evaluation_metrics_functions_old as metrics
import postprocess.merge_predictions_functions as ppm
import model.specgen_batch_generator as bg
import model.network_class as rnn
# import postprocess.visualise_prediction_functions as pp
from model.callback_functions import LossHistory
import model.audiopool as audiopool

# import normal packages used in pre-processing
import numpy as np
import librosa
import warnings
import ntpath
import os
from itertools import compress  
from random import random, shuffle
from math import floor
import statistics
import glob

# plotting
import matplotlib.pyplot as plt

# ML section packages
import datetime
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras import optimizers
from keras.layers import Input, Flatten
from keras.models import Model
from keras.layers import Conv2D, MaxPooling2D, Activation, SeparableConv2D, concatenate
from keras.layers import Reshape, Permute
from keras.layers import TimeDistributed, Dense, Dropout, BatchNormalization
from keras.models import load_model
from keras.layers import GRU, Bidirectional, GlobalAveragePooling2D
from keras.callbacks import TensorBoard
from keras.utils import plot_model

# postprocessfrom decimal import Decimal
from decimal import Decimal
import pandas as pd
import pickle

# evaluate and plot 
import seaborn as sn
# import pandas as pd
# import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import csv

Import requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit
Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit






Using TensorFlow backend.


----------------------------------------------------------------------
# PREPROCESSING / DATA WRANGLING

Currently, all the meerkat files are found on the server. The labels in particular are divided by year (currently 2017 and 2019) and all file labels are in a single document. This bit of code just takes these and puts them together

In [2]:
# Compile all the synched label files together
labels_all = pd.DataFrame()
for directory in label_dirs:
    for group in group_IDs:
        temp = pd.read_csv(os.path.join(directory, group +"_ALL_CALLS_SYNCHED.csv"), sep=sep,
                       header=0, engine = engine, encoding = encoding) 
        temp["group"] = group
        labels_all = pd.concat([labels_all, temp]) 
        del temp

labels_all = labels_all[-labels_all.wavFileName.str.contains('SOUNDFOC')]
labels_all = labels_all.reset_index(drop = True)

Data also contain focal follows (someone walking around behind the meerkats) and the resultion of this data is different and therefore not anlysed with the collar data (but could be done separately or put to the same resolution).

In [3]:
# subset all the audio files that we should use in the analysis (i.e. not focal follow data)
audio_files = list(set(labels_all["wavFileName"]))
audio_filenames = list(compress(audio_files, ["SOUNDFOC" not in filei for filei in audio_files]))

# subset all the audio files that we should use in the analysis (i.e. not focal follow data)
label_files = list(set(labels_all["csvFileName"]))
label_filenames = list(compress(label_files, ["SOUNDFOC" not in filei for filei in label_files]))

# get the file IDS without all the extentions (used later for naming)
all_filenames = [audio_filenames[i].split(".")[0] for i in range(0,len(audio_filenames))]

Then we locate all the paths to the files

In [4]:
# find all the labels
EXT = "*.csv"
label_filepaths = []
for PATH in acoustic_data_path :
      label_filepaths.extend( [file for path, subdir, files in os.walk(PATH) for file in glob.glob(os.path.join(path, EXT))])
EXT = "*.CSV"
for PATH in acoustic_data_path :
      label_filepaths.extend( [file for path, subdir, files in os.walk(PATH) for file in glob.glob(os.path.join(path, EXT))])

# find all audio paths (will be longer than label path as not everything is labelled)
audio_filepaths = []
EXT = "*.wav"
for PATH in audio_dirs:
      audio_filepaths.extend( [file for path, subdir, files in os.walk(PATH) for file in glob.glob(os.path.join(path, EXT))])

### Create a label table

Currently, the labels are stored in a file generated by audition for the meerkats. We want to these manual labels and put them into a more meaningful categories for for the machine learning. To set categories, I use a pre-defined dictionary called call_types that is defined in the parameters file and which specifies what the different classes are for the call types. Anything strange gets put into a category "oth" for other.

In [5]:
# Create the label table
label_table = pre.create_meerkat_table(labels_all, call_types, sep,
                                       start_column, duration_column, columns_to_keep,
                                       label_column, convert_to_seconds, 
                                       label_for_other, label_for_noise, engine,
                                       multiclass_forbidden)

This label 'c' on line 5909 will be classed as 'oth'
This label 'c' on line 7266 will be classed as 'oth'
This label 'cn' on line 7921 will be classed as 'oth'
This label 'chew' on line 8385 will be classed as 'oth'
This label 'sb' on line 8504 will be classed as 'oth'
This label 'chew' on line 8782 will be classed as 'oth'
This label 'chew' on line 8862 will be classed as 'oth'
This label 'chew' on line 8886 will be classed as 'oth'
This label 'chew' on line 8887 will be classed as 'oth'
This label 'chew' on line 8890 will be classed as 'oth'
This label 'chew' on line 8914 will be classed as 'oth'
This label 'chew' on line 8933 will be classed as 'oth'
This label 'chew' on line 8962 will be classed as 'oth'
This label 'chew' on line 9008 will be classed as 'oth'
This label 'chew' on line 9011 will be classed as 'oth'
This label 'chew' on line 9045 will be classed as 'oth'
This label 'chew' on line 9046 will be classed as 'oth'
This label 'chew' on line 9243 will be classed as 'oth'
Th

In [6]:
# estimate the average beep length because many of them are not annotated in the data
avg_beep = round(statistics.mean(label_table.loc[label_table["beep"],"Duration"].loc[label_table.loc[label_table["beep"],"Duration"]>0]),3)
label_table.loc[(label_table["beep"].bool and label_table["Duration"] == 0.) ==True, "Duration"] = avg_beep
label_table.loc[(label_table["beep"].bool and label_table["Duration"] == avg_beep) ==True, "End"] += avg_beep
print(avg_beep)

0.065


In [7]:
# add wav and audio paths
label_table["wav_path"] = label_table['wavFileName'].apply(lambda x: [pathi for pathi in audio_filepaths if x in pathi][0])
label_table["label_path"] = label_table['csvFileName'].apply(lambda x: [pathi for pathi in label_filepaths if x in pathi][0])

In [8]:
# make sure these paths are added to the noise table too
columns_to_keep.append("wav_path")
columns_to_keep.append("label_path")

# create the matching noise table
noise_table = pre.create_noise_table(label_table, call_types, label_for_noise, label_for_startstop, columns_to_keep)#, '\$'])

# remove rows where the annotated noise is smaller than the window size otherwise the spectrogram we generate will inclue a call
noise_table = noise_table.drop(noise_table[noise_table["Duration"] < spec_window_size].index)

In [12]:
print(label_table.iloc[0:4])

label_table.to_csv("/media/kiran/D0-P1/animal_data/meerkat/ful_label_table.csv")

   index          Label     Start  Duration       End     cc     sn     mo  \
0      0          start  3600.000     0.065  3600.065  False  False  False   
1      1  synch 0:58:30  3646.411     1.428  3647.839  False  False  False   
2      2             cc  3858.881     0.234  3859.115   True  False  False   
3      3           cc x  3871.671     0.247  3871.918   True  False  False   

     agg     ld  ...      ind   group  callType  isCall  focalType  hybrid  \
0  False  False  ...  VCVM001  HM2017     start       0          F       0   
1  False  False  ...  VCVM001  HM2017     synch       0          F       0   
2  False  False  ...  VCVM001  HM2017        cc       1          F       0   
3  False  False  ...  VCVM001  HM2017        cc       1          F       0   

  noisy unsureType                                           wav_path  \
0     0          0  /home/kiran/Documents/MPI-Server/EAS_shared/me...   
1     0          0  /home/kiran/Documents/MPI-Server/EAS_shared/me...   