gibbonR: An R package for the automated detection and classification of female gibbon calls from long-term acoustic recordings
- Authors
- Package description
- Tutorial
- Quick start guide
- Part 1. Training Data with Labeled .wav clips
- Part 2. Run the detector/classifier
- Part 3. Data visualization
Dena J. Clink & Holger Klinck
K. Lisa Yang Center for Conservation Bioacoustics, Cornell Lab of
Ornithology, Cornell University
The field of bioacoustics is inherently multidisciplinary and relies on computer scientists, engineers, and ecologists. This package is directed towards ecologists who are interested in incorporating bioacoustics into their research, but may not have the skills or training. The goal for the creation of this package was to make commonly used signal processing techniques and various machine learning algorithms readily available in R for anyone interested in using bioacoustics in their research.
Planned release of updated package October 31, 2022
https://denajgibbon.github.io/gibbonR-tutorial/
You can install the development version from GitHub with:
# install.packages("devtools")
# devtools::install_github("DenaJGibbon/gibbonR")
library(gibbonR)
#> Loading required package: stringr
#> Loading required package: e1071
#> Loading required package: randomForest
#> randomForest 4.7-1
#> Type rfNews() to see new features/changes/bug fixes.
#> Loading required package: tuneR
#> Loading required package: seewave
# You need to tell R where to store the zip files on your computer.
destination.file.path.zip <-
"/Users/denaclink/Downloads/BorneoExampleData.zip"
# You also need to tell R where to save the unzipped files
destination.file.path <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/"
# This function will download the data from github
utils::download.file("https://github.com/DenaJGibbon/BorneoExampleData/archive/master.zip",
destfile = destination.file.path.zip)
# This function will unzip the file
utils::unzip(zipfile = destination.file.path.zip,
exdir = destination.file.path)
# Examine the contents
list.of.sound.files <- list.files(paste(destination.file.path,
"BorneoExampleData-master", "data", sep =
"/"),
full.names = T)
list.of.sound.files
Use this function to read in the .RDA file and save it as an R object from https://stackoverflow.com/questions/5577221/how-can-i-load-an-object-into-a-variable-name-that-i-specify-from-an-r-data-file
loadRData <- function(fileName) {
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
This function will load the entire list of r data files
list.rda.files <- list()
for(x in 1:length(list.of.sound.files)){
list.rda.files[[x]] <- loadRData(list.of.sound.files[[x]])
}
Assign each rda an informative name
multi.class.list <- list.rda.files[[1]]
S11_20180219_060002_1800sto3600s <- list.rda.files[[2]]
Now we create a directory with the training .wav files
TrainingDataDirectory <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/BorneoMultiClass"
for(a in 1:length(multi.class.list)){
Temp.element <- multi.class.list[[a]]
writeWave(Temp.element[[2]], paste(TrainingDataDirectory,Temp.element[[1]],sep='/'))
}
TrainingWavFilesDir <-
"/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/BorneoMultiClass/"
trainingdata <- gibbonR::MFCCFunction(input.dir=TrainingWavFilesDir, min.freq = 400, max.freq = 1600,win.avg='standard')
trainingdata$class <- as.factor(trainingdata$class)
trainingdata$class <- as.factor(trainingdata$class)
ml.model.svm <- e1071::svm(trainingdata[, 2:ncol(trainingdata)], trainingdata$class, kernel = "radial",
cross = 25,
probability = TRUE)
print(paste('SVM accuracy',ml.model.svm$tot.accuracy))
#> [1] "SVM accuracy 88"
ml.model.rf <- randomForest::randomForest(x=trainingdata[, 2:ncol(trainingdata)], y = trainingdata$class)
print(ml.model.rf)
#>
#> Call:
#> randomForest(x = trainingdata[, 2:ncol(trainingdata)], y = trainingdata$class)
#> Type of random forest: classification
#> Number of trees: 500
#> No. of variables tried at each split: 13
#>
#> OOB estimate of error rate: 10.67%
#> Confusion matrix:
#> female.gibbon leaf.monkey noise solo.gibbon class.error
#> female.gibbon 18 0 1 1 0.1000000
#> leaf.monkey 0 11 4 0 0.2666667
#> noise 0 0 19 1 0.0500000
#> solo.gibbon 0 0 1 19 0.0500000
# Specify the folder where the training data will be saved
TrainingDataFolderLocation <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/TrainingDataFromRavenSelectionTables/"
TrainingDataMFCC <- MFCCFunction(input.dir= TrainingDataFolderLocation, min.freq = 400, max.freq = 1600,win.avg="standard")
TrainingDataMFCC$class <- as.factor(TrainingDataMFCC$class)
TestFileDirectory <- '/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/GibbonTestFiles'
OutputDirectory <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/DetectAndClassifyOutput"
gibbonR(input=TestFileDirectory,
feature.df=TrainingDataMFCC,
model.type.list=c('SVM','RF'),
tune = TRUE,
short.wav.duration=300,
target.signal = c("female.gibbon"),
min.freq = 400, max.freq = 1600,
noise.quantile.val=0.15,
minimum.separation =3,
n.windows = 9, num.cep = 12,
spectrogram.window =160,
pattern.split = ".wav",
min.signal.dur = 3,
max.sound.event.dur = 25,
maximum.separation =1,
probability.thresh.svm = 0.15,
probability.thresh.rf = 0.15,
wav.output = "TRUE",
output.dir =OutputDirectory,
swift.time=TRUE,time.start=5,time.stop=10,
write.table.output=FALSE,verbose=TRUE,
random.sample='NA')
library(gibbonR)
library(ggpubr)
#> Loading required package: ggplot2
#>
#> Attaching package: 'ggplot2'
#> The following object is masked from 'package:randomForest':
#>
#> margin
gibbonID(input.dir="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/",output.dir="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/Thumbnails/",win.avg='standard',add.spectrograms=TRUE,min.freq=400,max.freq=1600,class='no.clustering')
#> [1] "Step 1 Calculating MFCCs"
#> [1] "Step 2 Using class labels for clustering"
#> [1] "Step 3 Creating Spectrograms "
#> [1] "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/Thumbnails/ already exists"
#> [1] "Adding Spectrograms to Plot Step 3 of 3"
library(gibbonR)
library(ggpubr)
library(apcluster)
#>
#> Attaching package: 'apcluster'
#> The following object is masked from 'package:stats':
#>
#> heatmap
gibbonID(input.dir="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/",output.dir="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/Thumbnails/",win.avg='standard',class='affinity.fixed', q.fixed=0.1,add.spectrograms=TRUE,min.freq=400,max.freq=1600)
#> [1] "Step 1 Calculating MFCCs"
#> [1] "Step 2 Computing unsupervised clustering with fixed q"
#> [1] "Step 3 Creating Spectrograms "
#> [1] "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/Thumbnails/ already exists"
#> [1] "Adding Spectrograms to Plot Step 3 of 3"
This package is currently in development, with submission to rOpenSci planned shortly. In the interim, please cite the arXiv preprint:
Clink, D. J. & H. Klinck. (2019). gibbonR: An R package for the detection and classification of acoustic signals using machine learning. arXiv, 1906.02572. https://doi.org/10.48550/arXiv.1906.02572