Set up

In [1]:
# tutorial imports, check later to see which may be redundant

# Preprocessor classes are used to load, transform, and augment audio samples for use in a machine learning model
from opensoundscape.preprocess.preprocessors import SpectrogramPreprocessor
from opensoundscape.ml.datasets import AudioFileDataset, AudioSplittingDataset


# helper function for displaying a sample as an image
from opensoundscape.preprocess.utils import show_tensor, show_tensor_grid

#other utilities and packages
import torch
import pandas as pd
from pathlib import Path
import numpy as np
import random
import subprocess
import IPython.display as ipd

In [2]:
# Imports by ben
import json

In [22]:
# Locations of the input data on disk.
# JSON file that is later parsed and turned into the DataFrame of samples.
json_path: str = r'/home/ben/data/dataset.json'
# Folder holding the audio files. Keep the trailing slash: file names are
# appended to this string directly when the full paths are built.
dataset_path: str = r'/home/ben/data/full_dataset/'

In [23]:
# Beware: plotting with matplotlib was problematic under WSL + VS Code, so the settings below may need adjusting.

#set up plotting
from matplotlib import pyplot as plt
plt.rcParams['figure.figsize']=[15,5] #for large visuals
%config InlineBackend.figure_format = 'retina'

In [24]:
# Seed every random number generator used in the notebook with one shared
# value, so that a fresh run draws the same random numbers.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)
random.seed(SEED)

### Accessing data
The OpenSoundscape tutorial loads its example dataset from a CSV file of labels. Our dataset is described by a JSON file instead, so we read that JSON file and build the equivalent table from it.

In [48]:
# Parse the dataset description stored at json_path
with open(json_path, "r") as json_file:
    data = json.load(json_file)

# The records sit under the first top-level key of the parsed JSON;
# pull that value out and build the DataFrame from it
records = list(data.values())[0]
df = pd.DataFrame(records)

### Initialise preprocessor

In [58]:
# Value handed to the preprocessor as `sample_duration`.
# NOTE(review): the notebook does not say why 1.92 was chosen — confirm it suits these recordings.
SAMPLE_DURATION = 1.92
pre = SpectrogramPreprocessor(sample_duration=SAMPLE_DURATION)

In [59]:
# Build the table AudioFileDataset needs: one row per audio file, indexed by
# file path, with a single label column.
transformed_df = (
    df[['file_name', 'class']]              # keep only the file name and class columns
    .rename(columns={'file_name': 'file'})  # the path column is called 'file'
    .assign(**{'class': 1})                 # every row gets label 1 ('class' is a keyword, hence the dict)
    .set_index('file')                      # the file path becomes the index
)

# turn the bare file names into full paths by prefixing the dataset folder
transformed_df.index = dataset_path + transformed_df.index
transformed_df.head()

Unnamed: 0_level_0,class
file,Unnamed: 1_level_1
/home/ben/data/full_dataset/test_data.australia.class0.degraded_moth32.20230209_162000.wav,1
/home/ben/data/full_dataset/test_data.australia.class0.degraded_moth32.20230209_164400.wav,1
/home/ben/data/full_dataset/test_data.australia.class0.degraded_moth32.20230209_171600.wav,1
/home/ben/data/full_dataset/test_data.australia.class0.degraded_moth32.20230209_184000.wav,1
/home/ben/data/full_dataset/test_data.australia.class0.degraded_moth32.20230209_195200.wav,1


In [60]:
# Pair the table of file paths with the preprocessor in a dataset object,
# then show how many samples it contains
dataset = AudioFileDataset(transformed_df,pre)
len(dataset)

79066

In [61]:
# NOTE(review): in the recorded run this call raised PreprocessingError for the
# first file in the table; the cause is not visible from the notebook output.
dataset[0] #loads and preprocesses the sample at row 0 of dataset.df

PreprocessingError: failed to preprocess sample from path: /home/ben/data/full_dataset/test_data.australia.class0.degraded_moth32.20230209_162000.wav

In [36]:
# Load the file that failed preprocessing directly with Audio.from_file,
# bypassing the dataset, and display the resulting object
from opensoundscape import Audio
audio_path = r'/home/ben/data/full_dataset/test_data.australia.class0.degraded_moth32.20230209_162000.wav'
audio_object = Audio.from_file(audio_path)
audio_object

In [37]:
# Same direct load, this time on a clip from the tutorial's woodcock data, for comparison.
# NOTE(review): the path is relative and relies on ./woodcock_labeled_data already
# existing; the cell that downloads it appears later in the notebook, so this cell
# presumably fails on a fresh top-to-bottom run — confirm and reorder if so.
from opensoundscape import Audio
audio_path = r'./woodcock_labeled_data/d4c40b6066b489518f8da83af1ee4984.wav'
audio_object = Audio.from_file(audio_path)
audio_object

# Tutorial data and code for comparison

In [54]:
# Fetch the tutorial's labelled woodcock clips and load their one-hot labels.
# The download is skipped when the labels file is already present, so re-running
# the cell does not fetch the archive again.
woodcock_dir = Path('woodcock_labeled_data')
labels_csv = woodcock_dir / 'one_hot_labels.csv'

if not labels_csv.exists():
    archive = Path('woodcock_labeled_data.tar.gz')
    # check=True (plus curl's --fail) stops the cell at the first failing step
    # instead of carrying on with a missing or broken archive
    subprocess.run(
        ['curl', '--fail', 'https://drive.google.com/uc?export=download&id=1Ly2M--dKzpx331cfUFdVuiP96QKGJz_P', '-L', '-o', str(archive)],
        check=True,
    )  # Download the data
    subprocess.run(['tar', '-xzf', str(archive)], check=True)  # Unzip the downloaded tar.gz file
    archive.unlink()  # Remove the file after its contents are unzipped

# load one-hot labels dataframe
labels = pd.read_csv(labels_csv).set_index('file')

# prepend the folder location to the file paths; the folder is resolved from
# where the archive was unpacked rather than a hardcoded home directory
labels.index = f'{woodcock_dir.resolve()}/' + labels.index

#inspect
labels.head()

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:--  0:00:04 --:--:--     0
100 9499k  100 9499k    0     0  1823k      0  0:00:05  0:00:05 --:--:-- 12.7M


Unnamed: 0_level_0,present,absent
file,Unnamed: 1_level_1,Unnamed: 2_level_1
/home/ben/reef-audio-representation-learning/code/woodcock_labeled_data/d4c40b6066b489518f8da83af1ee4984.wav,1,0
/home/ben/reef-audio-representation-learning/code/woodcock_labeled_data/e84a4b60a4f2d049d73162ee99a7ead8.wav,0,1
/home/ben/reef-audio-representation-learning/code/woodcock_labeled_data/79678c979ebb880d5ed6d56f26ba69ff.wav,1,0
/home/ben/reef-audio-representation-learning/code/woodcock_labeled_data/49890077267b569e142440fa39b3041c.wav,1,0
/home/ben/reef-audio-representation-learning/code/woodcock_labeled_data/0c453a87185d8c7ce05c5c5ac5d525dc.wav,1,0


In [55]:
# Value handed to the tutorial preprocessor as `sample_duration`
TUTORIAL_SAMPLE_DURATION = 2.0
pre1 = SpectrogramPreprocessor(sample_duration=TUTORIAL_SAMPLE_DURATION)

In [56]:
# Build the dataset for the tutorial's woodcock labels and display it.
# The displayed object must be dataset1, the one created on the line above;
# `dataset` is a different object created earlier in the notebook.
dataset1 = AudioFileDataset(labels,pre1)
dataset1

<class 'opensoundscape.ml.datasets.AudioFileDataset'> object with preprocessor: Preprocessor with pipeline:
load_audio           Action calling <bound method Audio.from_file o...
random_trim_audio    Augmentation Action calling <function trim_aud...
trim_audio           Action calling <function trim_audio at 0x7f225...
to_spec              Action calling <bound method Spectrogram.from_...
bandpass             Action calling <function Spectrogram.bandpass ...
to_tensor                                                       Action
time_mask            Augmentation Action calling <function time_mas...
frequency_mask       Augmentation Action calling <function frequenc...
add_noise            Augmentation Action calling <function tensor_a...
rescale              Action calling <function scale_tensor at 0x7f2...
random_affine        Augmentation Action calling <function torch_ra...
dtype: object

In [57]:
dataset1[0] #loads and preprocesses the sample at row 0 of dataset1.df

AudioSample(source=/home/ben/reef-audio-representation-learning/code/woodcock_labeled_data/d4c40b6066b489518f8da83af1ee4984.wav, start_time=0.0,end_time=2.0, labels=present    1
absent     0
Name: /home/ben/reef-audio-representation-learning/code/woodcock_labeled_data/d4c40b6066b489518f8da83af1ee4984.wav, dtype: int64)