In [1]:
 #@title Imports. { vertical-output: true }
from etils import epath
from ml_collections import config_dict
import numpy as np
import tensorflow as tf
import tqdm
# from src.perch.inference import colab_utils
# colab_utils.initialize(use_tf_gpu=True, disable_warnings=True)

from src.perch import audio_utils
from src.perch.inference import embed_lib
from src.perch.inference import tf_examples
import matplotlib.pyplot as plt
import os

In [2]:
from src.datamodule.gadme_datamodule import GADMEDataModule
from src.datamodule.base_datamodule import DatasetConfig, LoadersConfig, LoaderConfig
from src.datamodule.components.transforms import TransformsWrapper
from src.datamodule.components.event_mapping import XCEventMapping
from src.datamodule.components.event_decoding import EventDecoding


In [3]:
#@title Configuration. { vertical-output: true }

# Select the embedding model: 'perch' or 'birdnet'.
model_choice = 'birdnet'  #@param ["perch", "birdnet"]

config = config_dict.ConfigDict()
config.embed_fn_config = config_dict.ConfigDict()
config.embed_fn_config.model_config = config_dict.ConfigDict()

# Pick the input and output targets.
# NOTE(review): an empty pattern does not name any audio files; set real glob
# patterns here before embedding from files on disk.
config.source_file_patterns = ['']  #@param
config.output_dir = '/tmp/embeddings'  #@param

# For Perch, the directory containing the model.
# Alternatively, set the perch_tfhub_model_version, and the model will load
# directly from TFHub.
# Note that only one of perch_model_path and perch_tfhub_version should be set.
perch_model_path = ''  #@param
perch_tfhub_version = 2  #@param

# For BirdNET, point to the specific tflite file.
# NOTE(review): machine-specific absolute path; adjust for your environment.
birdnet_model_path = '/Users/moritzrichert/Downloads/V2.4/BirdNET_GLOBAL_6K_V2.4_Model_FP32.tflite'  #@param
if model_choice == 'perch':
  config.embed_fn_config.model_key = 'taxonomy_model_tf'
  config.embed_fn_config.model_config.window_size_s = 5.0
  config.embed_fn_config.model_config.hop_size_s = 5.0
  config.embed_fn_config.model_config.sample_rate = 32000
  config.embed_fn_config.model_config.tfhub_version = perch_tfhub_version
  config.embed_fn_config.model_config.model_path = perch_model_path
elif model_choice == 'birdnet':
  config.embed_fn_config.model_key = 'birdnet'
  config.embed_fn_config.model_config.window_size_s = 3.0
  config.embed_fn_config.model_config.hop_size_s = 3.0
  config.embed_fn_config.model_config.sample_rate = 48000
  config.embed_fn_config.model_config.model_path = birdnet_model_path
  # Class list name matching the V2.4 tflite model selected above; change it
  # together with birdnet_model_path when switching BirdNET versions.
  config.embed_fn_config.model_config.class_list_name = 'birdnet_v2_4'
  config.embed_fn_config.model_config.num_tflite_threads = 4
else:
  # Fail here with a clear message instead of leaving model_key unset and
  # hitting a less obvious error when the embedding function is built.
  raise ValueError(
      f"Unknown model_choice {model_choice!r}; expected 'perch' or 'birdnet'.")

# Only write embeddings to reduce size.
config.embed_fn_config.write_embeddings = True
config.embed_fn_config.write_logits = False
config.embed_fn_config.write_separated_audio = False
config.embed_fn_config.write_raw_audio = False


# Embedding windows are broken up into groups, typically one minute in length.
# This lets us limit input size to the model, track progress and
# recover from failures more easily.
config.shard_len_s = 60  #@param
config.num_shards_per_file = 10  #@param

# Number of parent directories to include in the filename.
config.embed_fn_config.file_id_depth = 1

# Number of TF Record files to create.
config.tf_record_shards = 10  #@param

In [4]:
#@title Set up. { vertical-output: true }

# Build the embedding function from the config, then load the model(s).
embed_fn = embed_lib.EmbedFn(**config.embed_fn_config)
print('\n\nLoading model(s)...')
embed_fn.setup()

# Make sure the output directory exists and store the configuration in it.
output_dir = epath.Path(config.output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
embed_lib.maybe_write_config(config, output_dir)

# Expand the source file patterns into per-shard SourceInfo entries.
source_infos = embed_lib.create_source_infos(
    config.source_file_patterns,
    config.num_shards_per_file,
    config.shard_len_s,
)

print(f'Found {len(source_infos)} source infos.')
print(source_infos)
print(os.getcwd())

# Smoke test: push one window of silence through the embedding model.
print('\n\nTest-run of model...')
window_size_s = config.embed_fn_config.model_config.window_size_s
sr = config.embed_fn_config.model_config.sample_rate
num_test_samples = int(sr * window_size_s)
z = np.zeros([num_test_samples])
test_embeds = embed_fn.embedding_model.embed(z)
print('Setup complete!')



Loading model(s)...
.
Found 10 source infos.
[SourceInfo(filepath='.', id=0, shard_num=0, shard_len_s=60), SourceInfo(filepath='.', id=1, shard_num=1, shard_len_s=60), SourceInfo(filepath='.', id=2, shard_num=2, shard_len_s=60), SourceInfo(filepath='.', id=3, shard_num=3, shard_len_s=60), SourceInfo(filepath='.', id=4, shard_num=4, shard_len_s=60), SourceInfo(filepath='.', id=5, shard_num=5, shard_len_s=60), SourceInfo(filepath='.', id=6, shard_num=6, shard_len_s=60), SourceInfo(filepath='.', id=7, shard_num=7, shard_len_s=60), SourceInfo(filepath='.', id=8, shard_num=8, shard_len_s=60), SourceInfo(filepath='.', id=9, shard_num=9, shard_len_s=60)]
/Users/moritzrichert/Projects/GADME-BaselineResults-BA/notebooks/Bird_Embeddings


Test-run of model...


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


Setup complete!


In [5]:
# Dataset name and local cache directory handed to DatasetConfig.
# NOTE(review): cache_dir is a machine-specific absolute path.
dataset_name = "DBD-research-group/gadme_v1"
cache_dir = "/Volumes/BigChongusF/Datasets/Huggingface/gadme_v1/data"

# NOTE(review): all arguments are positional; their meaning is defined by
# DatasetConfig / LoaderConfig — consider keyword arguments for readability.
dataset_config = DatasetConfig(
    cache_dir,
    "high_sierras",
    dataset_name,
    "high_sierras",
    42,
    22,
    3,
    0.2,
    "multiclass",
)

# One LoaderConfig per split, attached to a fresh LoadersConfig.
loaders_config = LoadersConfig()
loaders_config.train = LoaderConfig(12, True, 6, True, False, True, 2)
loaders_config.valid = LoaderConfig(12, False)
loaders_config.test = LoaderConfig(12, False)

# Wrap the event decoding step in the transforms wrapper used by the datamodule.
transforms_wrapper = TransformsWrapper(
    decoding=EventDecoding(0, 10, 48000),
)
mapper = XCEventMapping()

dm = GADMEDataModule(
    dataset_config,
    loaders_config,
    transforms_wrapper,
    mapper,
)

In [6]:
# Run the datamodule's data-preparation step before any split is set up.
dm.prepare_data() 

Saving the dataset (0/1 shards):   0%|          | 0/21650 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/5413 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/10296 [00:00<?, ? examples/s]

In [7]:
# Set the datamodule up for the "fit" stage; the training dataloader is
# requested from it in the following cells.
dm.setup("fit")

In [8]:
# Create the training dataloader from the datamodule and display it.
loader = dm.train_dataloader()
loader

<torch.utils.data.dataloader.DataLoader at 0x2d75bf8e0>

In [9]:
# Inspect the dataset object behind the training dataloader.
loader.dataset

Dataset({
    features: ['filepath', 'labels', 'detected_events', 'start_time', 'end_time'],
    num_rows: 21650
})

In [10]:
# Fetch the first batch from a newly created training dataloader and display it.
# NOTE(review): this calls dm.train_dataloader() again rather than reusing
# `loader`; `x` is read by later cells, so keep the name in sync if renamed.
x = next(iter(dm.train_dataloader()))
x

{'input_values': tensor([[ 0.2774,  0.2697,  0.1854,  ...,  0.0000,  0.0000,  0.0000],
         [-0.0533, -0.0517, -0.0371,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0796,  0.0981,  0.1763,  ...,  0.0000,  0.0000,  0.0000],
         ...,
         [ 0.1569,  0.0525, -0.0994,  ...,  0.0000,  0.0000,  0.0000],
         [-0.3469, -0.3913, -0.3506,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0100,  0.0033, -0.0137,  ...,  0.0000,  0.0000,  0.0000]]),
 'labels': tensor([ 0, 14,  0, 18, 15,  0, 21,  7,  0,  0, 16, 14])}

In [11]:
# Take the first example of the batch's "input_values" and convert it to NumPy.
np_input = x["input_values"][0].numpy()
np_input

array([0.27743042, 0.26968205, 0.18535483, ..., 0.        , 0.        ,
       0.        ], dtype=float32)

In [20]:
# Number of samples in the single example.
# NOTE(review): the configuration cell sets window_size_s=3.0 at
# sample_rate=48000 for birdnet (144000 samples); confirm that the clip length
# produced by the datamodule matches what the model expects.
len(np_input)

160000

In [18]:
# Convert the whole batch of "input_values" to a NumPy array.
np_batch = x["input_values"].numpy()
np_batch

array([[ 0.27743042,  0.26968205,  0.18535483, ...,  0.        ,
         0.        ,  0.        ],
       [-0.05330318, -0.05168802, -0.03712571, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.0796349 ,  0.09809804,  0.1763438 , ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [ 0.15688181,  0.05245745, -0.09936112, ...,  0.        ,
         0.        ,  0.        ],
       [-0.34689653, -0.39128554, -0.35058004, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.00999713,  0.00328362, -0.01369417, ...,  0.        ,
         0.        ,  0.        ]], dtype=float32)

In [19]:
# Embed the whole batch with the model's batch API and display the result.
# NOTE(review): the full repr of the result is very large; consider displaying
# only a summary (e.g. array shapes) instead.
embedds = embed_fn.embedding_model.batch_embed(np_batch)
embedds

InferenceOutputs(embeddings=array([[[[0.00895297, 1.0881592 , 0.12384493, ..., 0.        ,
          0.10322309, 1.4648584 ]]],


       [[[0.        , 0.1552803 , 0.18906957, ..., 0.66761416,
          0.        , 0.3070067 ]]],


       [[[0.06156166, 0.03091764, 0.08061623, ..., 1.2553805 ,
          0.11740097, 0.9848521 ]]],


       ...,


       [[[0.        , 0.01782457, 0.17104888, ..., 1.3984874 ,
          0.        , 1.160324  ]]],


       [[[0.        , 1.141028  , 0.46233058, ..., 1.0300186 ,
          0.05711158, 0.03352364]]],


       [[[0.28156266, 0.74211866, 0.02485014, ..., 0.98906773,
          0.1429816 , 0.13127708]]]], dtype=float32), logits={'birdnet_v2_4': array([[[[-14.297033 , -13.234331 , -15.535536 , ..., -19.077936 ,
          -11.700456 , -14.971894 ]]],


       [[[-12.3372345, -16.130344 , -17.099237 , ..., -13.6679945,
          -12.124773 , -15.84532  ]]],


       [[[-10.2310095, -12.046837 ,  -9.470226 , ..., -10.365631 ,
           -8.456606 , -

In [17]:
# Embed the single example with the non-batched API and display the result.
embeddings = embed_fn.embedding_model.embed(np_input)
embeddings

InferenceOutputs(embeddings=array([[[0.00895297, 1.0881592 , 0.12384493, ..., 0.        ,
         0.10322309, 1.4648584 ]]], dtype=float32), logits={'birdnet_v2_4': array([[[-14.297033, -13.234331, -15.535536, ..., -19.077936,
         -11.700456, -14.971894]]], dtype=float32)}, separated_audio=None, batched=False)

In [12]:
# Call the embed function's private helper on one example; the result is
# unpacked into a (write_logits, model_output) pair.
# NOTE(review): relies on a leading-underscore (private) method of EmbedFn.
write_logits, model_output = embed_fn._audio_to_example_slim(np_input)

In [13]:
# Display the first value returned by _audio_to_example_slim.
write_logits

False

In [14]:
# Display the second value returned by _audio_to_example_slim.
model_output

InferenceOutputs(embeddings=array([[[0.00895297, 1.0881592 , 0.12384493, ..., 0.        ,
         0.10322309, 1.4648584 ]]], dtype=float32), logits={'birdnet_v2_4': array([[[-14.297033, -13.234331, -15.535536, ..., -19.077936,
         -11.700456, -14.971894]]], dtype=float32)}, separated_audio=None, batched=False)

In [15]:
# Dry run: process every entry of `source_infos` without writing anything.
# NOTE(review): the recorded run of this cell failed with
# "AttributeError: 'SourceInfo' object has no attribute 'end_time'", so
# process_new_SourceInfo presumably expects items that carry an `end_time`
# field rather than SourceInfo objects — confirm the intended input.
for source_info in tqdm.tqdm(source_infos):
    examples = embed_fn.process_new_SourceInfo(source_info=source_info)

  0%|          | 0/10 [00:00<?, ?it/s]


AttributeError: 'SourceInfo' object has no attribute 'end_time'

In [None]:
# Counters for items that produced examples vs. items that returned None.
succ = 0
fail = 0

# NOTE(review): the recorded progress bar for this cell shows 21650 items,
# while the set-up cell reported 10 source infos — `source_infos` appears to
# have been rebound outside the visible cells; define the iterable explicitly
# so the notebook survives Restart & Run All.
with tf_examples.EmbeddingsTFRecordMultiWriter(
    output_dir=output_dir, num_files=config.tf_record_shards
) as file_writer:
  for source_info in tqdm.tqdm(source_infos):
    # process_new_SourceInfo is used here in place of embed_fn.process.
    examples = embed_fn.process_new_SourceInfo(source_info=source_info)
    if examples is not None:
      # Serialize every produced example into the sharded TFRecord writer.
      for example in examples:
        file_writer.write(example.SerializeToString())
      succ += 1
    else:
      fail += 1
  file_writer.flush()
print(f'\n\nSuccessfully processed {succ} source_infos, failed {fail} times.')

  0%|          | 0/21650 [00:00<?, ?it/s]

  0%|          | 1/21650 [00:18<112:10:42, 18.65s/it]

<class 'dict'>





TypeError: Parameter to MergeFrom() must be instance of same class: expected tensorflow.core.example.feature_pb2.Feature got tuple.