In [1]:
 #@title Imports. { vertical-output: true }
from etils import epath
from ml_collections import config_dict
import numpy as np
import tensorflow as tf
import tqdm
# from src.perch.inference import colab_utils
# colab_utils.initialize(use_tf_gpu=True, disable_warnings=True)

from src.perch import audio_utils
from src.perch.inference import embed_lib
from src.perch.inference import tf_examples

In [2]:
#@title Configuration. { vertical-output: true }

# Embedding model to run. Must be either 'perch' or 'birdnet'.
model_choice = 'birdnet'  #@param

config = config_dict.ConfigDict()
config.embed_fn_config = config_dict.ConfigDict()
config.embed_fn_config.model_config = config_dict.ConfigDict()

# Pick the input and output targets.
config.source_file_patterns = ['']  #@param
config.output_dir = '/tmp/embeddings'  #@param

# For Perch, the directory containing the model.
# Alternatively, set the perch_tfhub_model_version, and the model will load
# directly from TFHub.
# Note that only one of perch_model_path and perch_tfhub_version should be set.
perch_model_path = ''  #@param
perch_tfhub_version = 2  #@param

# For BirdNET, point to the specific tflite file.
# NOTE: this is a machine-specific absolute path; adjust it before running on
# another machine.
birdnet_model_path = '/Users/moritzrichert/Downloads/V2.4/BirdNET_GLOBAL_6K_V2.4_Model_FP32.tflite'  #@param

# Fill in the model-specific settings for the selected model.
if model_choice == 'perch':
  config.embed_fn_config.model_key = 'taxonomy_model_tf'
  config.embed_fn_config.model_config.window_size_s = 5.0
  config.embed_fn_config.model_config.hop_size_s = 5.0
  config.embed_fn_config.model_config.sample_rate = 32000
  config.embed_fn_config.model_config.tfhub_version = perch_tfhub_version
  config.embed_fn_config.model_config.model_path = perch_model_path
elif model_choice == 'birdnet':
  config.embed_fn_config.model_key = 'birdnet'
  config.embed_fn_config.model_config.window_size_s = 3.0
  config.embed_fn_config.model_config.hop_size_s = 3.0
  config.embed_fn_config.model_config.sample_rate = 48000
  config.embed_fn_config.model_config.model_path = birdnet_model_path
  # Note: The class list has to match the BirdNET model version; the tflite
  # file configured above is V2.4, hence 'birdnet_v2_4'.
  config.embed_fn_config.model_config.class_list_name = 'birdnet_v2_4'
  config.embed_fn_config.model_config.num_tflite_threads = 4
else:
  # Fail here instead of leaving model_key unset, which would only surface
  # later as a less obvious error when the embedding function is built.
  raise ValueError(
      f"Unknown model_choice {model_choice!r}; expected 'perch' or 'birdnet'.")

# Only write embeddings to reduce size.
config.embed_fn_config.write_embeddings = True
config.embed_fn_config.write_logits = False
config.embed_fn_config.write_separated_audio = False
config.embed_fn_config.write_raw_audio = False


# Embedding windows are broken up into groups, typically one minute in length.
# This lets us limit input size to the model, track progress and
# recover from failures more easily.
config.shard_len_s = 60  #@param
config.num_shards_per_file = 10  #@param

# Number of parent directories to include in the filename.
config.embed_fn_config.file_id_depth = 1

# Number of TF Record files to create.
config.tf_record_shards = 10  #@param

In [3]:
#@title Set up. { vertical-output: true }

# Build the embedding function from the config, then run its setup step
# (this is where the model(s) get loaded).
embed_fn = embed_lib.EmbedFn(**config.embed_fn_config)
print('\n\nLoading model(s)...')
embed_fn.setup()

# Make sure the output directory exists and store the configuration in it.
output_dir = epath.Path(config.output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
embed_lib.maybe_write_config(config, output_dir)

# Build the SourceInfos from the file patterns and the sharding settings.
source_infos = embed_lib.create_source_infos(
    config.source_file_patterns,
    config.num_shards_per_file,
    config.shard_len_s,
)
print(f'Found {len(source_infos)} source infos.')

# Smoke test: embed one window of zeros at the model's sample rate.
print('\n\nTest-run of model...')
_model_config = config.embed_fn_config.model_config
sr = _model_config.sample_rate
window_size_s = _model_config.window_size_s
z = np.zeros(int(sr * window_size_s))
test_embeds = embed_fn.embedding_model.embed(z)
print('Setup complete!')



Loading model(s)...
Found 10 source infos.


Test-run of model...


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


Setup complete!


In [4]:
# Display the loaded embedding model object to check which model and settings
# are actually in use.
embed_fn.embedding_model

BirdNet(sample_rate=48000, model_path='/Users/moritzrichert/Downloads/V2.4/BirdNET_GLOBAL_6K_V2.4_Model_FP32.tflite', model=<tensorflow.lite.python.interpreter.Interpreter object at 0x107aa1e10>, tflite=True, class_list=ClassList(namespace='birdnet', classes=('Abroscopus albogularis', 'Abroscopus schisticeps', 'Abroscopus superciliaris', 'Aburria aburri', 'Acanthagenys rufogularis', 'Acanthidops bairdi', 'Acanthis cabaret', 'Acanthis flammea', 'Acanthis hornemanni', 'Acanthisitta chloris', 'Acanthiza apicalis', 'Acanthiza chrysorrhoa', 'Acanthiza ewingii', 'Acanthiza inornata', 'Acanthiza lineata', 'Acanthiza nana', 'Acanthiza pusilla', 'Acanthiza reguloides', 'Acanthiza uropygialis', 'Acanthorhynchus superciliosus', 'Acanthorhynchus tenuirostris', 'Accipiter badius', 'Accipiter bicolor', 'Accipiter cirrocephalus', 'Accipiter cooperii', 'Accipiter fasciatus', 'Accipiter gentilis', 'Accipiter gularis', 'Accipiter hiogaster', 'Accipiter melanoleucus', 'Accipiter minullus', 'Accipiter nis

In [5]:
# Print the result of the zero-input test run performed in the set-up cell.
print(test_embeds)

InferenceOutputs(embeddings=array([[[0.05516903, 0.12140995, 0.01952933, ..., 0.8852808 ,
         0.16755676, 0.97302103]]], dtype=float32), logits={'birdnet_v2_4': array([[[ -8.468899, -11.226729,  -8.486603, ..., -10.954041,
          -7.954918, -10.368435]]], dtype=float32)}, separated_audio=None, batched=False)


In [6]:
# Display the underlying model object held by the embedding model wrapper.
embed_fn.embedding_model.model

<tensorflow.lite.python.interpreter.Interpreter at 0x107aa1e10>

In [7]:
import tf2onnx

In [8]:
# Destination handed to the converter as output_path in the conversion cell.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
path_to_onnx = "/Users/moritzrichert/Models/onnx/bird-net-lite/model/birdnetv24.onnx"

In [10]:
# Short alias for the underlying model object; the following cells query its
# input/output details and signatures.
interpreter = embed_fn.embedding_model.model

In [18]:
# Take the first entry of the interpreter's input details and display it.
inp = interpreter.get_input_details()[0]
inp

{'name': 'INPUT',
 'index': 0,
 'shape': array([     1, 144000], dtype=int32),
 'shape_signature': array([    -1, 144000], dtype=int32),
 'dtype': numpy.float32,
 'quantization': (0.0, 0),
 'quantization_parameters': {'scales': array([], dtype=float32),
  'zero_points': array([], dtype=int32),
  'quantized_dimension': 0},
 'sparsity_parameters': {}}

In [19]:
# Take the first entry of the interpreter's output details and display it.
oup = interpreter.get_output_details()[0]
oup

{'name': 'Identity',
 'index': 546,
 'shape': array([   1, 6522], dtype=int32),
 'shape_signature': array([  -1, 6522], dtype=int32),
 'dtype': numpy.float32,
 'quantization': (0.0, 0),
 'quantization_parameters': {'scales': array([], dtype=float32),
  'zero_points': array([], dtype=int32),
  'quantized_dimension': 0},
 'sparsity_parameters': {}}

In [13]:
# Display the signatures the interpreter reports for this model.
interpreter.get_signature_list()

{}

In [32]:
# Convert the BirdNET TFLite file to ONNX with tf2onnx, passing path_to_onnx as
# the converter's output_path. The call returns a pair: the ONNX model and a
# second value (inspected in later cells).
# NOTE(review): in the recorded run the converter logs that node
# 'model/MEL_SPEC2/stft/rfft2' (RFFT2D) failed to convert, yet a model is still
# returned; loading that model with onnxruntime later fails because no op is
# registered for RFFT2D.
# TODO: resolve the RFFT2D conversion failure before relying on the export.
onnx_model, hmm = tf2onnx.convert.from_tflite(tflite_path=birdnet_model_path, output_path=path_to_onnx)

ERROR:tf2onnx.tfonnx:Failed to convert node 'model/MEL_SPEC2/stft/rfft2' (fct=<bound method RFFT2DOp.version_13 of <class 'tf2onnx.onnx_opset.signal.RFFT2DOp'>>)
'OP=RFFT2D\nName=model/MEL_SPEC2/stft/rfft2\nInputs:\n\tmodel/MEL_SPEC2/stft/rfft1=Unsqueeze, [-1, -1, 1, 1024], 1\n\tmodel/MEL_SPEC2/stft/rfft=Const, [2], 6\nOutpus:\n\tmodel/MEL_SPEC2/stft/rfft2=[-1, -1, 1, 513], 14'
Traceback (most recent call last):
  File "/Users/moritzrichert/opt/miniconda3/envs/5gadme/lib/python3.10/site-packages/tf2onnx/tfonnx.py", line 292, in tensorflow_onnx_mapping
    func(g, node, **kwargs, initialized_tables=initialized_tables, dequantize=dequantize)
  File "/Users/moritzrichert/opt/miniconda3/envs/5gadme/lib/python3.10/site-packages/tf2onnx/onnx_opset/signal.py", line 906, in version_13
    return cls.any_version_2d(True, 13, ctx, node, **kwargs)
  File "/Users/moritzrichert/opt/miniconda3/envs/5gadme/lib/python3.10/site-packages/tf2onnx/onnx_opset/signal.py", line 465, in any_version_2d
    uti

In [25]:
# Rebuild the all-zeros test clip: one model window at the model's sample rate.
_model_config = config.embed_fn_config.model_config
sr = _model_config.sample_rate
window_size_s = _model_config.window_size_s
z = np.zeros(int(sr * window_size_s))

In [30]:
# Embed the all-zeros clip and display only the `embeddings` field of the result.
embed_fn.embedding_model.embed(z).embeddings

array([[[0.05516903, 0.12140995, 0.01952933, ..., 0.8852808 ,
         0.16755676, 0.97302103]]], dtype=float32)

In [33]:
# Check the type of the first value returned by the converter.
type(onnx_model)

onnx.onnx_ml_pb2.ModelProto

In [34]:
# Check the type of the second value returned by the converter.
type(hmm)

NoneType

In [35]:
import onnx

In [36]:
# Path used by the following cells to save the converted model and load it again.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
onnx_path = "/Users/moritzrichert/Models/onnx/bird-net-lite/model-tflite/model.onnx"

In [38]:
# Write the in-memory ONNX model to onnx_path.
onnx.save(onnx_model, onnx_path)

In [39]:
import onnxruntime as ort

In [40]:
# Try to load the saved model with ONNX Runtime.
# NOTE(review): in the recorded run this raises InvalidGraph because the graph
# contains an RFFT2D node for which no op is registered.
sess = ort.InferenceSession(onnx_path)

InvalidGraph: [ONNXRuntimeError] : 10 : INVALID_GRAPH : Load model from /Users/moritzrichert/Models/onnx/bird-net-lite/model-tflite/model.onnx failed:This is an invalid model. In Node, ("model/MEL_SPEC2/stft/rfft2", RFFT2D, "", -1) : ("model/MEL_SPEC2/stft/rfft1": tensor(float),"model/MEL_SPEC2/stft/rfft": tensor(int32),) -> ("model/MEL_SPEC2/stft/rfft2",) , Error No Op registered for RFFT2D with domain_version of 15