# Model Architecture Tests

In [1]:
# Reload edited modules automatically before each cell runs, so changes to the
# imported model code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from bicycle_bell_sed_models.models.crnn import crnn 
from bicycle_bell_sed_models.models.yamnet_base import yamnet_base
from bicycle_bell_sed_models.models.yamnet_lstm_fc import yamnet_lstm_fc

import tensorflow as tf
import tensorflow.keras as keras
import numpy

In [3]:
# Synthetic smoke-test data: a handful of uniform-noise clips with random
# binary labels. Nothing here is real audio; it only has to have the right shape.
seed = 0          # fixed so that Restart & Run All reproduces the same data
numClips = 3      # number of clips (and labels) to generate
clipSeconds = 3   # duration of each clip in seconds
sr = 16000        # sample rate (samples per second)
audioLength = sr * clipSeconds  # samples per clip

rng = numpy.random.default_rng(seed)
# One vectorised draw per array instead of a Python-level loop per clip/label.
rdmAudio = rng.random((numClips, audioLength))  # float64 values in [0, 1)
# `numpy.int_` is the same default integer dtype numpy infers from Python ints,
# so the label dtype matches what the list-based construction produced.
rdmLabel = rng.integers(0, 2, size=numClips, dtype=numpy.int_)  # each 0 or 1

In [4]:
# Wrap the random clips and labels in a tf.data pipeline. The dict keys are the
# names used to address the model input (`wav_<sr>_mono_input`) and the
# `class_output` target.
input_key = f'wav_{sr}_mono_input'
ds = tf.data.Dataset.from_tensor_slices(
    ({input_key: rdmAudio}, {'class_output': rdmLabel})
).batch(3)

print(ds)

# Inspect only the first batch.
for features, targets in ds.take(1):
    print(features)
    print(targets)
    # Zipping two dicts pairs up their keys, so this prints the first input
    # name and the first output name rather than tensor values.
    first_pair = next(zip(features, targets), None)
    if first_pair is not None:
        print(first_pair[0])
        print(first_pair[1])

<BatchDataset shapes: ({wav_16000_mono_input: (None, 48000)}, {class_output: (None,)}), types: ({wav_16000_mono_input: tf.float64}, {class_output: tf.int32})>
{'wav_16000_mono_input': <tf.Tensor: shape=(3, 48000), dtype=float64, numpy=
array([[0.58602391, 0.19531475, 0.20526848, ..., 0.62241431, 0.7691316 ,
        0.3080858 ],
       [0.76500825, 0.30596491, 0.12973217, ..., 0.37422351, 0.61563969,
        0.65762062],
       [0.42036922, 0.0733766 , 0.18922379, ..., 0.2413823 , 0.93748899,
        0.76353425]])>}
{'class_output': <tf.Tensor: shape=(3,), dtype=int32, numpy=array([0, 0, 0])>}
wav_16000_mono_input
class_output


## CRNN Test

single-output

In [6]:
# Build the CRNN and print its layer table.
model_crnn = crnn()
model_crnn.summary()

# Single-output setup: only the `class_output` head is configured, by name.
crnn_loss = {"class_output": 'binary_crossentropy'}
crnn_metrics = {"class_output": ['accuracy']}
crnn_loss_weights = {"class_output": 1.0}

model_crnn.compile(
    optimizer='adam',
    loss=crnn_loss,
    metrics=crnn_metrics,
    loss_weights=crnn_loss_weights,
)
# `ds` is already batched, so no batch size is given here.
model_crnn.fit(ds)

Model: "crnn"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 wav_16000_mono_input (Input  [(None, None)]           0         
 Layer)                                                          
                                                                 
 log_mel_spectrogram_transfo  ((None, None, 64),       0         
 rm (LogMelSpectrogramTransf   (None, None, 96, 64))             
 ormLayer)                                                       
                                                                 
 reshape (Reshape)           (None, None, 64, 1)       0         
                                                                 
 time_distributed_conv1d (Ti  (None, None, 33, 64)     2112      
 meDistributed)                                                  
                                                                 
 time_distributed_batch_norm  (None, None, 33, 64)     256    

<keras.callbacks.History at 0x2145db20f70>

multi-output

In [6]:
# Multi-output setup, configured positionally: each list entry applies to the
# model output at the same position. The second output gets no loss (None) and
# a loss weight of 0.0.
model_crnn = crnn()
model_crnn.compile(
    optimizer='adam',
    loss=['binary_crossentropy', None],
    metrics=[('accuracy',), (None,)],
    loss_weights=[1.0, 0.0],
)
# `ds` is a tf.data.Dataset that is already batched. Keras documents that
# `batch_size` must not be specified for dataset inputs, so it is not passed.
model_crnn.fit(ds)



<keras.callbacks.History at 0x21f244b59d0>

In [7]:
# Multi-output setup, configured by output name. `log_mel_spectrogram_output`
# gets no loss (None) and a loss weight of 0.0.
model_crnn = crnn()
model_crnn.compile(
    optimizer='adam',
    loss={
        "class_output": 'binary_crossentropy',  # last layer name
        "log_mel_spectrogram_output": None,
    },
    metrics={
        "class_output": [
            'accuracy',
        ],
        "log_mel_spectrogram_output": [
            None,
        ],
    },
    loss_weights={
        "class_output": 1.0,
        "log_mel_spectrogram_output": 0.0,
    },
)
# `ds` is a tf.data.Dataset that is already batched. Keras documents that
# `batch_size` must not be specified for dataset inputs, so it is not passed.
model_crnn.fit(ds)



<keras.callbacks.History at 0x21fefd4c550>

## YAMNet Base Test

single-output

In [13]:
# Build the YAMNet base model and print its layer table.
model_yn_base = yamnet_base()
model_yn_base.summary()

# Single-output setup: only the `class_output` head is configured, by name.
yn_base_loss = {"class_output": 'binary_crossentropy'}
yn_base_metrics = {"class_output": ['accuracy']}
yn_base_loss_weights = {"class_output": 1.0}

model_yn_base.compile(
    optimizer='adam',
    loss=yn_base_loss,
    metrics=yn_base_metrics,
    loss_weights=yn_base_loss_weights,
)
# `ds` is already batched, so no batch size is given here.
model_yn_base.fit(ds)

Model: "yamnet_base"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 wav_16000_mono_input (Input  [(None, None)]           0         
 Layer)                                                          
                                                                 
 yamnet_wrapper (YAMNetWrapp  {'scores': (None, None,   0        
 er)                         521),                               
                              'spectrogram': (None, N            
                             one, 64)}                           
                                                                 
 tf.__operators__.getitem_1   (None, None)             0         
 (SlicingOpLambda)                                               
                                                                 
 scores (Layer)              (None, None)              0         
                                                       





<keras.callbacks.History at 0x19676a649d0>

multi-output

In [11]:
# Multi-output setup, configured positionally: each list entry applies to the
# model output at the same position. The second output gets no loss (None) and
# a loss weight of 0.0.
model_yn_base = yamnet_base()
model_yn_base.compile(
    optimizer='adam',
    loss=['binary_crossentropy', None],
    metrics=[('accuracy',), (None,)],
    loss_weights=[1.0, 0.0],
)
# `ds` is a tf.data.Dataset that is already batched. Keras documents that
# `batch_size` must not be specified for dataset inputs, so it is not passed.
model_yn_base.fit(ds)



<keras.callbacks.History at 0x22046f89f70>

In [12]:
# Multi-output setup, configured by output name. `log_mel_spectrogram_output`
# gets no loss (None) and a loss weight of 0.0.
model_yn_base = yamnet_base()
model_yn_base.compile(
    optimizer='adam',
    loss={
        "class_output": 'binary_crossentropy',  # last layer name
        "log_mel_spectrogram_output": None,
    },
    metrics={
        "class_output": [
            'accuracy',
        ],
        "log_mel_spectrogram_output": [
            None,
        ],
    },
    loss_weights={
        "class_output": 1.0,
        "log_mel_spectrogram_output": 0.0,
    },
)
# `ds` is a tf.data.Dataset that is already batched. Keras documents that
# `batch_size` must not be specified for dataset inputs, so it is not passed.
model_yn_base.fit(ds)



<keras.callbacks.History at 0x2205f2a2d60>

## YAMNet Extended Test

single-output

In [11]:
# Build the YAMNet + LSTM + FC model and print its layer table.
model_yn_extended = yamnet_lstm_fc()
model_yn_extended.summary()

# Single-output setup: only the `class_output` head is configured, by name.
yn_extended_loss = {"class_output": 'binary_crossentropy'}
yn_extended_metrics = {"class_output": ['accuracy']}
yn_extended_loss_weights = {"class_output": 1.0}

model_yn_extended.compile(
    optimizer='adam',
    loss=yn_extended_loss,
    metrics=yn_extended_metrics,
    loss_weights=yn_extended_loss_weights,
)
# `ds` is already batched, so no batch size is given here.
model_yn_extended.fit(ds)

Model: "yamnet_lstm_fc"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 wav_16000_mono_input (Input  [(None, None)]           0         
 Layer)                                                          
                                                                 
 yamnet_wrapper (YAMNetWrapp  {'embeddings': (None, No  0        
 er)                         ne, 1024),                          
                              'spectrogram': (None, N            
                             one, 64)}                           
                                                                 
 yamnet_embeddings (Layer)   (None, None, 1024)        0         
                                                                 
 lstm (LSTM)                 (None, None, 64)          278784    
                                                                 
 time_distributed_dense_1 (T  (None, None, 32)      





<keras.callbacks.History at 0x196407acb20>

multi-output

In [16]:
# Multi-output setup, configured positionally: each list entry applies to the
# model output at the same position. The second output gets no loss (None) and
# a loss weight of 0.0.
model_yn_extended = yamnet_lstm_fc()
model_yn_extended.compile(
    optimizer='adam',
    loss=['binary_crossentropy', None],
    metrics=[('accuracy',), (None,)],
    loss_weights=[1.0, 0.0],
)
# `ds` is a tf.data.Dataset that is already batched. Keras documents that
# `batch_size` must not be specified for dataset inputs, so it is not passed.
model_yn_extended.fit(ds)







<keras.callbacks.History at 0x22092e8aac0>

In [17]:
# Multi-output setup, configured by output name. `log_mel_spectrogram_output`
# gets no loss (None) and a loss weight of 0.0.
model_yn_extended = yamnet_lstm_fc()
model_yn_extended.compile(
    optimizer='adam',
    loss={
        "class_output": 'binary_crossentropy',  # last layer name
        "log_mel_spectrogram_output": None,
    },
    metrics={
        "class_output": [
            'accuracy',
        ],
        "log_mel_spectrogram_output": [
            None,
        ],
    },
    loss_weights={
        "class_output": 1.0,
        "log_mel_spectrogram_output": 0.0,
    },
)
# `ds` is a tf.data.Dataset that is already batched. Keras documents that
# `batch_size` must not be specified for dataset inputs, so it is not passed.
model_yn_extended.fit(ds)







<keras.callbacks.History at 0x220825e7a00>