# RKNN fixed version (noMel/conv1d)


===============================================================

* build a new noMel model based on the baseline
* replace the bad layers with working layers (Conv1d → equivalent Conv2d)
    * no need to change the width of the layers
* run voices through the model and compare to the baseline

===============================================================

In [1]:
%load_ext autoreload
%autoreload 2
## our utils
from utils.common_import import *
from utils.test_all_voices import *

2.6.0+cu124


In [2]:
%%capture --no-display
import my_utils as myUtils
from play1_setBase_line_B2 import original_model,base_line_embedding

## create new model

In [3]:

class Conv1dAs2d(nn.Module):
    """
    Drop-in replacement for an ``nn.Conv1d`` that executes as an ``nn.Conv2d``.

    The time axis is mapped to the Conv2d height (kernel ``(k, 1)``) and a
    dummy width of 1 is appended, so that ONNX shows only Conv2d, which RKNN
    supports.

    Args:
        conv1d: the Conv1d to mirror. Its weight and bias are copied; kernel
            size, stride, dilation, groups, padding and padding mode are
            carried over, and the new layer is created on the same device
            and with the same dtype as ``conv1d.weight``.

    ``forward`` takes ``[B, C, T]`` and returns the same shape the wrapped
    Conv1d would return.
    """
    def __init__(self, conv1d: nn.Conv1d):
        super().__init__()

        k, d, s, g = conv1d.kernel_size[0], conv1d.dilation[0], conv1d.stride[0], conv1d.groups

        # --- padding ---
        if isinstance(conv1d.padding, str):        # "same" or "valid"
            if conv1d.padding == "same":
                total = d * (k - 1)
                if total % 2 == 0:
                    # Symmetric case: express it as an explicit number.
                    padding = (total // 2, 0)
                else:
                    # An odd total needs asymmetric padding, which a single
                    # integer cannot express (it would change the output
                    # length). Let Conv2d resolve "same" itself; on the
                    # width-1 axis with kernel width 1 that is zero padding.
                    padding = "same"
            else:                                  # "valid"
                padding = (0, 0)
        else:                                      # already a tuple
            padding = (conv1d.padding[0], 0)

        # Build the Conv2d next to the source layer (same device / dtype) so
        # the wrapper can be used without an extra .to(...) call.
        self.conv2d = nn.Conv2d(
            in_channels=conv1d.in_channels,
            out_channels=conv1d.out_channels,
            kernel_size=(k, 1),
            stride=(s, 1),
            padding=padding,
            dilation=(d, 1),
            groups=g,
            bias=conv1d.bias is not None,
            padding_mode=conv1d.padding_mode,
            device=conv1d.weight.device,
            dtype=conv1d.weight.dtype,
        )

        with torch.no_grad():
            # (out, in/groups, k) → (out, in/groups, k, 1)
            self.conv2d.weight.copy_(conv1d.weight.unsqueeze(-1))
            if conv1d.bias is not None:
                self.conv2d.bias.copy_(conv1d.bias)

    def forward(self, x):           # x: [B, C, T]
        # [B, C, T] -> [B, C, T, 1] -> conv2d -> drop the dummy width again.
        return self.conv2d(x.unsqueeze(-1)).squeeze(-1)


In [4]:
########################################
# 2) Define a Model Class without MelBanks
########################################
import torch
import torch.nn as nn

class ReDimNetNoMel(nn.Module):
    """
    A wrapper around the original ReDimNetWrap that:
      - Excludes the 'spec' (MelBanks) module
      - Uses 'backbone', 'pool', 'bn', and 'linear'
      - Swaps the Conv1d layers of selected backbone blocks for Conv1dAs2d
    We expect a precomputed mel spectrogram as input with shape [B, 1, n_mels, time_frames].

    NOTE: the submodules are taken by reference, not copied. The Conv1d
    replacement therefore also changes ``original_wrap.backbone`` in place.
    Layers that are not (or no longer) ``nn.Conv1d`` are left untouched, so
    wrapping the same ``original_wrap`` a second time is safe.

    Args:
        original_wrap: model exposing ``backbone``, ``pool``, ``bn`` and
            ``linear`` attributes; ``backbone`` must expose ``stage0`` ..
            ``stage5`` as indexable containers.
    """
    def __init__(self, original_wrap):
        super().__init__()

        # Grab references to the submodules we want to keep
        self.backbone = original_wrap.backbone

        # fix problem01
        # list of (stage, block) indices you already know are problematic
        TARGETS = [(0, 6), (1, 6), (2, 7), (3, 8), (4, 8), (5, 8)]

        for s_idx, b_idx in TARGETS:
            stage = getattr(self.backbone, f"stage{s_idx}")
            for tcm_idx in range(4):
                block = stage[b_idx].tcm[tcm_idx]

                # Only convert genuine Conv1d layers: anything else has either
                # been converted already or is not something Conv1dAs2d can mirror.
                if isinstance(block.dwconvs[0], nn.Conv1d):
                    block.dwconvs[0] = Conv1dAs2d(block.dwconvs[0])
                if isinstance(block.pwconv1, nn.Conv1d):
                    block.pwconv1 = Conv1dAs2d(block.pwconv1)   # 1×1 conv

        # Pooling, batch-norm and the final projection are reused unchanged.
        self.pool = original_wrap.pool
        self.bn = original_wrap.bn
        self.linear = original_wrap.linear

    def forward(self, x):
        """Map a log-mel batch [B, 1, n_mels, time_frames] to embeddings."""
        # (1) Pass through the backbone
        #     (the notebook's summary shows [1, 1152, 134] for a (1, 1, 72, 134) input)
        x = self.backbone(x)
        # (2) Pooling
        x = self.pool(x)
        # (3) BatchNorm
        x = self.bn(x)
        # (4) Final linear (the notebook's summary shows a [1, 192] output)
        x = self.linear(x)
        return x


# Create an instance of our new model that skips the MelBanks front-end.
# NOTE: ReDimNetNoMel keeps a reference to original_model.backbone and swaps
# layers inside it, so original_model itself is modified by this call.
model_no_mel = ReDimNetNoMel(original_model)



run to test the model


In [5]:
# Smoke test: one forward pass on random input.
model_no_mel.eval()  # <- this line is critical! (switch to inference mode before running the model)
# Random stand-in for a log-mel batch laid out as [B, 1, n_mels, time_frames].
dummy = torch.randn(1, 1, 72, 200)
model_no_mel(dummy)

tensor([[ 2.6393e-01, -5.0332e-01,  1.9214e+00, -7.4124e-01,  1.7345e+00,
          8.3869e-01, -1.3975e+00,  1.1946e-01,  1.7128e+00,  1.5095e-01,
          5.9301e-01, -4.2547e-01, -7.9606e-01,  6.8751e-01,  1.3232e-01,
         -1.7492e-01,  5.0086e-01, -1.1521e-01, -2.8888e-01, -1.3709e+00,
          3.1427e-01, -2.2850e+00, -1.1862e+00, -1.3399e+00,  3.3672e-01,
         -2.5876e+00,  2.9062e-01,  1.5842e+00, -1.6648e+00, -2.8058e-01,
          2.3136e+00,  1.7306e-01,  2.5088e-01, -6.1019e-01, -3.0805e-01,
         -1.5417e-01, -6.0994e-01, -4.8904e-01,  2.0635e+00, -1.2181e+00,
         -3.3012e-01, -5.9372e-01, -1.8517e+00, -8.6455e-01, -5.2859e-01,
          1.0461e+00, -1.1783e-03,  3.7095e-01, -3.4423e-01, -7.8996e-01,
          1.3540e+00,  2.5815e+00,  1.0338e+00, -8.0639e-02,  1.8188e+00,
          3.2093e+00, -1.6101e+00, -1.6051e-01,  1.4646e+00, -7.5814e-01,
         -3.8342e-01,  5.7386e-01,  2.3761e+00, -4.8016e-01,  2.6021e-01,
          6.0601e-01, -7.2203e-01,  2.

### FP16 check


In [6]:
# Sanity check: does a pure half-precision copy of the model still produce
# finite outputs? Work on a deep copy so model_no_mel itself stays untouched.
fp16_net = copy.deepcopy(model_no_mel)
fp16_net = fp16_net.half().eval()
with torch.no_grad():
    fp16_out = fp16_net(dummy.half())
ok = torch.isfinite(fp16_out).all()
print('safe in pure FP16?', ok)

safe in pure FP16? tensor(True)


## info

In [7]:
# eval() returns the module itself, so the notebook displays the layer tree below.
model_no_mel.eval()


ReDimNetNoMel(
  (backbone): ReDimNet(
    (stem): Sequential(
      (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=same)
      (1): LayerNorm(C=(16,), data_format=channels_first, eps=1e-06)
      (2): to1d()
    )
    (stage0): Sequential(
      (0): weigth1d(w=(1, 1, 1, 1),sequential=False)
      (1): to2d(f=72,c=16)
      (2): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1))
      (3): ConvBlock2d(
        (conv_block): ConvNeXtLikeBlock(
          (dwconvs): ModuleList(
            (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=4)
          )
          (norm): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): GELU(approximate='none')
          (pwconv1): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1))
        )
      )
      (4): ConvBlock2d(
        (conv_block): ConvNeXtLikeBlock(
          (dwconvs): ModuleList(
            (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=

In [8]:
# Per-layer output shapes / parameter counts for a (1, 1, 72, 134) input.
# NOTE(review): `summary` arrives via the wildcard imports at the top — confirm its origin.
summary(model_no_mel, (1, 1, 72, 134))


Layer (type:depth-idx)                                       Output Shape              Param #
ReDimNetNoMel                                                [1, 192]                  --
├─ReDimNet: 1-1                                              [1, 1152, 134]            --
│    └─Sequential: 2-1                                       [1, 1152, 134]            --
│    │    └─Conv2d: 3-1                                      [1, 16, 72, 134]          160
│    │    └─LayerNorm: 3-2                                   [1, 16, 72, 134]          32
│    │    └─to1d: 3-3                                        [1, 1152, 134]            --
│    └─Sequential: 2-2                                       [1, 1152, 134]            --
│    │    └─weigth1d: 3-4                                    [1, 1152, 134]            (1)
│    │    └─to2d: 3-5                                        [1, 16, 72, 134]          --
│    │    └─Conv2d: 3-6                                      [1, 16, 72, 134]          272
│ 

# TORCH SIDE

In [9]:
def torch_inference(wav_path: str):
    """
    Compute a speaker embedding for one audio file with the PyTorch model.

    Steps: load the file, down-mix multi-channel audio to mono, resample to
    16 kHz when the file uses another rate, convert to log-mel through
    ``myUtils.waveform_to_logmel`` and run the module-level ``model_no_mel``
    without gradient tracking.

    Args:
        wav_path: path of the audio file to load.

    Returns:
        The tensor returned by ``model_no_mel`` for this clip.
    """
    TARGET_SR = 16000

    # (a) Load audio -> [channels, time]
    audio, native_sr = torchaudio.load(wav_path)

    # Collapse stereo / multi-channel recordings by averaging the channels.
    n_channels = audio.shape[0]
    if n_channels > 1:
        audio = audio.mean(dim=0, keepdim=True)

    # Bring every clip to the common sample rate.
    if native_sr != TARGET_SR:
        audio = T.Resample(orig_freq=native_sr, new_freq=TARGET_SR)(audio)

    # (b) Log-mel features
    features = myUtils.waveform_to_logmel(audio)
    print('feeding logmel shape:', features.shape)

    # (c) Forward pass, inference only
    with torch.no_grad():
        speaker_vec = model_no_mel(features)

    print("Embedding shape:", speaker_vec.shape)
    return speaker_vec

run test

In [10]:
# Run the shared voice test set through the PyTorch path.
# The result is indexed by 'embed0' .. 'embed6' in the comparison cells below.
torch_embedding = test_all_voices(
    extract_speaker_embedding_function = torch_inference,
    cosine_similarity_function = myUtils.cosine_similarity
)

Input waveform shape: torch.Size([1, 32000])
feeding logmel shape: torch.Size([1, 1, 72, 134])
Embedding shape: torch.Size([1, 192])
Input waveform shape: torch.Size([1, 25776])
Padding log_mel from 108 to 134 frames
feeding logmel shape: torch.Size([1, 1, 72, 134])
Embedding shape: torch.Size([1, 192])
Input waveform shape: torch.Size([1, 23570])
Padding log_mel from 99 to 134 frames
feeding logmel shape: torch.Size([1, 1, 72, 134])
Embedding shape: torch.Size([1, 192])
Input waveform shape: torch.Size([1, 32000])
feeding logmel shape: torch.Size([1, 1, 72, 134])
Embedding shape: torch.Size([1, 192])
Input waveform shape: torch.Size([1, 32000])
feeding logmel shape: torch.Size([1, 1, 72, 134])
Embedding shape: torch.Size([1, 192])
Input waveform shape: torch.Size([1, 28126])
Padding log_mel from 118 to 134 frames
feeding logmel shape: torch.Size([1, 1, 72, 134])
Embedding shape: torch.Size([1, 192])
Input waveform shape: torch.Size([1, 32000])
feeding logmel shape: torch.Size([1, 1, 7

## compare to baseline

* compare each voice's embedding from the current model with the baseline model's embedding:

In [11]:
# Cosine similarity, clip by clip: baseline embedding vs. this notebook's
# PyTorch embedding (keys embed0 .. embed6).
for clip_idx in range(7):
    key = f"embed{clip_idx}"
    score = myUtils.cosine_similarity(base_line_embedding[key], torch_embedding[key])
    print(f"Similarity embde{clip_idx}: {score}")

Similarity embde0: 0.9999998807907104
Similarity embde1: 0.9944314956665039
Similarity embde2: 0.9896694421768188
Similarity embde3: 0.9992930293083191
Similarity embde4: 0.9998515844345093
Similarity embde5: 0.997683584690094
Similarity embde6: 0.9999997615814209


# ONNX SIDE

In [12]:
# Export the no-mel model to ONNX, then list the resulting file (shell escape).
# Per the cell output, the helper also writes ReDimNet_no_mel_nhwc.onnx.
myUtils.export_to_onnx(model_no_mel,onnx_path = "ReDimNet_no_mel.onnx")
!ls -lah ReDimNet_no_mel.onnx

Exported NHWC model to ReDimNet_no_mel_nhwc.onnx
Exported to ReDimNet_no_mel.onnx
-rw-rw-r-- 1 vlad vlad 20M Jul  6 07:04 ReDimNet_no_mel.onnx


### store half

In [13]:
# Convert the exported ONNX file to half precision and save it under a new name.
myUtils.restore_in_half_precision('ReDimNet_no_mel.onnx','ReDimNet_no_mel_fp16.onnx')



Converted ReDimNet_no_mel.onnx to half precision and saved as ReDimNet_no_mel_fp16.onnx


## verify

In [14]:
# Model file used by the ONNX cells below (inference_onnx reads this global).
# Swap the two lines to verify the full-precision export instead.
# onnx_path = "ReDimNet_no_mel.onnx"
onnx_path = "ReDimNet_no_mel_fp16.onnx"

In [15]:
# Structural validation of the selected ONNX file; check_model raises if it is invalid.
onnx_model = onnx.load(onnx_path)
onnx.checker.check_model(onnx_model)
print("ONNX model is valid!")

ONNX model is valid!


In [16]:
def inference_onnx(wav_path):
    """
    Loads an audio file, converts to log-mel, and runs inference
    in an ONNX session. Returns the embedding as a NumPy array.

    The model file is taken from the module-level ``onnx_path``. It is
    re-validated and a fresh session is created on every call.

    Side effect: the float16 copy of the log-mel input is written to
    ``logmel_<wav file stem>.npy`` in the same folder as ``wav_path``.

    Args:
        wav_path: path of the audio file to load.

    Returns:
        The first output of the ONNX session for this clip.
    """
    print("===================================================")
    print("===========   run_inference_onnx   ================")
    print("===================================================")
    #######################################
    # 1) Load your ONNX model
    #######################################
    # onnx.checker confirms the file is valid before a session is built
    onnx.checker.check_model(onnx.load(onnx_path))
    print(f"Loaded and checked ONNX model from: {onnx_path}")

    # Create an inference session
    session = ort.InferenceSession(onnx_path)

    # Usually we retrieve the first input & output name
    model_input = session.get_inputs()[0]
    input_name = model_input.name
    output_name = session.get_outputs()[0].name

    #######################################
    # 2) Load audio, get log-mel
    #######################################
    print("loading audio from:", wav_path)
    waveform, sample_rate = torchaudio.load(wav_path)
    print(f"...Waveform rate {sample_rate}  ; shape : {waveform.shape}")

    # If multi-channel, downmix:
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # Resample if needed
    target_sample_rate = 16000
    if sample_rate != target_sample_rate:
        resampler = T.Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
        waveform = resampler(waveform)

    log_mel = myUtils.waveform_to_logmel(waveform)

    #######################################
    # 3) ONNX Inference
    #######################################
    # Convert to NumPy for ONNX runtime
    log_mel_np = log_mel.cpu().numpy()
    print("logmelshape : ", log_mel_np.shape)

    # Save the half-precision copy as "logmel_<name>.npy" next to the audio
    # file so it can be checked / reused later.
    log_mel_fp16 = log_mel_np.astype(np.float16)  # → half precision
    orig_name = os.path.splitext(os.path.basename(wav_path))[0]
    folder = os.path.dirname(wav_path)
    out_path = os.path.join(folder, f"logmel_{orig_name}.npy")
    np.save(out_path, log_mel_fp16)

    # Feed the array that matches the dtype the model declares for its input:
    # a model whose input is float16 rejects float32 data, while a model that
    # kept float32 I/O gets the unconverted array exactly as before.
    if model_input.type == "tensor(float16)":
        feed = log_mel_fp16
    else:
        feed = log_mel_np

    # Run inference
    outputs = session.run([output_name], {input_name: feed})
    # outputs is a list; we want the first (and only requested) item
    embedding = outputs[0]

    print("Embedding shape:", embedding.shape)
    return embedding


In [17]:
# Run the same voice test set through the ONNX path.
# The result is indexed by 'embed0' .. 'embed6' in the comparison cells below.
onnx_embedding = test_all_voices(
    extract_speaker_embedding_function = inference_onnx,
    cosine_similarity_function = myUtils.cosine_similarity_numpys,
    save_embeddings=True,  # Save embeddings to files
)

Loaded and checked ONNX model from: ReDimNet_no_mel_fp16.onnx


[0;93m2025-07-06 07:04:13.732424790 [W:onnxruntime:, constant_folding.cc:268 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Sub node '/pool/Sub_1'[m
[0;93m2025-07-06 07:04:13.756371632 [W:onnxruntime:, constant_folding.cc:268 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Sub node '/pool/Sub_1'[m


loading audio from: /data/proj/voice/redimnet/wrkB2/utils/../audio/test000.wav
...Waveform rate 16000  ; shape : torch.Size([1, 293699])
Input waveform shape: torch.Size([1, 32000])
logmelshape :  (1, 1, 72, 134)
Embedding shape: (1, 192)
Loaded and checked ONNX model from: ReDimNet_no_mel_fp16.onnx


[0;93m2025-07-06 07:04:14.760639673 [W:onnxruntime:, constant_folding.cc:268 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Sub node '/pool/Sub_1'[m
[0;93m2025-07-06 07:04:14.792282496 [W:onnxruntime:, constant_folding.cc:268 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Sub node '/pool/Sub_1'[m


loading audio from: /data/proj/voice/redimnet/wrkB2/utils/../audio/testRob1.wav
...Waveform rate 22050  ; shape : torch.Size([1, 35522])
Input waveform shape: torch.Size([1, 25776])
Padding log_mel from 108 to 134 frames
logmelshape :  (1, 1, 72, 134)
Embedding shape: (1, 192)
Loaded and checked ONNX model from: ReDimNet_no_mel_fp16.onnx


[0;93m2025-07-06 07:04:15.748404655 [W:onnxruntime:, constant_folding.cc:268 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Sub node '/pool/Sub_1'[m
[0;93m2025-07-06 07:04:15.777608359 [W:onnxruntime:, constant_folding.cc:268 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Sub node '/pool/Sub_1'[m


loading audio from: /data/proj/voice/redimnet/wrkB2/utils/../audio/testRob2.wav
...Waveform rate 22050  ; shape : torch.Size([1, 32482])
Input waveform shape: torch.Size([1, 23570])
Padding log_mel from 99 to 134 frames
logmelshape :  (1, 1, 72, 134)
Embedding shape: (1, 192)
Loaded and checked ONNX model from: ReDimNet_no_mel_fp16.onnx


[0;93m2025-07-06 07:04:16.779030998 [W:onnxruntime:, constant_folding.cc:268 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Sub node '/pool/Sub_1'[m
[0;93m2025-07-06 07:04:16.807158241 [W:onnxruntime:, constant_folding.cc:268 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Sub node '/pool/Sub_1'[m


loading audio from: /data/proj/voice/redimnet/wrkB2/utils/../audio/test_human1_1.wav
...Waveform rate 16000  ; shape : torch.Size([1, 65867])
Input waveform shape: torch.Size([1, 32000])
logmelshape :  (1, 1, 72, 134)
Embedding shape: (1, 192)
Loaded and checked ONNX model from: ReDimNet_no_mel_fp16.onnx


[0;93m2025-07-06 07:04:17.933234721 [W:onnxruntime:, constant_folding.cc:268 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Sub node '/pool/Sub_1'[m
[0;93m2025-07-06 07:04:17.959332846 [W:onnxruntime:, constant_folding.cc:268 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Sub node '/pool/Sub_1'[m


loading audio from: /data/proj/voice/redimnet/wrkB2/utils/../audio/test_human1_2.wav
...Waveform rate 16000  ; shape : torch.Size([1, 101189])
Input waveform shape: torch.Size([1, 32000])
logmelshape :  (1, 1, 72, 134)
Embedding shape: (1, 192)
Loaded and checked ONNX model from: ReDimNet_no_mel_fp16.onnx
loading audio from: /data/proj/voice/redimnet/wrkB2/utils/../audio/test_human2_1.wav
...Waveform rate 48000  ; shape : torch.Size([1, 84376])
Input waveform shape: torch.Size([1, 28126])
Padding log_mel from 118 to 134 frames
logmelshape :  (1, 1, 72, 134)


[0;93m2025-07-06 07:04:19.046292276 [W:onnxruntime:, constant_folding.cc:268 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Sub node '/pool/Sub_1'[m
[0;93m2025-07-06 07:04:19.082335390 [W:onnxruntime:, constant_folding.cc:268 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Sub node '/pool/Sub_1'[m


Embedding shape: (1, 192)
Loaded and checked ONNX model from: ReDimNet_no_mel_fp16.onnx


[0;93m2025-07-06 07:04:20.063021728 [W:onnxruntime:, constant_folding.cc:268 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Sub node '/pool/Sub_1'[m
[0;93m2025-07-06 07:04:20.092041492 [W:onnxruntime:, constant_folding.cc:268 ApplyImpl] Could not find a CPU kernel and hence can't constant fold Sub node '/pool/Sub_1'[m


loading audio from: /data/proj/voice/redimnet/wrkB2/utils/../audio/test_human2_2.wav
...Waveform rate 48000  ; shape : torch.Size([1, 159256])
Input waveform shape: torch.Size([1, 32000])
logmelshape :  (1, 1, 72, 134)
Embedding shape: (1, 192)
**************************************************************************
*************************   compare summary ******************************
**************************************************************************
====>>>> should be similar:
Similarity (robot1 to robot2 ): 0.6698295474052429
Similarity (human1 to human1 ): 0.5776832103729248
Similarity (human2 to human2 ): 0.3911091685295105
====>>>> should be differnet:
Similarity (robot to human1  ): 0.03838629275560379
Similarity (robot to human2  ): -0.07455457746982574
Similarity (human1 to human2 ): 0.046737123280763626


### compare onnx with torch

In [18]:
# Cosine similarity, clip by clip: PyTorch embedding vs. ONNX embedding
# (keys embed0 .. embed6).
for clip_idx in range(7):
    key = f"embed{clip_idx}"
    score = myUtils.cosine_similarity_numpys(torch_embedding[key], onnx_embedding[key])
    print(f"Similarity embde{clip_idx}: {score}")

Similarity embde0: 0.999998927116394
Similarity embde1: 0.9999983310699463
Similarity embde2: 0.9999985694885254
Similarity embde3: 0.999997615814209
Similarity embde4: 0.9999983310699463
Similarity embde5: 0.9999986886978149
Similarity embde6: 0.9999988079071045


### compare onnx with base line

In [19]:
# Cosine similarity, clip by clip: baseline embedding vs. ONNX embedding
# (keys embed0 .. embed6).
for clip_idx in range(7):
    key = f"embed{clip_idx}"
    score = myUtils.cosine_similarity_numpys(base_line_embedding[key], onnx_embedding[key])
    print(f"Similarity embde{clip_idx}: {score}")

Similarity embde0: 0.9999988079071045
Similarity embde1: 0.9944438338279724
Similarity embde2: 0.9896356463432312
Similarity embde3: 0.9992713332176208
Similarity embde4: 0.9998465776443481
Similarity embde5: 0.997669517993927
Similarity embde6: 0.9999985098838806


## fake calibration data

In [20]:
# import os
# import numpy as np
# import torch

# # Directory for calibration inputs
# os.makedirs("calib_npy", exist_ok=True)

# # Create 10 dummy log-mel tensors
# for i in range(10):
#     log_mel = torch.randn(1, 1, 72, 134).numpy().astype(np.float16)
#     np.save(f"calib_npy/sample_{i}.npy", log_mel)

# # Write dataset.txt listing all paths
# with open("dataset.txt", "w") as f:
#     for i in range(10):
#         f.write(f"calib_npy/sample_{i}.npy\n")


## convert to RKNN

```
python convert.py \
       ../wrkB0/ReDimNet_no_mel_fp16.onnx rk3588 fp ReDimNet_no_mel.rknn \
       ../wrkB0/audio/logmel_testRob1.npy  ../wrkB0/audio/embedding_testRob1.torch

```