# RKNN calibration


===============================================================

* 

===============================================================

In [1]:
%load_ext autoreload
%autoreload 2
## our utils
from utils.common_import import *

2.6.0+cu124


In [2]:
%%capture --no-display
import my_utils as myUtils

## Parameters

In [3]:
# --- Calibration data layout (relative to this notebook) ---
# Text listing that receives one line per generated sample.
DATASET_TXT = Path("../3588/cal/dataset.txt")
# Destination directory for the generated .npy log-mel samples.
OUTPUT_DIR = Path("../3588/cal/logmels")
# Directory scanned for the source audio files.
INPUT_DIR = Path("../3588/cal/inputs")

# --- Audio parameters ---
# Sample rate (Hz) every input is resampled to before chunking.
TARGET_SR = 16000
# Duration, in seconds, of each chunk cut from an input file.
CHUNK_SECS = 2



In [4]:
# Prepare the output locations before any samples are generated.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
# Create the listing's own directory explicitly instead of relying on it
# happening to be an ancestor of OUTPUT_DIR.
DATASET_TXT.parent.mkdir(parents=True, exist_ok=True)
# Start from an empty listing: creates the file if missing, truncates it otherwise.
DATASET_TXT.write_text("")

## Get some voice sources for calibration

In [5]:
from torchaudio.datasets import YESNO


## Run on all input audio files

* Example sources of audio data:

```
aws s3 ls --no-sign-request s3://lab41openaudiocorpus/

2020-02-26 02:05:00 20946282205 VOiCES_competition.tar.gz    // ~21 GB
2020-02-26 02:05:00 29543349376 VOiCES_devkit.tar.gz
2020-02-26 02:05:00 448753406236 VOiCES_release.tar.gz
2020-04-17 02:32:28   59238743 recording_data.tar.gz
2020-03-11 18:45:37          0 test.txt

aws s3 cp --no-sign-request   s3://lab41openaudiocorpus/VOiCES_competition.tar.gz .  

```

In [6]:
# Convert every file in INPUT_DIR into fixed-length calibration samples:
# each CHUNK_SECS-second chunk is passed through myUtils.waveform_to_logmel,
# saved as a float16 .npy in OUTPUT_DIR, and listed in DATASET_TXT with a path
# relative to the directory that contains DATASET_TXT.
chunk_len = int(CHUNK_SECS * TARGET_SR)  # samples per chunk (loop-invariant)
dataset_root = DATASET_TXT.parent
total_chunks = 0

# Open the listing once in "w" mode so that re-running this cell rebuilds it
# from scratch instead of appending duplicate entries.
with open(DATASET_TXT, "w") as ds:
    for wav_path in sorted(INPUT_DIR.glob("*")):
        if not wav_path.is_file():
            continue  # sub-directories cannot be decoded as audio

        try:
            waveform, sr = torchaudio.load(wav_path)

            # mono: average all channels into one
            if waveform.shape[0] > 1:
                waveform = waveform.mean(dim=0, keepdim=True)

            # resample to the common target rate
            if sr != TARGET_SR:
                resampler = T.Resample(orig_freq=sr, new_freq=TARGET_SR)
                waveform = resampler(waveform)

            wave = waveform.squeeze(0).numpy()  # to shape [samples]
            # Only whole chunks are kept; a trailing partial chunk is dropped.
            n_chunks = len(wave) // chunk_len

            if n_chunks == 0:
                print(f"⚠ Skipping {wav_path.name}: shorter than one chunk")
                continue

            print(f"[INFO] {wav_path.name}: {n_chunks} chunks of {CHUNK_SECS}s")

            for i in range(n_chunks):
                chunk = wave[i * chunk_len : (i + 1) * chunk_len]
                chunk_tensor = torch.tensor(chunk, dtype=torch.float32).unsqueeze(0)  # [1, N]
                logmel_fp16 = myUtils.waveform_to_logmel(chunk_tensor).cpu().numpy().astype(np.float16)

                out_name = f"{wav_path.stem}_chunk{i:02d}.npy"
                out_path = OUTPUT_DIR / out_name
                np.save(out_path, logmel_fp16)

                # List the sample relative to the listing file's own directory.
                relative_path = out_path.relative_to(dataset_root)
                ds.write(str(relative_path) + "\n")

                print(f"  ✔ Chunk {i}: {out_name}")
                total_chunks += 1

        except Exception as e:
            # Best effort: report the failing file and carry on with the rest.
            print(f"✖ Error in {wav_path.name}: {e}")

print(f"\n✔ All done – Total chunks processed: {total_chunks}")

[INFO] common_voice_en_41910499.mp3: 1 chunks of 2s
Input waveform shape: torch.Size([1, 32000])
  ✔ Chunk 0: common_voice_en_41910499_chunk00.npy
[INFO] common_voice_en_41910500.mp3: 3 chunks of 2s
Input waveform shape: torch.Size([1, 32000])
  ✔ Chunk 0: common_voice_en_41910500_chunk00.npy
Input waveform shape: torch.Size([1, 32000])
  ✔ Chunk 1: common_voice_en_41910500_chunk01.npy
Input waveform shape: torch.Size([1, 32000])
  ✔ Chunk 2: common_voice_en_41910500_chunk02.npy
[INFO] common_voice_en_41910501.mp3: 3 chunks of 2s
Input waveform shape: torch.Size([1, 32000])
  ✔ Chunk 0: common_voice_en_41910501_chunk00.npy
Input waveform shape: torch.Size([1, 32000])
  ✔ Chunk 1: common_voice_en_41910501_chunk01.npy
Input waveform shape: torch.Size([1, 32000])
  ✔ Chunk 2: common_voice_en_41910501_chunk02.npy
[INFO] common_voice_en_41910502.mp3: 2 chunks of 2s
Input waveform shape: torch.Size([1, 32000])
  ✔ Chunk 0: common_voice_en_41910502_chunk00.npy
Input waveform shape: torch.Size