Dataset Extraction

In [9]:
!rm -r /content/dataset
!unzip dataset.zip
!rm -r __MACOSX

Archive:  dataset.zip
   creating: dataset/
  inflating: __MACOSX/._dataset      
  inflating: dataset/.DS_Store       
  inflating: __MACOSX/dataset/._.DS_Store  
   creating: dataset/others/
   creating: dataset/user_0/
  inflating: dataset/others/yes.0a196374_nohash_1.wav  
  inflating: __MACOSX/dataset/others/._yes.0a196374_nohash_1.wav  
  inflating: dataset/others/backward.0b7ee1a0_nohash_0.wav  
  inflating: __MACOSX/dataset/others/._backward.0b7ee1a0_nohash_0.wav  
  inflating: dataset/others/cat.0e6e36c9_nohash_0.wav  
  inflating: __MACOSX/dataset/others/._cat.0e6e36c9_nohash_0.wav  
  inflating: dataset/others/down.0a2b400e_nohash_3.wav  
  inflating: __MACOSX/dataset/others/._down.0a2b400e_nohash_3.wav  
  inflating: dataset/others/seven.0a0b46ae_nohash_0.wav  
  inflating: __MACOSX/dataset/others/._seven.0a0b46ae_nohash_0.wav  
  inflating: dataset/others/stop.0a196374_nohash_2.wav  
  inflating: __MACOSX/dataset/others/._stop.0a196374_nohash_2.wav  
  inflating: dataset/o

Dividing the files in the desired folders, that will then be processed in .npy files

Division and organization:

In [10]:
import os
import shutil
import random
from tqdm import tqdm

def organize_wav_files(target_source_dir, other_source_dir, output_dir, target_user_id=0):

    os.makedirs(output_dir, exist_ok=True)
    subfolders = {
        "validation": (60, 120),
        "testing": (60, 120),
        "train_1": (1, 0),
        "train_8": (8, 0),
        "train_16": (16, 0),
        "train_64": (64, 0),
        "remain": (-1, -1)
    }

    for folder in subfolders:
        os.makedirs(os.path.join(output_dir, folder), exist_ok=True)

    target_files = [f for f in os.listdir(target_source_dir)
                   if f.endswith('.wav') and os.path.isfile(os.path.join(target_source_dir, f))]
    random.shuffle(target_files)

    other_files = []
    for root, _, files in os.walk(other_source_dir):
        for file in files:
            if file.endswith('.wav'):
                other_files.append(os.path.join(root, file))
    random.shuffle(other_files)

    required_target = sum(v[0] for k, v in subfolders.items() if k != "remain")
    required_other = sum(v[1] for k, v in subfolders.items() if k != "remain")

    if len(target_files) < required_target:
        raise ValueError(f"Need {required_target} target files, found {len(target_files)}")
    if len(other_files) < required_other:
        raise ValueError(f"Need {required_other} non-target files, found {len(other_files)}")

    target_idx = 0
    other_idx = 0

    for folder, (target_count, other_count) in subfolders.items():
        if folder == "remain":
            continue

        for i in range(target_count):
            src = os.path.join(target_source_dir, target_files[target_idx])
            dst = os.path.join(output_dir, folder, f"target_{target_files[target_idx]}")
            shutil.copy(src, dst)
            target_idx += 1

        for i in range(other_count):
            src = other_files[other_idx]
            dst = os.path.join(output_dir, folder, f"other_{os.path.basename(src)}")
            shutil.copy(src, dst)
            other_idx += 1

    for remaining_target in target_files[target_idx:]:
        src = os.path.join(target_source_dir, remaining_target)
        dst = os.path.join(output_dir, "remain", f"target_{remaining_target}")
        shutil.copy(src, dst)

    for remaining_other in other_files[other_idx:]:
        dst = os.path.join(output_dir, "remain", f"other_{os.path.basename(remaining_other)}")
        shutil.copy(remaining_other, dst)

    used_target = target_idx
    used_other = other_idx
    remaining_target = len(target_files) - target_idx
    remaining_other = len(other_files) - other_idx

    print("\nOrganization complete.")
    print(f"Used {used_target} target files and {used_other} non-target files")
    print(f"Remaining target files moved to 'remain': {remaining_target}")
    print(f"Remaining non-target files moved to 'remain': {remaining_other}")
    print("\nFinal counts per folder:")
    for folder in subfolders:
        if folder == "remain":
            target_count = len([f for f in os.listdir(os.path.join(output_dir, folder))
                             if f.startswith('target_')])
            other_count = len([f for f in os.listdir(os.path.join(output_dir, folder))
                            if f.startswith('other_')])
            print(f"{folder}: {target_count} target, {other_count} non-target (remaining)")
        else:
            expected_target, expected_other = subfolders[folder]
            actual_target = len([f for f in os.listdir(os.path.join(output_dir, folder))
                              if f.startswith('target_')])
            actual_other = len([f for f in os.listdir(os.path.join(output_dir, folder))
                             if f.startswith('other_')])
            print(f"{folder}: {actual_target}/{expected_target} target, {actual_other}/{expected_other} non-target")

if __name__ == "__main__":
    target_speaker_dir = "/content/dataset/user_0/"
    other_speakers_dir = "/content/dataset/others/"
    output_directory = "/content/dataset/user_0_organized/"

    organize_wav_files(
        target_source_dir=target_speaker_dir,
        other_source_dir=other_speakers_dir,
        output_dir=output_directory,
        target_user_id=0
    )

    !rm -r /content/dataset/user_0
    !rm -r /content/dataset/others


Organization complete.
Used 209 target files and 240 non-target files
Remaining target files moved to 'remain': 27
Remaining non-target files moved to 'remain': 248

Final counts per folder:
validation: 60/60 target, 120/120 non-target
testing: 60/60 target, 120/120 non-target
train_1: 1/1 target, 0/0 non-target
train_8: 8/8 target, 0/0 non-target
train_16: 16/16 target, 0/0 non-target
train_64: 64/64 target, 0/0 non-target
remain: 27 target, 248 non-target (remaining)


MFE block conversion and .npz file generation

In [11]:
import os
import shutil
import numpy as np
import librosa
from tqdm import tqdm
import matplotlib.pyplot as plt
from scipy.fftpack import dct
from scipy.signal import get_window
import soundfile as sf
from collections import defaultdict

SAMPLE_RATE = 16000
FRAME_DUR = 0.032
FRAME_SIZE = int(SAMPLE_RATE * FRAME_DUR)
FRAME_STRIDE_DUR = 0.024
FRAME_STRIDE = int(SAMPLE_RATE * FRAME_STRIDE_DUR)
NUM_BINS = FRAME_SIZE // 2
FILTER_NUMBER = 40
MIN_FREQ = 0
MAX_FREQ = SAMPLE_RATE // 2
COEFFICIENT = 0.96875
NOISE_FLOOR = -40.0

def remove_all_folders_except(parent_dir, folder_to_keep):
    keep_path = os.path.join(parent_dir, folder_to_keep)
    if not os.path.exists(keep_path):
        print(f"Warning: '{folder_to_keep}' doesn't exist in {parent_dir}")
        return

    for item in os.listdir(parent_dir):
        item_path = os.path.join(parent_dir, item)
        if os.path.isdir(item_path) and item != folder_to_keep:
            print(f"Removing: {item_path}")
            try:
                shutil.rmtree(item_path)
            except Exception as e:
                print(f"Failed to remove {item_path}: {e}")

def pre_emphasis(audio):
    emphasized = np.zeros_like(audio, dtype=np.float32)
    emphasized[0] = audio[0] / 32768.0
    for i in range(1, len(audio)):
        emphasized[i] = (audio[i] / 32768.0) - COEFFICIENT * (audio[i-1] / 32768.0)
    return emphasized

def apply_windowing(frame):
    window = 0.54 - 0.46 * np.cos(2 * np.pi * np.arange(len(frame)) / (len(frame) - 1))
    return frame * window

def hz_to_mel(hz):
    return 1127.0 * np.log10(1 + hz / 700.0)

def mel_to_hz(mel):
    return 700 * (10 ** (mel / 1127.0) - 1)

def create_mel_filterbank():
    min_mel = hz_to_mel(MIN_FREQ)
    max_mel = hz_to_mel(MAX_FREQ)
    #mel_points = np.linspace(min_mel, max_mel, FILTER_NUMBER + 2)
    #hz_points = mel_to_hz(mel_points)
    mel_points = np.zeros(FILTER_NUMBER + 2)
    mel_spacing = (max_mel - min_mel) / (FILTER_NUMBER + 1)
    for i in range(FILTER_NUMBER + 2):
        mel_points[i] = mel_to_hz(min_mel + i * mel_spacing)
        if mel_points[i] > MAX_FREQ:
            mel_points[i] = MAX_FREQ

    #bin_indices = np.floor((NUM_BINS) * hz_points / (SAMPLE_RATE / 2)).astype(int)
    #bin_indices = np.clip(bin_indices, 0, NUM_BINS - 1)
    bin_indices = np.zeros(FILTER_NUMBER + 2, dtype=int)
    for i in range(FILTER_NUMBER + 2):
        bin_indices[i] = int(mel_points[i] * (NUM_BINS - 1) / (SAMPLE_RATE / 2.0))
        bin_indices[i] = max(0, min(NUM_BINS - 1, bin_indices[i]))

    filterbank = np.zeros((FILTER_NUMBER, NUM_BINS))

    for i in range(FILTER_NUMBER):
        left = bin_indices[i]
        middle = bin_indices[i+1]
        right = bin_indices[i+2]

        if left == middle:
            middle = min(left + 1, NUM_BINS - 1)
        if middle == right:
            right = min(middle + 1, NUM_BINS - 1)

        #filterbank[i, left:middle] = np.linspace(0, 1, middle - left)
        for j in range(left, middle):
            filterbank[i, j] = (j - left) / (middle - left)

        #filterbank[i, middle:right] = np.linspace(1, 0, right - middle)
        for j in range(middle, right):
            filterbank[i, j] = 1.0 - (j - middle) / (right - middle)
    return filterbank

def compute_spectrogram(audio, show_plot=False):
    num_samples = len(audio)
    total_duration = num_samples / SAMPLE_RATE
    num_frames_full_second = int((total_duration - FRAME_DUR) / FRAME_STRIDE_DUR) + 1
    num_frames = min(num_frames_full_second, 40)
    pre_emphasis_array = pre_emphasis(audio)
    spectrogram = np.zeros((num_frames, NUM_BINS))

    for frame in range(num_frames):
        start = frame * FRAME_STRIDE
        end = start + FRAME_SIZE
        segment = pre_emphasis_array[start:end]
        if len(segment) < FRAME_SIZE:
            segment = np.pad(segment, (0, FRAME_SIZE - len(segment)))

        windowed = apply_windowing(segment)
        fft = np.fft.rfft(windowed, n=FRAME_SIZE)
        magnitude = np.abs(fft)
        spectrogram[frame] = magnitude[:NUM_BINS]

    mel_filterbank = create_mel_filterbank()
    mel_spectrogram = np.dot(spectrogram, mel_filterbank.T)
    log_mel_spectrogram = 10* np.log10(mel_spectrogram + 1e-20)

    log_mel_spectrogram = (log_mel_spectrogram - NOISE_FLOOR) / (-NOISE_FLOOR + 12)
    log_mel_spectrogram = np.clip(log_mel_spectrogram, 0, 1)
    quantized = np.round(log_mel_spectrogram * 256) / 256.0
    quantized = np.where(quantized >= 0.65, quantized, 0)
    quantized = quantized[:40]

    if show_plot:
        plt.figure(figsize=(10, 6))
        time_axis = np.linspace(0, 0.968, 40)
        plt.imshow(quantized.T, aspect='auto', origin='lower',
                  extent=[0, 0.968, 0, FILTER_NUMBER])
        plt.colorbar(label='Magnitude')
        plt.xlabel('Time (s)')
        plt.ylabel('Mel filter index')
        plt.title('40x40 Mel Spectrogram (0.968s duration)')
        plt.show()

    return quantized

import os
import shutil
import numpy as np
import librosa
from tqdm import tqdm
import matplotlib.pyplot as plt
import soundfile as sf
from collections import defaultdict

def process_folder_to_npz(folder_path, output_npz_path, target_user_id, remain_folder=None, is_training=False):
    """Process all WAV files in a folder, using remain folder as fallback"""
    features = []
    labels = []
    filenames = []
    remain_files_used = 0

    wav_files = [f for f in os.listdir(folder_path) if f.endswith('.wav')]

    remain_files = []
    if remain_folder and os.path.exists(remain_folder):
        if is_training:
            remain_files = [f for f in os.listdir(remain_folder)
                          if f.endswith('.wav') and f.startswith('target_')
                          and os.path.isfile(os.path.join(remain_folder, f))]
        else:
            remain_files = [f for f in os.listdir(remain_folder)
                          if f.endswith('.wav') and os.path.isfile(os.path.join(remain_folder, f))]

        random.shuffle(remain_files)

    for wav_file in tqdm(wav_files, desc=f"Processing {os.path.basename(folder_path)}"):
        audio_path = os.path.join(folder_path, wav_file)
        success = False

        try:
            audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE, mono=True)
            audio_int16 = (audio * 32767).astype(np.int16)

            if len(audio_int16) == SAMPLE_RATE:
                mfe = compute_spectrogram(audio_int16)
                mfe = mfe[..., np.newaxis]
                features.append(mfe)

                label = target_user_id if wav_file.startswith('target_') else -1
                labels.append(label)
                filenames.append(wav_file)
                success = True
            else:
                print(f"Duration mismatch: {wav_file} has {len(audio_int16)/SAMPLE_RATE:.2f}s (expected 1.0)")
        except Exception as e:
            print(f"Error processing {wav_file}: {str(e)}")

        if not success and remain_files:
            remain_file = remain_files.pop()
            remain_path = os.path.join(remain_folder, remain_file)

            try:
                audio, sr = librosa.load(remain_path, sr=SAMPLE_RATE, mono=True)
                audio_int16 = (audio * 32767).astype(np.int16)

                if len(audio_int16) == SAMPLE_RATE:
                    mfe = compute_spectrogram(audio_int16)
                    mfe = mfe[..., np.newaxis]
                    features.append(mfe)

                    if is_training:
                        label = target_user_id
                    else:
                        label = target_user_id if remain_file.startswith('target_') else -1

                    labels.append(label)
                    filenames.append(f"remain_replacement_{remain_file}")
                    remain_files_used += 1
                    success = True
                else:
                    print(f"Remain file duration mismatch: {remain_file}")
            except Exception as e:
                print(f"Error processing remain file {remain_file}: {str(e)}")

        if not success:
            print(f"Could not process {wav_file} and no valid remain files available")

    features_array = np.array(features, dtype=np.float32)
    labels_array = np.array(labels, dtype=np.int32)

    np.savez_compressed(
        output_npz_path,
        features=features_array,
        filenames=np.array(filenames),
        labels=labels_array
    )

    print(f"\nSaved {len(features)} segments to {output_npz_path}")
    print(f"Class distribution: Target={np.sum(labels_array == target_user_id)}, Non-target={np.sum(labels_array != target_user_id)}")
    if remain_files_used > 0:
        print(f"Used {remain_files_used} files from remain folder as replacements")

def process_all_folders(base_dir, target_user_id=0):
    """Process all subfolders in the organized directory"""
    subfolders = [
        "train_1",
        "train_8",
        "train_16",
        "train_64",
        "validation",
        "testing"
    ]

    output_dir = os.path.join(base_dir, "npz_features")
    os.makedirs(output_dir, exist_ok=True)

    # Path to remain folder
    remain_folder = os.path.join(base_dir, "remain")

    for folder in subfolders:
        folder_path = os.path.join(base_dir, folder)
        if os.path.exists(folder_path):
            if "train" in folder:
                output_filename = f"{folder}_{target_user_id}_features.npz"
                is_training = True
            else:
                output_filename = f"{folder}_features.npz"
                is_training = False

            output_path = os.path.join(output_dir, output_filename)

            # Process folder with remain folder as fallback
            process_folder_to_npz(
                folder_path,
                output_path,
                target_user_id,
                remain_folder=remain_folder,
                is_training=is_training
            )

if __name__ == "__main__":
    organized_dir = "/content/dataset/user_0_organized"
    target_speaker_id = 0

    print("Verifying folder structure...")
    for folder in ["validation", "testing", "train_1", "train_8", "train_16", "train_64", "remain"]:
        path = os.path.join(organized_dir, folder)
        if os.path.exists(path):
            files = [f for f in os.listdir(path) if f.endswith('.wav')]
            print(f"{folder}: {len(files)} files")

    process_all_folders(organized_dir, target_user_id=target_speaker_id)
    remove_all_folders_except(parent_dir=organized_dir, folder_to_keep="npz_features")

Verifying folder structure...
validation: 180 files
testing: 180 files
train_1: 1 files
train_8: 8 files
train_16: 16 files
train_64: 64 files
remain: 275 files


Processing train_1: 100%|██████████| 1/1 [00:00<00:00, 14.15it/s]



Saved 1 segments to /content/dataset/user_0_organized/npz_features/train_1_0_features.npz
Class distribution: Target=1, Non-target=0


Processing train_8: 100%|██████████| 8/8 [00:00<00:00, 14.79it/s]



Saved 8 segments to /content/dataset/user_0_organized/npz_features/train_8_0_features.npz
Class distribution: Target=8, Non-target=0


Processing train_16: 100%|██████████| 16/16 [00:01<00:00, 14.27it/s]



Saved 16 segments to /content/dataset/user_0_organized/npz_features/train_16_0_features.npz
Class distribution: Target=16, Non-target=0


Processing train_64: 100%|██████████| 64/64 [00:04<00:00, 14.52it/s]

Duration mismatch: target_user_0.5nd9qqrk.ingestion-856ccfd98c-7psd8.wav has 100.00s (expected 1.0)






Saved 64 segments to /content/dataset/user_0_organized/npz_features/train_64_0_features.npz
Class distribution: Target=64, Non-target=0
Used 1 files from remain folder as replacements


Processing validation:   3%|▎         | 6/180 [00:00<00:11, 14.69it/s]

Duration mismatch: other_six.0a5636ca_nohash_0.wav has 0.88s (expected 1.0)


Processing validation:   8%|▊         | 14/180 [00:00<00:11, 14.62it/s]

Duration mismatch: other_bird.0a5636ca_nohash_0.wav has 0.84s (expected 1.0)


Processing validation:  17%|█▋        | 30/180 [00:02<00:10, 14.21it/s]

Duration mismatch: other_learn.0b57a6ed_nohash_0.wav has 0.68s (expected 1.0)


Processing validation:  35%|███▌      | 63/180 [00:05<00:11,  9.86it/s]

Duration mismatch: other_on.0a7c2a8d_nohash_0.wav has 0.98s (expected 1.0)
Remain file duration mismatch: other_left.0a9f9af7_nohash_0.wav
Could not process other_on.0a7c2a8d_nohash_0.wav and no valid remain files available


Processing validation:  38%|███▊      | 68/180 [00:05<00:13,  8.15it/s]

Duration mismatch: other_seven.0a5636ca_nohash_0.wav has 0.70s (expected 1.0)


Processing validation:  43%|████▎     | 78/180 [00:06<00:07, 13.02it/s]

Duration mismatch: other_on.0a9f9af7_nohash_1.wav has 0.85s (expected 1.0)
Duration mismatch: other_seven.0ab3b47d_nohash_0.wav has 0.85s (expected 1.0)
Duration mismatch: other_five.0a396ff2_nohash_0.wav has 0.55s (expected 1.0)


Processing validation:  47%|████▋     | 84/180 [00:07<00:06, 14.24it/s]

Duration mismatch: other_up.0ab3b47d_nohash_1.wav has 0.81s (expected 1.0)


Processing validation:  54%|█████▍    | 98/180 [00:07<00:05, 14.90it/s]

Duration mismatch: other_cat.0bfec55f_nohash_0.wav has 0.77s (expected 1.0)


Processing validation:  61%|██████    | 109/180 [00:08<00:04, 16.95it/s]

Duration mismatch: other_tree.0b09edd3_nohash_0.wav has 0.64s (expected 1.0)
Remain file duration mismatch: other_eight.0a9f9af7_nohash_0.wav
Could not process other_tree.0b09edd3_nohash_0.wav and no valid remain files available


Processing validation:  66%|██████▌   | 119/180 [00:09<00:04, 15.00it/s]

Duration mismatch: other_sheila.0b09edd3_nohash_1.wav has 0.81s (expected 1.0)


Processing validation:  72%|███████▏  | 130/180 [00:10<00:02, 16.74it/s]

Duration mismatch: other_happy.0a5636ca_nohash_0.wav has 0.88s (expected 1.0)
Remain file duration mismatch: other_go.0ab3b47d_nohash_0.wav
Could not process other_happy.0a5636ca_nohash_0.wav and no valid remain files available


Processing validation:  78%|███████▊  | 140/180 [00:10<00:02, 14.96it/s]

Duration mismatch: other_one.0a5636ca_nohash_0.wav has 0.70s (expected 1.0)


Processing validation:  84%|████████▍ | 152/180 [00:11<00:01, 14.45it/s]

Duration mismatch: other_wow.0a5636ca_nohash_0.wav has 0.79s (expected 1.0)


Processing validation:  93%|█████████▎| 168/180 [00:12<00:00, 14.71it/s]

Duration mismatch: other_go.0a9f9af7_nohash_1.wav has 0.85s (expected 1.0)


Processing validation:  98%|█████████▊| 176/180 [00:13<00:00, 14.31it/s]

Duration mismatch: other_marvin.0a5636ca_nohash_1.wav has 0.98s (expected 1.0)


Processing validation: 100%|██████████| 180/180 [00:13<00:00, 13.33it/s]

Duration mismatch: other_right.0a5636ca_nohash_0.wav has 0.70s (expected 1.0)






Saved 177 segments to /content/dataset/user_0_organized/npz_features/validation_features.npz
Class distribution: Target=62, Non-target=115
Used 15 files from remain folder as replacements


Processing testing:   1%|          | 2/180 [00:00<00:11, 15.16it/s]

Duration mismatch: other_five.0a396ff2_nohash_1.wav has 0.51s (expected 1.0)


Processing testing:  17%|█▋        | 30/180 [00:02<00:10, 14.54it/s]

Duration mismatch: target_user_0.5nd9rh04.ingestion-856ccfd98c-f8vjf.wav has 15.00s (expected 1.0)


Processing testing:  20%|██        | 36/180 [00:02<00:09, 14.76it/s]

Duration mismatch: other_nine.0a5636ca_nohash_0.wav has 0.93s (expected 1.0)


Processing testing:  22%|██▏       | 40/180 [00:02<00:14, 10.00it/s]

Duration mismatch: other_nine.0a9f9af7_nohash_0.wav has 0.85s (expected 1.0)


Processing testing:  23%|██▎       | 42/180 [00:03<00:15,  9.18it/s]

Duration mismatch: other_marvin.0a9f9af7_nohash_1.wav has 0.68s (expected 1.0)
Duration mismatch: other_seven.0a9f9af7_nohash_0.wav has 0.68s (expected 1.0)


Processing testing:  26%|██▌       | 46/180 [00:03<00:13,  9.87it/s]

Duration mismatch: other_go.0a9f9af7_nohash_0.wav has 0.90s (expected 1.0)
Remain file duration mismatch: other_marvin.0a9f9af7_nohash_0.wav
Could not process other_go.0a9f9af7_nohash_0.wav and no valid remain files available


Processing testing:  27%|██▋       | 49/180 [00:04<00:14,  8.85it/s]

Duration mismatch: other_left.0a5636ca_nohash_0.wav has 0.84s (expected 1.0)


Processing testing:  29%|██▉       | 53/180 [00:04<00:15,  8.00it/s]

Duration mismatch: other_sheila.0a5636ca_nohash_0.wav has 0.84s (expected 1.0)


Processing testing:  32%|███▏      | 57/180 [00:05<00:16,  7.63it/s]

Duration mismatch: other_bird.0a396ff2_nohash_0.wav has 0.47s (expected 1.0)


Processing testing:  37%|███▋      | 66/180 [00:05<00:08, 12.91it/s]

Duration mismatch: other_sheila.0c09f202_nohash_0.wav has 0.77s (expected 1.0)


Processing testing:  57%|█████▋    | 102/180 [00:08<00:05, 14.76it/s]

Duration mismatch: other_six.0a9f9af7_nohash_0.wav has 0.73s (expected 1.0)


Processing testing:  61%|██████    | 109/180 [00:08<00:04, 16.98it/s]

Duration mismatch: other_down.0a9f9af7_nohash_1.wav has 0.98s (expected 1.0)
Remain file duration mismatch: other_five.0a9f9af7_nohash_1.wav
Could not process other_down.0a9f9af7_nohash_1.wav and no valid remain files available


Processing testing:  65%|██████▌   | 117/180 [00:09<00:04, 14.94it/s]

Duration mismatch: other_cat.0c540988_nohash_0.wav has 0.77s (expected 1.0)


Processing testing:  67%|██████▋   | 121/180 [00:09<00:03, 14.76it/s]

Duration mismatch: other_go.0a5636ca_nohash_0.wav has 0.79s (expected 1.0)
Duration mismatch: other_wow.0ab3b47d_nohash_0.wav has 0.90s (expected 1.0)


Processing testing:  73%|███████▎  | 131/180 [00:10<00:03, 14.44it/s]

Duration mismatch: other_two.0a5636ca_nohash_0.wav has 0.74s (expected 1.0)


Processing testing:  82%|████████▏ | 147/180 [00:11<00:02, 14.37it/s]

Duration mismatch: other_bird.0a9f9af7_nohash_2.wav has 0.94s (expected 1.0)
Duration mismatch: other_on.0a5636ca_nohash_1.wav has 0.88s (expected 1.0)


Processing testing:  88%|████████▊ | 159/180 [00:12<00:01, 13.58it/s]

Duration mismatch: other_visual.0b139000_nohash_0.wav has 0.85s (expected 1.0)
Duration mismatch: other_happy.0ab3b47d_nohash_0.wav has 0.90s (expected 1.0)


Processing testing: 100%|██████████| 180/180 [00:13<00:00, 13.21it/s]


Saved 178 segments to /content/dataset/user_0_organized/npz_features/testing_features.npz
Class distribution: Target=60, Non-target=118
Used 19 files from remain folder as replacements
Removing: /content/dataset/user_0_organized/train_1
Removing: /content/dataset/user_0_organized/testing
Removing: /content/dataset/user_0_organized/train_16
Removing: /content/dataset/user_0_organized/train_64
Removing: /content/dataset/user_0_organized/validation
Removing: /content/dataset/user_0_organized/train_8
Removing: /content/dataset/user_0_organized/remain





Code for verifying the structure of .npz files

In [12]:
import numpy as np
import os
import matplotlib.pyplot as plt

def verify_npz_files(directory="/content/dataset/user_0_organized/npz_features"):
    """Verify contents of .npz files in a directory."""
    npz_files = [f for f in os.listdir(directory) if f.endswith('.npz')]

    if not npz_files:
        print(f"No .npz files found in {directory}!")
        return

    print(f"Found {len(npz_files)} .npz files in {directory}:\n")

    for file in sorted(npz_files):
        filepath = os.path.join(directory, file)
        data = np.load(filepath)

        print(f"File: {file}")
        print(f"Number of arrays stored: {len(data.files)}\n")

        for array_name in data.files:
            array_data = data[array_name]

            print(f"  Array: '{array_name}'")
            if(array_name=='features'):
              print(f"    - Shape: {array_data.shape}")
              print(f"    - Dtype: {array_data.dtype}")
              print(f"    - Min: {np.min(array_data):.4f}, Max: {np.max(array_data):.4f}, Mean: {np.mean(array_data):.4f}")
              print(f"    - Size: {array_data.size} elements\n")

        print("-" * 50 + "\n")

if __name__ == "__main__":
    verify_npz_files()

Found 6 .npz files in /content/dataset/user_0_organized/npz_features:

File: testing_features.npz
Number of arrays stored: 3

  Array: 'features'
    - Shape: (178, 40, 40, 1)
    - Dtype: float32
    - Min: 0.0000, Max: 1.0000, Mean: 0.2328
    - Size: 284800 elements

  Array: 'filenames'
  Array: 'labels'
--------------------------------------------------

File: train_16_0_features.npz
Number of arrays stored: 3

  Array: 'features'
    - Shape: (16, 40, 40, 1)
    - Dtype: float32
    - Min: 0.0000, Max: 1.0000, Mean: 0.2566
    - Size: 25600 elements

  Array: 'filenames'
  Array: 'labels'
--------------------------------------------------

File: train_1_0_features.npz
Number of arrays stored: 3

  Array: 'features'
    - Shape: (1, 40, 40, 1)
    - Dtype: float32
    - Min: 0.0000, Max: 1.0000, Mean: 0.1644
    - Size: 1600 elements

  Array: 'filenames'
  Array: 'labels'
--------------------------------------------------

File: train_64_0_features.npz
Number of arrays stored: 3


Bestmatching

In [15]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import math

import sys

import random
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix

from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

import shutil
tfk = tf.keras
tfkl = tf.keras.layers

print(tf.__version__)

seed = 22

random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

dv_model=None
auth_class=0

def cosine_similarity(vec1, vec2):
  dot_product = np.dot(vec1, vec2)
  norm_vec1 = np.linalg.norm(vec1)
  norm_vec2 = np.linalg.norm(vec2)
  return dot_product / (norm_vec1 * norm_vec2)

def compute_similarity(input_vector, d_vectors):
  similarities = []

  for dv in d_vectors:
    similarity = cosine_similarity(input_vector, dv)
    similarities.append(similarity)

  return max(similarities)

def predictDVector(d_vectors,authlabel,input_data, input_labels, threshold, verbose=True):
  input_vectors = dv_model.predict(input_data)
  total = len(input_vectors)
  total_auth = 0
  total_denied = 0

  for i in range(len(input_labels)):
    if(input_labels[i]!=auth_class):
      total_denied = total_denied+1
    else:
      total_auth = total_auth + 1

  correct_auth=0
  correct_denied=0

  for i in range(len(input_vectors)):
    similarity=compute_similarity(input_vectors[i], d_vectors)
    result = " -- ERROR!"
    if(similarity>threshold and input_labels[i] == authlabel):
      correct_auth = correct_auth + 1
      result = ""
    if(similarity<=threshold and input_labels[i] != authlabel):
      correct_denied = correct_denied + 1
      result = ""
    if(verbose):
      print("similarity: " + str(similarity) + " --- Class: " + str(input_labels[i]) + " " + result)
  correct = correct_auth + correct_denied

  print('-----------------------')
  print(" --- Testing Results ---")
  true_positive = correct_auth
  false_positive = total_denied - correct_denied
  false_negative = total_auth - correct_auth
  prec = true_positive / (true_positive + false_positive)
  recall = true_positive / (true_positive + false_negative)

  print("True Positive Rate: " + str(correct_auth) + "/" + str(total_auth) + " (" + str(correct_auth*100/total_auth) + "%)")
  print("False Positive Rate: " + str(false_positive) + "/" + str(total_denied) + " (" + str((false_positive)*100/total_denied) + "%)")
  print("Precision: " + str(prec))
  print("Recall: " + str(recall))
  print('******************')
  print("Total correct " + str(correct) + "/" + str(total))
  acc = correct/total
  f1score = 2*prec*recall/(prec+recall)
  print("Accuracy on this dataset: " + str(acc))
  print("F1-Score on this dataset: " + str(f1score))

  return acc, f1score

def provide_predictions(d_vectors, input_data):
  y_predictions_prob = np.zeros((len(input_data), 1))
  input_vectors = dv_model.predict(input_data)
  for i in range(len(input_vectors)):
    similarity=compute_similarity(input_vectors[i], d_vectors)
    y_predictions_prob[i] = similarity
  return y_predictions_prob

def evaluate_model(auth_class, train_size):
  print("Testing with speaker id: " + str(auth_class) + " and train size: " + str(train_size))

  train_dir = f"dataset/user_0_organized/npz_features/train_{train_size}_{auth_class}_features.npz"
  training_npz = np.load(train_dir)
  x_train = training_npz['features']

  val_dir = "dataset/user_0_organized/npz_features/validation_features.npz"
  validation_npz = np.load(val_dir)
  x_val, y_val = validation_npz['features'], validation_npz['labels']

  print("Validation class distribution:", np.unique(y_val, return_counts=True))

  testing_dir = "dataset/user_0_organized/npz_features/testing_features.npz"
  testing_npz = np.load(testing_dir)
  x_test, y_test = testing_npz['features'], testing_npz['labels']

  print("=== Dataset Summary ===")
  print(f"Training: {len(x_train)} samples (should include both classes)")
  print(f"Validation: {len(x_val)} samples - Classes: {np.unique(y_val, return_counts=True)}")
  print(f"Testing: {len(x_test)} samples - Classes: {np.unique(y_test, return_counts=True)}")

  d_vectors = dv_model.predict(x_train.reshape(train_size,40,40,1))
  print(d_vectors.shape)

  save_path = f"d_vectors_{auth_class}_{train_size}.npz"
  np.savez(save_path, labels=y_val, d_vectors=d_vectors)
  print(f"Saved d_vectors and labels to {save_path}")

  y_pred_prob = provide_predictions(d_vectors, x_val)

  y_val_bin = np.where(y_val == auth_class, 1, 0)

  for i,classvalue in enumerate(y_val):
    if(classvalue!=auth_class):
      y_val_bin[i] = 0

  if len(np.unique(y_val_bin)) > 1:
    fpr, tpr, thresholds = roc_curve(y_val_bin, y_pred_prob)
    roc_auc = auc(fpr, tpr)
    print("-----")

    print("Plotting the Receiving Operating Characteristic curve:")
    '''
    # Plot ROC curve
    plt.plot(fpr, tpr, 'b', label='AUC = %0.2f'% roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0,1],[0,1],'r--')
    plt.xlim([0,1])
    plt.ylim([0,1])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()
    '''
    fnr = 1 - tpr
    try:
      eer_threshold = thresholds[np.nanargmin(np.absolute((fnr - fpr)))]
      print("-----")
      print("EER Threshold: ", eer_threshold)
      abs_diffs = np.abs(fpr - fnr)
      min_index = np.argmin(abs_diffs)
      EER = np.mean((fpr[min_index], fnr[min_index]))
    except:
      eer_threshold=0.5
      EER=1.0
      print(print("Warning: EER calculation failed - using default threshold"))

  else:
    print("Warning: Only one class present in validation data")
    eer_threshold = 0.5
    EER = 1.0
    roc_auc = 0.5
    print("EER = " + str(EER))
    print("AUC = " + str(roc_auc))

  acc, f1score = predictDVector(d_vectors, auth_class, x_test, y_test, threshold=eer_threshold, verbose=False)

  with open("test-results-td-bestmatch.txt", "a") as f:
      f.write(f"Speaker {auth_class} | Train Size: {train_size}\n")
      f.write(f"Accuracy: {acc:.4f} | F1: {f1score:.4f} | EER: {EER:.4f} | AUC: {roc_auc:.4f}\n")
      f.close

def main():
    auth_class = 0
    train_sizes = [1, 8, 16, 64]

    global dv_model

    d_vector_model_name = "d-vector-extractor-256.h5"
    dv_model = tfk.models.load_model(d_vector_model_name)
    dv_model.compile(loss=tfk.losses.CategoricalCrossentropy(),
                    optimizer=tfk.optimizers.Adam(learning_rate=0.0001),
                    metrics=['accuracy'])
    dv_model.summary()
    with open("test-results-td-bestmatch.txt", "w") as f:
        f.write("Speaker Verification Results\n")
        f.write("==========================\n\n")

    for size in train_sizes:
        evaluate_model(auth_class, size)

if __name__ == "__main__":
    !rm -r /content/d_vectors/
    main()
    !mkdir /content/d_vectors/
    !mv d_vectors* /content/d_vectors/

2.18.0




Testing with speaker id: 0 and train size: 1
Validation class distribution: (array([-1,  0], dtype=int32), array([115,  62]))
=== Dataset Summary ===
Training: 1 samples (should include both classes)
Validation: 177 samples - Classes: (array([-1,  0], dtype=int32), array([115,  62]))
Testing: 178 samples - Classes: (array([-1,  0], dtype=int32), array([118,  60]))
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step
(1, 256)
Saved d_vectors and labels to d_vectors_0_1.npz
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step 
-----
Plotting the Receiving Operating Characteristic curve:
-----
EER Threshold:  0.7693581581115723
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
-----------------------
 --- Testing Results ---
True Positive Rate: 41/60 (68.33333333333333%)
False Positive Rate: 23/118 (19.491525423728813%)
Precision: 0.640625
Recall: 0.6833333333333333
******************
Total correct 136/178
Accuracy on this datas

Mean Cos

In [17]:
import os
import numpy as np
import tensorflow as tf
from sklearn.metrics import roc_curve, auc
import sys

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
seed = 22
random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

print(tf.__version__)

def cosine_similarity(vec1, vec2):
    """Compute cosine similarity between two vectors"""
    dot_product = np.dot(vec1, vec2)
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    return dot_product / (norm_product + 1e-8)

def compute_mean_d_vector(d_vectors):
    """Compute the mean d-vector from enrollment samples"""
    return np.mean(d_vectors, axis=0)

def predictDVector(mean_d_vector, authlabel, input_data, input_labels, threshold, verbose=True):
    """Evaluate performance using mean cosine similarity"""
    input_vectors = dv_model.predict(input_data)
    total = len(input_vectors)

    is_target = (input_labels == authlabel)
    total_auth = np.sum(is_target)
    total_denied = total - total_auth

    similarities = np.array([cosine_similarity(vec, mean_d_vector) for vec in input_vectors])
    predictions = similarities > threshold

    true_pos = np.sum(predictions & is_target)
    true_neg = np.sum(~predictions & ~is_target)
    false_pos = np.sum(predictions & ~is_target)
    false_neg = np.sum(~predictions & is_target)

    with np.errstate(divide='ignore', invalid='ignore'):
        prec = true_pos / (true_pos + false_pos) if (true_pos + false_pos) > 0 else 0
        recall = true_pos / total_auth if total_auth > 0 else 0
        f1 = 2 * (prec * recall) / (prec + recall) if (prec + recall) > 0 else 0
        acc = (true_pos + true_neg) / total
        fpr = false_pos / total_denied if total_denied > 0 else 0

    print('-----------------------')
    print(" --- Testing Results ---")
    print(f"Target samples: {total_auth}/{total}")
    print(f"Non-target samples: {total_denied}/{total}")
    print(f"True Positive Rate: {true_pos}/{total_auth} ({true_pos/total_auth*100:.1f}%)")
    print(f"False Positive Rate: {false_pos}/{total_denied} (N/A)" if total_denied == 0 else
          f"False Positive Rate: {false_pos}/{total_denied} ({false_pos/total_denied*100:.1f}%)")
    print(f"Precision: {prec:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    print(f"Accuracy: {acc:.4f}")

    return acc, f1

def provide_predictions(mean_d_vector, input_data):
    """Generate similarity predictions using mean d-vector"""
    input_vectors = dv_model.predict(input_data)
    return np.array([cosine_similarity(vec, mean_d_vector) for vec in input_vectors])

def evaluate_model(auth_class, train_size):
    """Complete evaluation pipeline for mean cosine approach"""
    print(f"\nEvaluating speaker {auth_class} with {train_size} enrollment samples (Mean Cosine)")

    try:
        train_dir = f"dataset/user_0_organized/npz_features/train_{train_size}_{auth_class}_features.npz"
        training_npz = np.load(train_dir)
        x_train = training_npz['features']

        val_dir = "dataset/user_0_organized/npz_features/validation_features.npz"
        validation_npz = np.load(val_dir)
        x_val, y_val = validation_npz['features'], validation_npz['labels']

        testing_dir = "dataset/user_0_organized/npz_features/testing_features.npz"
        testing_npz = np.load(testing_dir)
        x_test, y_test = testing_npz['features'], testing_npz['labels']

        print("=== Dataset Summary ===")
        print(f"Training: {len(x_train)} samples")
        print(f"Validation: {len(x_val)} samples - Classes: {np.unique(y_val, return_counts=True)}")
        print(f"Testing: {len(x_test)} samples - Classes: {np.unique(y_test, return_counts=True)}")

        d_vectors = dv_model.predict(x_train.reshape(-1, 40, 40, 1))
        mean_d_vector = compute_mean_d_vector(d_vectors)
        print(f"Mean D-Vector computed using {len(d_vectors)} samples")

        y_pred_prob = provide_predictions(mean_d_vector, x_val.reshape(-1, 40, 40, 1))
        y_val_bin = (y_val == auth_class).astype(int)

        if len(np.unique(y_val_bin)) > 1:
            fpr, tpr, thresholds = roc_curve(y_val_bin, y_pred_prob)
            roc_auc = auc(fpr, tpr)

            fnr = 1 - tpr
            try:
                eer_threshold = thresholds[np.nanargmin(np.absolute((fnr - fpr)))]
                abs_diffs = np.abs(fpr - fnr)
                min_index = np.argmin(abs_diffs)
                EER = np.mean((fpr[min_index], fnr[min_index]))
            except:
                eer_threshold = 0.5
                EER = 1.0
                print("Warning: EER calculation failed - using default threshold")
        else:
            print("Warning: Only one class present in validation data")
            eer_threshold = 0.5
            EER = 1.0
            roc_auc = 0.5

        acc, f1score = predictDVector(mean_d_vector, auth_class,
                                    x_test.reshape(-1, 40, 40, 1), y_test,
                                    threshold=eer_threshold, verbose=False)

        with open("test-results-td-meancos.txt", "a") as f:
            f.write(f"Speaker {auth_class} | Train Size: {train_size}\n")
            f.write(f"Accuracy: {acc:.4f} | F1: {f1score:.4f} ")
            f.write(f"| EER: {EER:.4f} | AUC: {roc_auc:.4f}\n\n")

        return acc, f1score, EER, roc_auc

    except Exception as e:
        print(f"Error in evaluation: {e}")
        return 0, 0, 1, 0

def main():
    auth_class = 0
    train_sizes = [1, 8, 16, 64]

    global dv_model
    d_vector_model_name = "d-vector-extractor-256.h5"
    dv_model = tf.keras.models.load_model(d_vector_model_name)
    dv_model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
                    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                    metrics=['accuracy'])

    with open("test-results-td-meancos.txt", "w") as f:
        f.write("Speaker Verification Results (Mean Cosine)\n")
        f.write("========================================\n\n")

    for size in train_sizes:
        evaluate_model(auth_class, size)

if __name__ == "__main__":
    main()



2.18.0

Evaluating speaker 0 with 1 enrollment samples (Mean Cosine)
=== Dataset Summary ===
Training: 1 samples
Validation: 177 samples - Classes: (array([-1,  0], dtype=int32), array([115,  62]))
Testing: 178 samples - Classes: (array([-1,  0], dtype=int32), array([118,  60]))
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step
Mean D-Vector computed using 1 samples
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
-----------------------
 --- Testing Results ---
Target samples: 60/178
Non-target samples: 118/178
True Positive Rate: 41/60 (68.3%)
False Positive Rate: 23/118 (19.5%)
Precision: 0.6406
Recall: 0.6833
F1-Score: 0.6613
Accuracy: 0.7640

Evaluating speaker 0 with 8 enrollment samples (Mean Cosine)
=== Dataset Summary ===
Training: 8 samples
Validation: 177 samples - Classes: (array([-1,  0], dtype=int32), array([115,  62]))
Testing: 178 samples - Classes: (ar

Inspect .npz structure

In [None]:
import numpy as np
import os
import matplotlib.pyplot as plt

def verify_npz_files(directory="/content/d_vectors/"):
    """Verify contents of .npz files in a directory."""
    npz_files = [f for f in os.listdir(directory) if f.endswith('.npz')]

    if not npz_files:
        print(f"No .npz files found in {directory}!")
        return

    print(f"Found {len(npz_files)} .npz files in {directory}:\n")

    for file in sorted(npz_files):
        filepath = os.path.join(directory, file)
        data = np.load(filepath)

        print(f"File: {file}")
        print(f"Number of arrays stored: {len(data.files)}\n")

        for array_name in data.files:
            array_data = data[array_name]

            print(f"  Array: '{array_name}'")
            if(array_name=='d_vectors'):
              print(f"    - Shape: {array_data.shape}")
              print(f"    - Dtype: {array_data.dtype}")
              print(f"    - Min: {np.min(array_data):.4f}, Max: {np.max(array_data):.4f}, Mean: {np.mean(array_data):.4f}")
              print(f"    - Size: {array_data.size} elements\n")

        print("-" * 50 + "\n")

if __name__ == "__main__":
    verify_npz_files()

Found 4 .npz files in /content/d_vectors/:

File: d_vectors_0_1.npz
Number of arrays stored: 2

  Array: 'labels'
  Array: 'd_vectors'
    - Shape: (1, 256)
    - Dtype: float32
    - Min: 0.0000, Max: 11.1570, Mean: 1.5854
    - Size: 256 elements

--------------------------------------------------

File: d_vectors_0_16.npz
Number of arrays stored: 2

  Array: 'labels'
  Array: 'd_vectors'
    - Shape: (16, 256)
    - Dtype: float32
    - Min: 0.0000, Max: 13.2575, Mean: 1.3215
    - Size: 4096 elements

--------------------------------------------------

File: d_vectors_0_64.npz
Number of arrays stored: 2

  Array: 'labels'
  Array: 'd_vectors'
    - Shape: (64, 256)
    - Dtype: float32
    - Min: 0.0000, Max: 13.4420, Mean: 1.3232
    - Size: 16384 elements

--------------------------------------------------

File: d_vectors_0_8.npz
Number of arrays stored: 2

  Array: 'labels'
  Array: 'd_vectors'
    - Shape: (8, 256)
    - Dtype: float32
    - Min: 0.0000, Max: 12.2884, Mean: 1.

Convert .npz file generated in a C header

In [18]:
import numpy as np
import os

def sanitize_name(name):
    """Sanitize array name for C variable naming."""
    return name.replace('.', '_').replace('-', '_')

def generate_npz_header(npz_dir="/content/d_vectors", output_file="d_vectors.h"):
    """Generate a C header with 2D arrays (samples x 256) from .npz files"""
    npz_files = [f for f in os.listdir(npz_dir) if f.endswith('.npz')]

    with open(output_file, "w") as f:
        f.write("#ifndef D_VECTORS_H\n")
        f.write("#define D_VECTORS_H\n\n")
        f.write("#include <stddef.h>\n\n")

        for file in sorted(npz_files):
            file_path = os.path.join(npz_dir, file)
            with np.load(file_path) as data:
                if 'd_vectors' not in data:
                    continue

                d_vectors = data['d_vectors']
                if d_vectors.ndim != 2 or d_vectors.shape[1] != 256:
                    continue

                array_name = sanitize_name(os.path.splitext(file)[0])
                samples = d_vectors.shape[0]
                f.write(f"// {samples} samples of 256-dimensional vectors\n")
                f.write(f"static const float {array_name}[{samples}][256] = {{\n")
                for sample in d_vectors:
                    f.write("    {")
                    line = ", ".join(f"{x:.6f}f" for x in sample)
                    f.write(line)
                    f.write("},\n")

                f.write("};\n\n")

        f.write("#endif // D_VECTORS_H\n")

if __name__ == "__main__":
    generate_npz_header()