In [1]:
import torch
import os
import json
import librosa
import glob
import subprocess
import nemo.collections.asr as nemo_asr
from omegaconf import OmegaConf, DictConfig
from nemo.core.config import hydra_runner
from nemo.utils import logging
from nemo.utils.exp_manager import exp_manager

import lightning.pytorch as pl
# Trainer, callbacks and loggers are taken from the `lightning.pytorch` namespace
from lightning.pytorch import Trainer
from lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping
from lightning.pytorch.loggers import TensorBoardLogger

# Report whether PyTorch can reach a CUDA device and which CUDA version it reports.
print(
    "PyTorch CUDA available:",
    torch.cuda.is_available(),
    "CUDA version:",
    torch.version.cuda,
)

# Data directory: the `files/` folder under the current working directory.
# The train/test manifests are read from here; the path is also exported
# through the DATA_DIR environment variable.
DATA_DIR = f"{os.getcwd()}/files/"
os.environ["DATA_DIR"] = DATA_DIR


PyTorch CUDA available: True CUDA version: 12.6


In [3]:
# Restore the pre-trained Parakeet-TDT 0.6B v2 model from a local .nemo archive.
model = nemo_asr.models.ASRModel.restore_from("./parakeet-tdt-0.6b-v2/parakeet-tdt-0.6b-v2.nemo")

# Audio sample rate (Hz) used for BOTH dataloaders. The restored model's own
# train config reports `sample_rate: 16000`, so the data must be loaded at the
# same rate; the previous train value (4500 * 5 = 22500) did not match it.
SAMPLE_RATE = 16000

# Utterance duration bounds in seconds, applied to the training manifest only.
maxi, mini = (73.038375, 1.33225) # TODO: insert real values

# Training dataloader configuration.
train_config = DictConfig({
    # os.path.join avoids the doubled slash that DATA_DIR's trailing "/" produced.
    'manifest_filepath': os.path.join(DATA_DIR, 'train_manifest.json'),
    'sample_rate': SAMPLE_RATE,
    'batch_size': 2,  # Reduced batch size for stability
    'shuffle': True,
    'num_workers': 2,  # Reduced for stability
    'pin_memory': True,
    # NOTE(review): `trim_silence` is the key NeMo's manifest-based ASR dataset
    # builder appears to read; the extra `trim` key was redundant - confirm
    # against the installed NeMo version.
    'trim_silence': True,
    'max_duration': maxi,
    'min_duration': mini,
})

# Validation dataloader configuration: same sample rate and silence trimming as
# training so both splits are preprocessed consistently; no shuffling.
val_config = DictConfig({
    'manifest_filepath': os.path.join(DATA_DIR, 'test_manifest.json'),
    'sample_rate': SAMPLE_RATE,
    'batch_size': 2,
    'shuffle': False,
    'num_workers': 2,
    'pin_memory': True,
    'trim_silence': True,
})


[NeMo I 2025-06-04 02:32:07 nemo_logging:393] Tokenizer SentencePieceTokenizer initialized with 1024 tokens


[NeMo W 2025-06-04 02:32:07 nemo_logging:405] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    use_lhotse: true
    skip_missing_manifest_entries: true
    input_cfg: null
    tarred_audio_filepaths: null
    manifest_filepath: null
    sample_rate: 16000
    shuffle: true
    num_workers: 2
    pin_memory: true
    max_duration: 40.0
    min_duration: 0.1
    text_field: answer
    batch_duration: null
    use_bucketing: true
    bucket_duration_bins: null
    bucket_batch_size: null
    num_buckets: 30
    bucket_buffer_size: 20000
    shuffle_buffer_size: 10000
    
[NeMo W 2025-06-04 02:32:07 nemo_logging:405] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s). 
    Validation config :

[NeMo I 2025-06-04 02:32:07 nemo_logging:393] PADDING: 0
[NeMo I 2025-06-04 02:32:12 nemo_logging:393] Using RNNT Loss : tdt
    Loss tdt_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0, 'durations': [0, 1, 2, 3, 4], 'sigma': 0.02, 'omega': 0.1}
[NeMo I 2025-06-04 02:32:12 nemo_logging:393] Using RNNT Loss : tdt
    Loss tdt_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0, 'durations': [0, 1, 2, 3, 4], 'sigma': 0.02, 'omega': 0.1}


[NeMo W 2025-06-04 02:32:12 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: No `cuda-python` module. Please do `pip install cuda-python>=12.3`


[NeMo I 2025-06-04 02:32:12 nemo_logging:393] Using RNNT Loss : tdt
    Loss tdt_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0, 'durations': [0, 1, 2, 3, 4], 'sigma': 0.02, 'omega': 0.1}


[NeMo W 2025-06-04 02:32:12 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: No `cuda-python` module. Please do `pip install cuda-python>=12.3`


[NeMo I 2025-06-04 02:32:14 nemo_logging:393] Model EncDecRNNTBPEModel was successfully restored from /home/raid/cognition/til/asr/notebooks/parakeet-tdt-0.6b-v2/parakeet-tdt-0.6b-v2.nemo.


In [4]:
# Sanity check: show the concrete class of the restored model and whether it is
# a `lightning.pytorch` LightningModule.
from lightning.pytorch import LightningModule

print(type(model))
print(isinstance(model, LightningModule))

<class 'nemo.collections.asr.models.rnnt_bpe_models.EncDecRNNTBPEModel'>
True


In [5]:
# Attach the training and validation dataloaders, built from the two configs
# defined earlier, to the restored model.
model.setup_training_data(train_data_config=train_config)
model.setup_validation_data(val_data_config=val_config)

[NeMo I 2025-06-04 02:32:23 nemo_logging:393] Dataset loaded with 4500 files totalling 31.91 hours
[NeMo I 2025-06-04 02:32:23 nemo_logging:393] 0 files were filtered totalling 0.00 hours
[NeMo I 2025-06-04 02:32:23 nemo_logging:393] Dataset loaded with 8 files totalling 0.06 hours
[NeMo I 2025-06-04 02:32:23 nemo_logging:393] 0 files were filtered totalling 0.00 hours


In [6]:
# TensorBoard logger rooted at ./tb_logs, run name "parakeet_finetune".
tb_logger = TensorBoardLogger(
    name="parakeet_finetune",
    save_dir="./tb_logs",
)

# Keep only the single best checkpoint, ranked by the lowest `val_wer`.
checkpoint_callback = ModelCheckpoint(
    dirpath="./checkpoints",
    filename="best_val_wer",
    monitor="val_wer",
    mode="min",
    save_top_k=1,
)

# Early stopping on the same metric, with a patience of 5.
early_stop_callback = EarlyStopping(
    monitor="val_wer",
    mode="min",
    patience=5,
)

# Single-GPU trainer, capped at 30 epochs, wired to the logger and callbacks above.
trainer = Trainer(
    accelerator="gpu",
    devices=1,
    max_epochs=30,
    callbacks=[checkpoint_callback, early_stop_callback],
    logger=tb_logger,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [7]:
# Fine-tune the model using the dataloaders already attached to it.
# NOTE(review): the recorded run of this cell aborted during training with
# "NvvmError: ... -arch=compute_89 is an unsupported option" on an RTX 4090.
# Presumably the NVVM library picked up by the CUDA JIT used for the TDT loss
# predates compute capability 8.9 - TODO confirm and update the CUDA toolkit
# in this environment before re-running.
trainer.fit(model=model)

You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


[NeMo I 2025-06-04 02:32:34 nemo_logging:393] Optimizer config = AdamW (
    Parameter Group 0
        amsgrad: False
        betas: [0.9, 0.98]
        capturable: False
        decoupled_weight_decay: True
        differentiable: False
        eps: 1e-08
        foreach: None
        fused: None
        lr: 0.0001
        maximize: False
        weight_decay: 0.001
    )
[NeMo I 2025-06-04 02:32:34 nemo_logging:393] Scheduler "<nemo.core.optim.lr_scheduler.CosineAnnealing object at 0x7f7620a59e50>" 
    will be used during training (effective maximum steps = 67500) - 
    Parameters : 
    (warmup_steps: 0
    warmup_ratio: null
    min_lr: 1.0e-06
    max_steps: 67500
    )



  | Name              | Type                              | Params | Mode 
--------------------------------------------------------------------------------
0 | preprocessor      | AudioToMelSpectrogramPreprocessor | 0      | train
1 | encoder           | ConformerEncoder                  | 608 M  | train
2 | decoder           | RNNTDecoder                       | 7.2 M  | train
3 | joint             | RNNTJoint                         | 1.7 M  | train
4 | loss              | RNNTLoss                          | 0      | train
5 | spec_augmentation | SpectrogramAugmentation           | 0      | train
6 | wer               | WER                               | 0      | train
--------------------------------------------------------------------------------
617 M     Trainable params
0         Non-trainable params
617 M     Total params
2,471.304 Total estimated model params size (MB)
706       Modules in train mode
0         Modules in eval mode


Sanity Checking: |                                                                                            …

[NeMo I 2025-06-04 02:32:34 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.models.rnnt_bpe_models.EncDecRNNTBPEModel'>.decoding.decoding
[NeMo I 2025-06-04 02:32:34 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.metrics.wer.WER'>joint._wer.decoding.decoding
[NeMo I 2025-06-04 02:32:37 nemo_logging:393] 
    
[NeMo I 2025-06-04 02:32:37 nemo_logging:393] reference:Operation Echelon has yielded significant progress in our pursuit of the rogue AI droid BH-2000. Our surveillance drones have identified its current location in sector 7G, and our ground units are mobilizing for a targeted strike. Updates on BH-2000's movements indicate it is attempting to upload critical data, posing a potential threat to our operations. We recommend immediate action to neutralize the target before it can transfer any sensitive information.
[NeMo I 2025-06-04 02:32:37 nemo_logging:393] predicted:Operation Echelon has yielded significant progress in

Training: |                                                                                                   …

[NeMo I 2025-06-04 02:32:37 nemo_logging:393] Disabled CUDA graphs for module <class 'nemo.collections.asr.models.rnnt_bpe_models.EncDecRNNTBPEModel'>.decoding.decoding
[NeMo I 2025-06-04 02:32:37 nemo_logging:393] Disabled CUDA graphs for module <class 'nemo.collections.asr.metrics.wer.WER'>joint._wer.decoding.decoding


NvvmError: Failed to verify

libnvvm : error: -arch=compute_89 is an unsupported option
NVVM_ERROR_INVALID_OPTION

In [None]:
# Export the model in its current in-memory state to a .nemo archive in the
# working directory.
model.save_to(save_path='model.nemo')