<h1 style="text-align: center; font-size: 50px;"> Register Model </h1>

# Notebook Overview

- Start Execution
- Install and Import Libraries
- Configure Settings
- Register the Model and Log Results to MLFlow

# Start Execution

In [1]:
import logging
import time

# Configure logger
logger: logging.Logger = logging.getLogger("register_model_logger")
logger.setLevel(logging.INFO)
logger.propagate = False  # Prevent duplicate logs from parent loggers

# Set formatter
formatter: logging.Formatter = logging.Formatter(
    fmt="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S"
)

# logging.getLogger() returns the same object for the same name for the whole
# life of the kernel, so re-running this cell would otherwise stack a second
# handler and print every message twice. Detach whatever is attached first so
# the cell is idempotent.
for _existing_handler in list(logger.handlers):
    logger.removeHandler(_existing_handler)

# Configure and attach stream handler
stream_handler: logging.StreamHandler = logging.StreamHandler()
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)

In [2]:
# Wall-clock reference point; the final cell subtracts it to report total runtime.
start_time: float = time.time()
logger.info("Notebook execution started.")

2025-09-05 18:50:16 - INFO - Notebook execution started.


# Install and Import Libraries

In [3]:
%%time

# Install the packages listed in ../requirements.txt via the %pip magic; --quiet suppresses pip's progress output.
%pip install -r ../requirements.txt --quiet

Note: you may need to restart the kernel to use updated packages.
CPU times: user 215 ms, sys: 100 ms, total: 315 ms
Wall time: 20.8 s


In [4]:


import nemo                             # NVIDIA NeMo core package
import nemo.collections.asr as nemo_asr # Speech Recognition (ASR) collection
import nemo.collections.tts as nemo_tts # Text-to-Speech (TTS) collection

# ------------------------- Transformers -------------------------
from transformers import MarianMTModel, MarianTokenizer

# ------------------------- Audio Processing Utilities -------------------------
import IPython                          # For playing audio inside Jupyter Notebooks
import soundfile                        # For reading and writing audio files
from pathlib import Path                # Filesystem path management

# ------------------------- System Utilities -------------------------

import os                               # Operating system interfaces
import shutil                           # High-level file operations
import uuid                             # Unique ID generation
import io                               # Input/Output core tools
import base64                           # Encoding and decoding base64 strings
import json                             # JSON serialization and deserialization
import warnings                         # Suppressing and managing warnings
import numpy as np                      # Numerical array operations
np.float_ = np.float64
import torch

# ------------------------- MLflow Integration -------------------------

import mlflow                           # MLflow experiment tracking and model management
from mlflow.types.schema import Schema, ColSpec
from mlflow.types import ParamSchema, ParamSpec
from mlflow.models import ModelSignature

# ------------------------ Utils Import ------------------------
import sys
sys.path.append("../src")
from onnx_utils import ModelExportConfig
from utils import load_config

    


# Configure Settings

In [5]:
# ------------------------ Suppress Verbose Logs ------------------------
warnings.filterwarnings("ignore")

# Suppress NeMo internal logging
logging.getLogger('nemo_logger').setLevel(logging.ERROR)

In [6]:
# ------------------------- Model File Paths -------------------------
# Shared prefixes of the locally mounted NeMo checkpoints.
_DATAFABRIC_DIR = "/home/jovyan/datafabric"
_TTS_BUNDLE_DIR = f"{_DATAFABRIC_DIR}/TTS_Es_Multispeaker_FastPitch_HiFiGAN"

# Model id handed to MarianMTModel / MarianTokenizer.from_pretrained
MT_MODEL = "Helsinki-NLP/opus-mt-en-es"

# Speech-to-Text (ASR) model
ASR_MODEL_PATH = f"{_DATAFABRIC_DIR}/STT_En_Citrinet_1024_Gamma_0.25/stt_en_citrinet_1024_gamma_0_25.nemo"

# Spectrogram generator model (FastPitch)
SPECTROGRAM_GENERATOR_PATH = f"{_TTS_BUNDLE_DIR}/tts_es_fastpitch_multispeaker.nemo"

# Vocoder model (HiFiGAN)
VOCODER_PATH = f"{_TTS_BUNDLE_DIR}/tts_es_hifigan_ft_fastpitch_multispeaker.nemo"

# Path to the input English audio sample
AUDIO_SAMPLE_PATH = "../data/ForrestGump.mp3"

# ------------------------- MLflow Experiment Configuration -------------------------

# MLflow experiment name
EXPERIMENT_NAME = "NeMo_Translation_Experiment"
# Specific run name inside the experiment
RUN_NAME = "NeMo_en_es_Translation_Run"
# Registered model name in MLflow
MODEL_NAME = "nemo_en_es"
# Path to save demo outputs
DEMO_PATH = "../demo"

# Register the Model and Log Results to MLFlow

In [7]:
class NemoTranslationModel(mlflow.pyfunc.PythonModel):
    """
    A custom MLflow pyfunc model for performing end-to-end audio translation using NVIDIA NeMo models.

    Pipeline executed by ``predict``:
      1. (optional) speech-to-text with a NeMo ``EncDecCTCModel``
      2. text translation with a Hugging Face MarianMT model (``MT_MODEL``)
      3. (optional) text-to-speech with NeMo ``FastPitchModel`` + ``HifiGanModel``
    """

    # Scratch directory for the temporary WAV file handed to the ASR model.
    TMP_DIR = "/phoenix/mlflow/tmp"

    # Speaker index passed to the spectrogram generator.
    SPEAKER_ID = 2

    def load_context(self, context):
        """
        Load all models from the logged artifacts and prepare the scratch directory.

        Args:
            context: MLflow model context; ``context.artifacts["model"]`` must point
                to the directory holding the three ``.nemo`` checkpoints.
        """
        model_dir = context.artifacts["model"]

        # Choose the device once and use it for every model, so the pipeline
        # also works on hosts without a GPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.asr_model = nemo_asr.models.EncDecCTCModel.restore_from(f"{model_dir}/enc_dec_CTC.nemo").to(self.device)
        self.mt_tokenizer = MarianTokenizer.from_pretrained(MT_MODEL)
        self.mt_model = MarianMTModel.from_pretrained(MT_MODEL).to(self.device)
        self.spectrogram_generator = nemo_tts.models.FastPitchModel.restore_from(f"{model_dir}/fast_pitch.nemo").to(self.device)
        self.vocoder = nemo_tts.models.HifiGanModel.restore_from(f"{model_dir}/hifi_gan.nemo").to(self.device)

        # NOTE(review): 41000 looks like a typo for 44100. It is currently never the
        # effective rate: serialize_audio only runs when use_audio is true, and in
        # that path transcribe_audio has already overwritten this value - confirm.
        self.framerate = 41000

        os.makedirs(self.TMP_DIR, exist_ok=True)

    def transcribe_audio(self, model_input):
        """
        Decode the base64 audio in ``model_input`` and run speech-to-text on it.

        Args:
            model_input: Mapping/frame whose ``'source_serialized_audio'`` column holds
                a base64-encoded audio file in its first row.

        Returns:
            Whatever ``asr_model.transcribe`` returns for the single input file
            (``predict`` uses element ``[0]`` as the source text).

        Side effects:
            Overwrites ``self.framerate`` with the sample rate of the input audio.
        """
        serialized_audio = model_input['source_serialized_audio'][0]
        audio_buffer = io.BytesIO(base64.b64decode(serialized_audio))
        audio_array, self.framerate = soundfile.read(audio_buffer)

        # Ensure mono-channel audio (keep the first channel only)
        if audio_array.ndim > 1:
            audio_array = audio_array[:, 0]

        temp_wave_path = f"{self.TMP_DIR}/{self.file_id}.wav"
        try:
            soundfile.write(temp_wave_path, audio_array, self.framerate)

            # Perform ASR
            transcribed_text = self.asr_model.transcribe([temp_wave_path])
        finally:
            # The WAV file is only needed for the transcribe call; remove it so a
            # long-running server does not accumulate one file per request.
            if os.path.exists(temp_wave_path):
                os.remove(temp_wave_path)

        return transcribed_text

    def text_to_audio(self, text: str):
        """
        Generate an audio waveform from text using the TTS models.

        Args:
            text: Text to synthesize.

        Returns:
            NumPy array produced by the vocoder, moved to CPU
            (``predict`` serializes element ``[0]``).
        """
        parsed_tokens = self.spectrogram_generator.parse(text)
        spectrogram = self.spectrogram_generator.generate_spectrogram(
            tokens=parsed_tokens, speaker=self.SPEAKER_ID
        )
        audio_tensor = self.vocoder.convert_spectrogram_to_audio(spec=spectrogram)

        return audio_tensor.to('cpu').detach().numpy()

    def serialize_audio(self, audio_array: np.ndarray):
        """
        Serialize a NumPy audio array into a base64-encoded WAV file.

        The WAV is built entirely in memory; nothing is written to disk.

        Args:
            audio_array: Samples to encode.

        Returns:
            Base64 string (UTF-8) of the WAV file contents.
        """
        # NOTE(review): self.framerate holds the *input* file's sample rate at this
        # point, not a rate taken from the vocoder - TODO confirm this is intended.
        with io.BytesIO() as buffer:
            soundfile.write(buffer, audio_array, samplerate=self.framerate, format='WAV')
            audio_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

        return audio_base64

    def predict(self, context, model_input, params=None):
        """
        Perform inference:
        1. Transcribe audio (if input is audio)
        2. Translate text using Hugging Face MarianMT
        3. Synthesize translated text into speech
        4. Serialize the audio if needed

        Args:
            context: MLflow model context (unused here).
            model_input: Input with ``'source_text'`` and ``'source_serialized_audio'``
                columns; only the first row is processed.
            params: Optional dict; ``use_audio`` (default ``False``) selects the
                audio-in / audio-out path. ``None`` is treated as an empty dict.

        Returns:
            dict with ``original_text``, ``translated_text`` and
            ``translated_serialized_audio`` (empty string when ``use_audio`` is false).
        """
        # `params` defaults to None, so it must not be dereferenced directly.
        params = params or {}
        use_audio = bool(params.get("use_audio", False))

        # Unique id used to name this request's temporary WAV file.
        self.file_id = uuid.uuid1()

        if use_audio:
            source_text = self.transcribe_audio(model_input)[0]
        else:
            source_text = model_input['source_text'][0]

        # Tokenize and move inputs to the same device as the translation model
        inputs = self.mt_tokenizer(source_text, return_tensors="pt", padding=True)
        inputs = {key: value.to(self.device) for key, value in inputs.items()}

        # Generate translation
        translated = self.mt_model.generate(**inputs)
        translated_text = self.mt_tokenizer.decode(translated[0], skip_special_tokens=True)

        translated_audio_base64 = ""
        if use_audio:
            audio_array = self.text_to_audio(translated_text)
            translated_audio_base64 = self.serialize_audio(audio_array[0])

        return {
            "original_text": source_text,
            "translated_text": translated_text,
            "translated_serialized_audio": translated_audio_base64,
        }

    @classmethod
    def log_model(cls, model_name: str, nemo_models: dict, demo_folder: str, config_path: str, pip_requirements: str | list[str] | None = None,):
        """
        Log the translation model to MLflow with model artifacts and signatures.

        Args:
            model_name: Name under which to register the model. Also used as the
                name of a local staging directory that is deleted afterwards.
            nemo_models: Dictionary mapping component names (``"enc_dec_CTC"``,
                ``"fast_pitch"``, ``"hifi_gan"``) to their local .nemo file paths.
            demo_folder: Path to the demo files folder (logged as the ``demo`` artifact).
            config_path: Path to the config file (logged as the ``config`` artifact).
            pip_requirements: Forwarded unchanged to ``onnx_utils.log_model``.

        Raises:
            KeyError: If any of the three required components is missing from
                ``nemo_models``.
        """
        # onnx_utils lives in ../src; add it to the import path only once.
        if "../src" not in sys.path:
            sys.path.append("../src")
        from onnx_utils import ModelExportConfig, log_model

        # All three checkpoints are loaded below, so fail early (before creating
        # any files) with a message that names what is missing.
        required_components = ("enc_dec_CTC", "fast_pitch", "hifi_gan")
        missing = [name for name in required_components if name not in nemo_models]
        if missing:
            raise KeyError(f"nemo_models is missing required component(s): {missing}")

        input_schema = Schema([
            ColSpec("string", "source_text"),
            ColSpec("string", "source_serialized_audio"),
        ])

        output_schema = Schema([
            ColSpec("string", "original_text"),
            ColSpec("string", "translated_text"),
            ColSpec("string", "translated_serialized_audio"),
        ])

        params_schema = ParamSchema([
            ParamSpec("use_audio", "boolean", False)
        ])

        signature = ModelSignature(
            inputs=input_schema,
            outputs=output_schema,
            params=params_schema
        )

        os.makedirs(model_name, exist_ok=True)
        try:
            # Copy NeMo model artifacts into the staging directory under the file
            # names that load_context expects.
            for component in required_components:
                shutil.copyfile(nemo_models[component], f"{model_name}/{component}.nemo")

            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

            # Loading models in memory to convert to ONNX
            mt_model = MarianMTModel.from_pretrained(MT_MODEL)
            asr_model = nemo_asr.models.EncDecCTCModel.restore_from(nemo_models["enc_dec_CTC"])
            fast_pitch_model = nemo_tts.models.FastPitchModel.restore_from(nemo_models["fast_pitch"])
            hifi_gan_model = nemo_tts.models.HifiGanModel.restore_from(nemo_models["hifi_gan"])

            # One ModelExportConfig per pre-loaded model; model_name drives ONNX file naming.
            model_configs = [
                # Transformers translation model
                ModelExportConfig(
                    model=mt_model,
                    model_name="Helsinki-NLP",
                    task="translation",
                ),
                # NeMo ASR model
                ModelExportConfig(
                    model=asr_model.to(device),
                    model_name="enc_dec_CTC",
                ),
                # NeMo FastPitch model
                ModelExportConfig(
                    model=fast_pitch_model.to(device),
                    model_name="fast_pitch",
                ),
                # NeMo HifiGAN model
                ModelExportConfig(
                    model=hifi_gan_model.to(device),
                    model_name="hifi_gan",
                ),
            ]

            log_model(
                artifact_path=model_name,
                python_model=cls(),
                artifacts={"model": model_name, "demo": demo_folder, "config": config_path},
                signature=signature,
                models_to_convert_onnx=model_configs,
                pip_requirements=pip_requirements
            )
        finally:
            # Clean up the staging directory even when export or logging fails.
            shutil.rmtree(model_name)

In [8]:
# ------------------------- MLflow Model Logging and Registration -------------------------

# Point MLflow at the local tracking store
mlflow.set_tracking_uri('/phoenix/mlflow')
# Set the MLflow experiment
mlflow.set_experiment(experiment_name=EXPERIMENT_NAME)

# Start a new MLflow run
with mlflow.start_run(run_name=RUN_NAME) as run:
    # Define the set of NeMo model components to be logged
    nemo_model_artifacts = {
        "enc_dec_CTC": ASR_MODEL_PATH,
        "fast_pitch": SPECTROGRAM_GENERATOR_PATH,
        "hifi_gan": VOCODER_PATH,
    }

    # Log the custom translation model with specified artifacts and demo folder.
    # The demo folder comes from the DEMO_PATH setting rather than a duplicated
    # literal, so changing the configuration cell is enough to relocate it.
    NemoTranslationModel.log_model(
        model_name=MODEL_NAME,
        nemo_models=nemo_model_artifacts,
        demo_folder=DEMO_PATH,
        config_path="../configs/config.yaml",
        pip_requirements="../requirements.txt"
    )

    # Register the logged model in MLflow Model Registry
    mlflow.register_model(
        model_uri=f"runs:/{run.info.run_id}/{MODEL_NAME}",
        name=MODEL_NAME
    )

2025/09/05 18:50:54 INFO mlflow.tracking.fluent: Experiment with name 'NeMo_Translation_Experiment' does not exist. Creating a new experiment.


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/312M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

2025-09-05 18:52:37 - INFO - 🔧 Generating ONNX model(s) for specified models...
2025-09-05 18:52:37 - INFO - 🔄 Converting transformers model: Helsinki-NLP
2025-09-05 18:52:37 - INFO - 📁 Model directory: Helsinki-NLP
2025-09-05 18:52:37 - INFO - 🔍 Model identified as: transformers
2025-09-05 18:52:37 - INFO - 🤗 Converting loaded Transformers model for task: translation with opset 12


tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/826k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

2025-09-05 18:52:45 - INFO - ✅ Transformers model exported to: Helsinki-NLP/model.onnx
2025-09-05 18:52:45 - INFO - ✅ Converted Helsinki-NLP to directory: Helsinki-NLP
2025-09-05 18:52:45 - INFO - 🔄 Converting nemo model: enc_dec_CTC
2025-09-05 18:52:45 - INFO - 📁 Model directory: enc_dec_CTC
2025-09-05 18:52:45 - INFO - 🔍 Model identified as: nemo
2025-09-05 18:52:45 - INFO - 🔄 Exporting loaded NeMo model with format: ONNX
2025-09-05 18:52:45 - INFO - 🔄 Using official NVIDIA export() method
2025-09-05 18:53:03 - INFO - Model saved to ONNX: enc_dec_CTC/model.onnx
2025-09-05 18:53:04 - INFO - ✅ Converted enc_dec_CTC to directory: enc_dec_CTC
2025-09-05 18:53:04 - INFO - 🔄 Converting nemo model: fast_pitch
2025-09-05 18:53:04 - INFO - 📁 Model directory: fast_pitch
2025-09-05 18:53:04 - INFO - 🔍 Model identified as: nemo
2025-09-05 18:53:04 - INFO - 🔄 Exporting loaded NeMo model with format: ONNX
2025-09-05 18:53:04 - INFO - 🔄 Using official NVIDIA export() method
2025-09-05 18:53:12 - IN

Removing weight norm...
Removing weight norm...


2025-09-05 18:53:14 - INFO - Model saved to ONNX: hifi_gan/model.onnx
2025-09-05 18:53:14 - INFO - ✅ Converted hifi_gan to directory: hifi_gan
2025-09-05 18:53:14 - INFO - 📦 Added model directory artifact: model_Helsinki-NLP -> Helsinki-NLP
2025-09-05 18:53:14 - INFO - 📦 Added model directory artifact: model_enc_dec_CTC -> enc_dec_CTC
2025-09-05 18:53:14 - INFO - 📦 Added model directory artifact: model_fast_pitch -> fast_pitch
2025-09-05 18:53:14 - INFO - 📦 Added model directory artifact: model_hifi_gan -> hifi_gan
2025-09-05 18:53:14 - INFO -   No Triton structure requested, using model directories as-is


Downloading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/26 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

2025-09-05 18:54:10 - INFO - Model logged with artifacts: ['model', 'demo', 'config', 'model_Helsinki-NLP', 'model_enc_dec_CTC', 'model_fast_pitch', 'model_hifi_gan']
2025-09-05 18:54:10 - INFO - ✅ Model logged with 4 model directories created!
Successfully registered model 'nemo_en_es'.
Created version '1' of model 'nemo_en_es'.


In [9]:
# ------------------------- Success Confirmation -------------------------

# Build the confirmation line first, then emit it.
success_message = f"✅ Model '{MODEL_NAME}' successfully logged and registered under experiment '{EXPERIMENT_NAME}'."
print(success_message)

✅ Model 'nemo_en_es' successfully logged and registered under experiment 'NeMo_Translation_Experiment'.


In [10]:
# Total wall-clock time since the start-of-notebook timestamp.
end_time: float = time.time()
elapsed_time: float = end_time - start_time

# Split into whole minutes and leftover (fractional) seconds in one step.
_whole_minutes, elapsed_seconds = divmod(elapsed_time, 60)
elapsed_minutes: int = int(_whole_minutes)

logger.info(f"⏱️ Total execution time: {elapsed_minutes}m {elapsed_seconds:.2f}s")
logger.info("✅ Notebook execution completed successfully.")

2025-09-05 18:54:10 - INFO - ⏱️ Total execution time: 3m 54.62s
2025-09-05 18:54:10 - INFO - ✅ Notebook execution completed successfully.


Built with ❤️ using [**HP AI Studio**](https://hp.com/ai-studio).