<h1 style="text-align: center; font-size: 50px;">BERT Model Registration</h1>

This Jupyter Notebook implements a BERT-based similarity model using MLflow for tracking, managing, and deploying the model. It loads a pre-trained BERT model, computes sentence embeddings, and retrieves the most similar sentences from a stored corpus based on cosine similarity.

# Notebook Overview
- Imports
- Configurations
- Verify Assets
- Downloading the BERT Large Uncased Model
- Defining the BERT Tourism Model Class
- Logging Model to MLflow
- Fetching the Latest Model Version from MLflow
- Loading the Model and Running Inference
- Displaying Results for the Input Query

# Imports

In [1]:
# Standard Library Imports
import os
import json
import shutil
import logging
import warnings
from pathlib import Path  

# Third-Party Libraries
import torch
import numpy as np
import pandas as pd
from tabulate import tabulate
from sklearn.metrics.pairwise import cosine_similarity

# MLflow for Experiment Tracking and Model Management
import mlflow
import mlflow.pyfunc
from mlflow import MlflowClient
from mlflow.models.signature import ModelSignature
from mlflow.types.schema import Schema, ColSpec, TensorSpec, ParamSchema, ParamSpec

# Transformers and NLP Libraries
from transformers import AutoTokenizer
from transformers import logging as hf_logging
from nemo.collections.nlp.models.language_modeling import BERTLMModel

# Configurations

In [2]:
# ------------------------ Suppress Verbose Logs ------------------------
# NVIDIA NeMo: let only ERROR and above through its "nemo_logger" logger.
logging.getLogger("nemo_logger").setLevel(logging.ERROR)

# Hugging Face Transformers: restrict library output to errors.
hf_logging.set_verbosity_error()

# Python warnings: install a blanket "ignore" filter.
warnings.filterwarnings("ignore")

In [3]:
# Create (or fetch) the notebook logger. logging.getLogger returns the same
# object for a given name, so this cell has to be safe to run more than once.
logger = logging.getLogger("tourism_logger")
logger.setLevel(logging.INFO)

formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s",
                              datefmt="%Y-%m-%d %H:%M:%S")

# Remove handlers left behind by an earlier execution of this cell; without
# this, each re-run stacks another StreamHandler and every message is printed
# once per handler.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)

stream_handler = logging.StreamHandler()
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)

# Keep records away from the root logger so they are not emitted a second time.
logger.propagate = False

In [4]:
# --- MLflow experiment, run and registry names ---
EXPERIMENT_NAME = "BERT_Tourism_Experiment"
RUN_NAME = "BERT_Tourism_Run"
MODEL_NAME = "BERT_Tourism_Model"

# --- BERT checkpoint: model name plus the two candidate .nemo file locations ---
BERT_MODEL_NAME = "bert-large-uncased"
BERT_MODEL_DATAFABRIC_PATH = "/home/jovyan/datafabric/Bertlargeuncased/bertlargeuncased.nemo"
BERT_MODEL_ONLINE_PATH = "/root/.cache/torch/NeMo/NeMo_1.22.0/bertlargeuncased/ca4ebba9f05a8ffb79845249ca046983/bertlargeuncased.nemo"

# --- Project files and directories (relative paths) ---
CORPUS_PATH = "../data/raw/corpus.csv"
EMBEDDINGS_PATH = "../data/processed/embeddings.csv"
TOKENIZER_DIR = "../artifacts/tokenizer"
DEMO_PATH = "../demo"

In [5]:
# Marker line so the log output shows where this notebook run begins.
logger.info('Notebook execution started.')

2025-04-08 21:24:11 - INFO - Notebook execution started.


# Verify Assets

In [6]:
def log_asset_status(asset_path: str, asset_name: str, success_message: str, failure_message: str) -> None:
    """
    Log whether a required asset exists on disk.

    A present asset is reported at INFO level. A missing asset is reported at
    WARNING level so that it stands out from routine progress messages.

    Parameters:
        asset_path (str): File or directory path to check.
        asset_name (str): Name of the asset for logging context.
        success_message (str): Text appended to the message when the asset exists.
        failure_message (str): Text appended to the message when the asset is missing.

    Returns:
        None. The function only logs; it does not raise when the asset is missing.
    """
    if Path(asset_path).exists():
        logger.info(f"{asset_name} is properly configured. {success_message}")
    else:
        # A missing prerequisite is not routine information: later cells that
        # need this asset will fail, so surface it at WARNING level.
        logger.warning(f"{asset_name} is not properly configured. {failure_message}")


# Check and log status for BERT model, embeddings file, and tokenizer.
# Each entry is (path to check, display name, hint logged when the path is missing).
required_assets = [
    (
        BERT_MODEL_DATAFABRIC_PATH,
        "BERT model",
        "Please create and download the required assets in your project on AI Studio.",
    ),
    (
        EMBEDDINGS_PATH,
        "Embeddings file",
        "Please run the '00_Word_Embeddings_Generation' notebook to create the embeddings file.",
    ),
    (
        TOKENIZER_DIR,
        "Tokenizer",
        "Please run the '00_Word_Embeddings_Generation' notebook to save the tokenizer.",
    ),
]

for path_to_check, display_name, missing_hint in required_assets:
    log_asset_status(
        asset_path=path_to_check,
        asset_name=display_name,
        success_message="",
        failure_message=missing_hint,
    )

2025-04-08 21:24:11 - INFO - BERT model is properly configured. 
2025-04-08 21:24:11 - INFO - Embeddings file is properly configured. 
2025-04-08 21:24:11 - INFO - Tokenizer is properly configured. 


# Downloading the BERT Large Uncased Model

In [7]:
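# Ensure you have added the 'bertlargeuncased' model from the NVIDIA NGC model catalog to your project.
# If it is unavailable, uncomment the two lines below to download the BERT model online instead,
# and switch "bert_model_path" to BERT_MODEL_ONLINE_PATH in BERTTourismModel.log_model.
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# bert_model = BERTLMModel.from_pretrained(model_name="bertlargeuncased", strict=False).to(device)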

# Defining the BERT Tourism Model Class

In [None]:
class BERTTourismModel(mlflow.pyfunc.PythonModel):
    """
    MLflow pyfunc model that ranks corpus entries against a text query.

    The query is embedded with a NeMo BERT model, compared by cosine similarity
    against precomputed corpus embeddings, and the best-matching corpus rows are
    returned together with their similarity scores.
    """

    # Number of corpus rows returned for each query.
    TOP_K = 5

    def load_context(self, context):
        """
        Load precomputed embeddings, corpus, tokenizer, and the pre-trained BERT model.

        Parameters:
            context: MLflow model context; its `artifacts` mapping must contain
                'embeddings_path', 'corpus_path', 'tokenizer_dir' and
                'bert_model_path'.
        """
        # Load precomputed embeddings and corpus data.
        # predict() maps embedding row positions onto corpus row positions, so
        # the two files are assumed to be row-aligned.
        self.embeddings_df = pd.read_csv(context.artifacts['embeddings_path'])
        self.corpus_df = pd.read_csv(context.artifacts['corpus_path'])

        # Load tokenizer for BERT
        self.tokenizer = AutoTokenizer.from_pretrained(context.artifacts["tokenizer_dir"])

        # Set device to GPU if available, otherwise use CPU
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Load pre-trained BERT model
        self.bert_model = BERTLMModel.restore_from(context.artifacts['bert_model_path'], strict=False).to(self.device)

    def generate_query_embedding(self, query):
        """
        Generate a BERT embedding for the input query.

        Parameters:
            query: Text to embed; passed straight to the tokenizer.

        Returns:
            NumPy array holding position 0 of the model output for each input
            sequence.
        """
        self.bert_model.eval()  # Set model to evaluation mode

        # Tokenize the input query (truncated to 128 tokens) and move tensors to the selected device
        encoded_input = self.tokenizer(query, padding=True, truncation=True, return_tensors="pt", max_length=128)
        encoded_input = {key: val.to(self.device) for key, val in encoded_input.items()}

        # Get the model's output embedding without tracking gradients
        with torch.no_grad():
            output = self.bert_model.bert_model(**encoded_input)

        # Return the [CLS] token embedding as a NumPy array
        # (assumes output is laid out as (batch, sequence, hidden) - TODO confirm)
        return output[:, 0, :].cpu().numpy()

    def predict(self, context, model_input, params=None):
        """
        Compute similarity between the query and the precomputed embeddings,
        then return the TOP_K most similar corpus rows.

        Parameters:
            context: MLflow model context (not used here).
            model_input: Mapping or DataFrame with a "query" column; only its
                first entry is used.
            params: Optional inference parameters. Defaults to None so the
                method can also be called without them.

        Returns:
            List of dicts, one per matching corpus row, each carrying the corpus
            columns plus a 'Similarity' score, ordered from most to least similar.
        """
        # NOTE(review): the logged signature declares a `show_score` param, but
        # `params` is not read here; 'Similarity' is always included.

        # Extract the query string from model input. Use positional access for
        # pandas objects: `series[0]` is a label lookup and fails when the index
        # does not contain the label 0.
        queries = model_input["query"]
        query = queries.iloc[0] if hasattr(queries, "iloc") else queries[0]

        # Generate query embedding
        query_embedding = self.generate_query_embedding(query)

        # Compute cosine similarity between query and precomputed embeddings
        similarities = cosine_similarity(query_embedding, self.embeddings_df.values)

        # Get indices of the TOP_K most similar results, best first
        top_indices = np.argsort(similarities[0])[::-1][:self.TOP_K]

        # Retrieve corresponding results from the corpus
        results = self.corpus_df.iloc[top_indices].copy()
        results.loc[:, 'Similarity'] = similarities[0][top_indices]

        # Return results as a list of per-row dictionaries
        return results.to_dict(orient="records")

    @classmethod
    def log_model(cls, model_name):
        """
        Log the model to MLflow with its artifacts and signature.

        Must be called while an MLflow run is active so the model is attached
        to that run.

        Parameters:
            model_name: Artifact path under which the model is logged.
        """
        # Define input and output schema
        input_schema = Schema([ColSpec("string", "query")])
        output_schema = Schema([
            TensorSpec(np.dtype("object"), (-1,), "List of Pledges and Similarities")
        ])
        params_schema = ParamSchema([ParamSpec("show_score", "boolean", False)])

        # Define model signature
        signature = ModelSignature(inputs=input_schema, outputs=output_schema, params=params_schema)

        # Log the model in MLflow
        mlflow.pyfunc.log_model(
            model_name,
            python_model=cls(),
            artifacts={
                "corpus_path": CORPUS_PATH,
                "embeddings_path": EMBEDDINGS_PATH,
                "tokenizer_dir": TOKENIZER_DIR,
                # If you are using the downloaded bert model then uncomment the line below and comment the other bert model line that uses nemo model from datafabric
                #"bert_model_path": BERT_MODEL_ONLINE_PATH,
                "bert_model_path": BERT_MODEL_DATAFABRIC_PATH,
                "demo": DEMO_PATH,
            },
            signature=signature
        )

# Logging Model to MLflow

In [None]:
logger.info(f'Starting the experiment: {EXPERIMENT_NAME}')

# Point MLflow at the tracking store used by this project
mlflow.set_tracking_uri('/phoenix/mlflow')
# Set the MLflow experiment name
mlflow.set_experiment(experiment_name=EXPERIMENT_NAME)

# Start an MLflow run
with mlflow.start_run(run_name=RUN_NAME) as run:
    # Log the artifact URI for reference. This must go through the notebook's
    # `logger`: the bare `logging.info` call targets the root logger, which has
    # no handler configured in this notebook, so the message was never shown.
    logger.info(f"Run's Artifact URI: {run.info.artifact_uri}")

    # Log the BERT similarity model to MLflow
    BERTTourismModel.log_model(model_name=MODEL_NAME)

    # Register the logged model in MLflow Model Registry
    mlflow.register_model(
        model_uri=f"runs:/{run.info.run_id}/{MODEL_NAME}",
        name=MODEL_NAME
    )

logger.info(f'Registered the model: {MODEL_NAME}')

2025-04-08 21:24:11 - INFO - Starting the experiment: BERT_Tourism_Experiment


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/4 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

Registered model 'BERT_Tourism_Model' already exists. Creating a new version of this model...
Created version '2' of model 'BERT_Tourism_Model'.
2025-04-08 21:25:23 - INFO - Registered the model: BERT_Tourism_Model


# Fetching the Latest Model Version from MLflow

In [10]:
# Initialize the MLflow client
client = MlflowClient()

# Collect every registered version of the model. `get_latest_versions(stages=...)`
# depends on model stages, which MLflow has deprecated, so search by name instead.
# Note: this considers all versions of the model, whatever their stage.
model_versions = client.search_model_versions(f"name='{MODEL_NAME}'")
if not model_versions:
    # Fail with an actionable message instead of an IndexError on `[0]` below.
    raise RuntimeError(
        f"No registered versions found for model '{MODEL_NAME}'. "
        "Run the 'Logging Model to MLflow' cell first."
    )

# Version numbers are stored as strings; compare them as integers so that
# "10" ranks above "9". The newest version ends up first.
model_metadata = sorted(model_versions, key=lambda mv: int(mv.version), reverse=True)
latest_model_version = model_metadata[0].version  # Extract the latest model version

# Fetch model information, including its signature
model_info = mlflow.models.get_model_info(f"models:/{MODEL_NAME}/{latest_model_version}")

# Print the latest model version and its signature
print(f"Latest Model Version: {latest_model_version}")
print(f"Model Signature: {model_info.signature}")

Latest Model Version: 2
Model Signature: inputs: 
  ['query': string (required)]
outputs: 
  ['List of Pledges and Similarities': Tensor('object', (-1,))]
params: 
  ['show_score': boolean (default: False)]



# Loading the Model and Running Inference

In [11]:
# Build the registry URI for the selected version and load it as a pyfunc model
registered_model_uri = f"models:/{MODEL_NAME}/{latest_model_version}"
model = mlflow.pyfunc.load_model(model_uri=registered_model_uri)

# Sample query used to exercise the model
query = "Give me a resort budget vacation suggestion"

# Send the query under the "query" column and keep the returned matches
result = model.predict({"query": [query]})

# Displaying Results for the Input Query

In [12]:
# Shape the prediction records for display in a single chain:
#   1. load the records into a DataFrame,
#   2. drop helper columns when they are present (missing ones are ignored),
#   3. give the remaining columns reader-friendly names.
df = (
    pd.DataFrame(result)
    .drop(columns=["Unnamed: 0", "Topic"], errors="ignore")
    .rename(columns={"Pledge": "Recommended Option", "Similarity": "Relevance Score"})
)

# Render the table as a text grid
print(tabulate(df, headers="keys", tablefmt="fancy_grid"))

╒════╤═════════════════════════════════════════════════════════════════════════════════════════════════════╤═══════════════════╕
│    │ Recommended Option                                                                                  │   Relevance Score │
╞════╪═════════════════════════════════════════════════════════════════════════════════════════════════════╪═══════════════════╡
│  0 │ For a budget-friendly vacation, consider a resort with vacation options and cruise activities.      │          0.869167 │
├────┼─────────────────────────────────────────────────────────────────────────────────────────────────────┼───────────────────┤
│  1 │ For a budget-friendly vacation, consider a getaway with beach options and vacation activities.      │          0.863822 │
├────┼─────────────────────────────────────────────────────────────────────────────────────────────────────┼───────────────────┤
│  2 │ For a budget-friendly vacation, consider a getaway with hotel options and vacation activit

In [13]:
# Marker line so the log output shows that the notebook ran to the end.
logger.info('Notebook execution completed.')

2025-04-08 21:27:46 - INFO - Notebook execution completed.


Built with ❤️ using Z by HP AI Studio.