# Register Model

## Notebook Overview

- Start Execution
- Install and Import Libraries
- Configure Settings
- Model Service Registration to MLflow

# Start Execution

In [1]:
import logging
import time

# Configure the notebook-wide logger.
# logging.getLogger returns the same object for the same name, so state set
# here survives across re-runs of this cell within one kernel session.
logger: logging.Logger = logging.getLogger("register_model_logger")
logger.setLevel(logging.INFO)
logger.propagate = False  # Prevent duplicate logs from parent loggers

# Set formatter: "<timestamp> - <level> - <message>"
formatter: logging.Formatter = logging.Formatter(
    fmt="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S"
)

# Remove handlers attached by a previous run of this cell; otherwise each
# re-run would add another StreamHandler and every message would be printed
# once per attached handler.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)

# Configure and attach a single stream handler
stream_handler: logging.StreamHandler = logging.StreamHandler()
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)

In [2]:
# Record the wall-clock start so the closing cell can report total run time
start_time: float = time.time()

logger.info("Notebook execution started.")

2025-08-19 12:50:16 - INFO - Notebook execution started.


# Install and Import Libraries

In [3]:
%%time

# Install the dependencies listed in the requirements file one directory above
# the notebook; --quiet is passed to reduce pip's output.
%pip install -r ../requirements.txt --quiet

Note: you may need to restart the kernel to use updated packages.
CPU times: user 49 ms, sys: 18.2 ms, total: 67.2 ms
Wall time: 2.32 s


In [4]:

# === Standard Library Imports ===
import os
import sys
import json
import warnings
from datetime import datetime
from pathlib import Path

# Resolve the absolute path of the parent of the current working directory
# (one level up). Presumably this is the project root that holds the `core`
# and `src` packages imported below — verify against the repository layout.
src_path = os.path.abspath(os.path.join(os.getcwd(), ".."))

# Add that directory to sys.path (only if absent) so project modules can be imported
if src_path not in sys.path:
    sys.path.append(src_path)

# === Third-Party Imports ===
import numpy as np
import pandas as pd
import mlflow
from typing import List

# Import transformers from huggingface
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

#Import components of notebook
from core.extract_text.arxiv_search import ArxivSearcher
from core.generator.script_generator import ScriptGenerator
from core.analyzer.scientific_paper_analyzer import ScientificPaperAnalyzer
from core.deploy.text_generation_service import TextGenerationService

#import langchain libraries
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema import StrOutputParser
from langchain_huggingface import HuggingFacePipeline, HuggingFaceEndpoint
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_community.llms import LlamaCpp

# === Project-Specific Imports (from src.utils) ===
from src.utils import (
    load_config,
    load_secrets,
    load_secrets_to_env,
    configure_proxy,
    initialize_llm,
    configure_hf_cache
)

# Make the parent of the current working directory importable.
# Guarded so that re-running this cell does not keep appending the same
# entry to sys.path.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

# Import the TextGenerationService class
from core.deploy.text_generation_service import TextGenerationService

  param_names = _check_func_signature(func, "predict")


# Configure Settings

In [5]:
# ------------------------ Suppress Verbose Logs ------------------------
warnings.filterwarnings("ignore")

In [6]:
# Path constants used by the cells below.
# To run the later cells on their own, install the requirements, run the
# import cell, and then run this cell first.
CONFIG_DIR = "../configs"
CONFIG_PATH = f"{CONFIG_DIR}/config.yaml"
SECRETS_PATH = f"{CONFIG_DIR}/secrets.yaml"

# Location of the GGUF model file
MODEL_PATH = "/home/jovyan/datafabric/meta-llama3.1-8b-Q8/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf"

# Demo folder path
DEMO_FOLDER = "../demo"

In [7]:
# Load secrets from secrets.yaml file (if it exists) into environment
if Path(SECRETS_PATH).exists():
    load_secrets_to_env(SECRETS_PATH)
else:
    print(f"No secrets file found at {SECRETS_PATH}; relying on preexisting environment")

# Retrieve secrets from environment.
# Best-effort: a ValueError from load_secrets() means "continue without
# secrets", but the reason is reported instead of being silently discarded.
try:
    secrets = load_secrets()
except ValueError as exc:
    secrets = {}
    secrets_status = f"⚠️ Secrets could not be loaded ({exc}); continuing without secrets"
else:
    # Only claim success when something was actually returned
    if secrets:
        secrets_status = "✅ Secrets loaded successfully"
    else:
        secrets_status = "⚠️ No secrets found; continuing without secrets"

# Load configuration
config = load_config(CONFIG_PATH)

print("✅ Configuration loaded successfully")
print(secrets_status)

✅ Loaded 2 secrets into environment variables.
✅ Configuration loaded successfully
✅ Secrets loaded successfully


# Model Service Registration to MLflow

In this section, we implement the **Model Service**, a REST API responsible for serving the language model. The API is automatically documented using Swagger (via FastAPI), enabling interactive testing and clear documentation of the endpoints.

## Text Generation Service

This section demonstrates how to use our `TextGenerationService` from the `core/deploy` directory. This approach improves code organization by separating the service implementation from the notebook, making it easier to maintain and update.

In [8]:
%%time

mlflow.set_tracking_uri('/phoenix/mlflow')
# Set up the MLflow experiment
mlflow.set_experiment("Text-Generation-service")

# Only log the model file when the configured model source is local.
# Previously `model_path` was computed but never used, so MODEL_PATH was
# always passed to log_model regardless of the configured source.
if config["model_source"] == "local":
    model_path = MODEL_PATH
    # Check if the model file exists (only relevant for a local model)
    if not os.path.exists(model_path):
        print(f"Warning: Model file not found at {MODEL_PATH}. You may need to update the path.")
else:
    # NOTE(review): assumes TextGenerationService.log_model accepts
    # llm_artifact=None for non-local sources — confirm against its signature.
    model_path = None


# Use the TextGenerationService's log_model method to log the model in MLflow
with mlflow.start_run(run_name="Script-Generation") as run:
    # Log the model using the service's classmethod
    TextGenerationService.log_model(
        llm_artifact = model_path,
        config_path = CONFIG_PATH,
        secrets_dict = secrets if secrets else None,
        demo_folder = DEMO_FOLDER
    )

    # Register the logged model in the MLflow Model Registry.
    # NOTE(review): the URI presumes log_model stores the model under the
    # artifact path "script_generation_model" — confirm against log_model.
    model_uri = f"runs:/{run.info.run_id}/script_generation_model"
    mlflow.register_model(model_uri=model_uri, name="Script-Generation-Service")
    print(f"Model registered successfully with run ID: {run.info.run_id}")

2025-08-19 12:50:27,494 | INFO | Secrets artifact written to temporary file /tmp/tmphhrqz9vq.yaml


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/4 [00:00<?, ?it/s]

Model registered successfully with run ID: acc64804ca1c4482a2b88bdf23b4bc4a
CPU times: user 1.32 s, sys: 16.6 s, total: 17.9 s
Wall time: 18 s


Registered model 'Script-Generation-Service' already exists. Creating a new version of this model...
Created version '3' of model 'Script-Generation-Service'.


In [9]:
# Total wall-clock time since start_time was recorded, split into whole
# minutes and the remaining seconds.
end_time: float = time.time()
elapsed_time: float = end_time - start_time
whole_minutes, elapsed_seconds = divmod(elapsed_time, 60)
elapsed_minutes: int = int(whole_minutes)

logger.info(f"⏱️ Total execution time: {elapsed_minutes}m {elapsed_seconds:.2f}s")
logger.info("✅ Notebook execution completed successfully.")

2025-08-19 12:50:45 - INFO - ⏱️ Total execution time: 0m 29.25s
2025-08-19 12:50:45 - INFO - ✅ Notebook execution completed successfully.


Built with ❤️ using [**HP AI Studio**](https://hp.com/ai-studio).