<h1 style=\"text-align: center; font-size: 50px;\"> Register Model </h1>

# Notebook Overview

- Start Execution
- Define User Constants
- Install and Import Libraries
- Configure Settings
- Verify Assets
- Load and Validate Data
- Define MLflow Class
- Log the Model to MLflow
- Fetch the Latest Model Version from MLflow
- Load the Model and Run Inference
- Display Evaluation Results
- Save Evaluation Results

# Start Execution

In [1]:
import json
import logging
from pathlib import Path
from datetime import datetime
import time

# Configure logger
logger: logging.Logger = logging.getLogger("register_model_logger")
logger.setLevel(logging.INFO)
logger.propagate = False  # Prevent duplicate logs from parent loggers

# Set formatter
formatter: logging.Formatter = logging.Formatter(
    fmt="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S"
)

# Configure and attach stream handler
stream_handler: logging.StreamHandler = logging.StreamHandler()
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)

In [2]:
start_time = time.time()  
logger.info("Notebook execution started.")

2025-08-05 11:56:21 - INFO - Notebook execution started.


# Define User Constants

In [3]:
# File configuration
INPUT_FILE_NAME: str = "2025 ISEF Project Abstracts.csv"
INPUT_DIR: Path = Path("../data/inputs")
OUTPUT_DIR: Path = Path("../data/outputs")

# Ensure directories exist
INPUT_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

INPUT_PATH: Path = INPUT_DIR / INPUT_FILE_NAME
TIMESTAMP: str = datetime.now().strftime('%Y-%m-%d %H-%M-%S')
OUTPUT_FILE_NAME: str = f"Evaluated - {INPUT_FILE_NAME} - {TIMESTAMP}"
OUTPUT_PATH: Path = OUTPUT_DIR / OUTPUT_FILE_NAME

# Evaluation configuration
KEY_COLUMN: str = "title"
EVAL_COLUMN: str = "abstract"
CRITERIA: dict[str, int] = json.loads(
        json.dumps({
            "Originality": 3,
            "ScientificRigor": 4,
            "Clarity": 2,
            "Relevance": 1,
            "Feasibility": 3,
            "Brevity": 2,
        }),
)

# Percentage of rows to evaluate
PERCENTAGE_ROWS_TO_BE_EVALUATED: float = 0.01

# Install and Import Libraries

In [4]:
%%time

%pip install -r ../requirements.txt --quiet

Note: you may need to restart the kernel to use updated packages.
CPU times: user 36.8 ms, sys: 11.6 ms, total: 48.4 ms
Wall time: 1.55 s


In [5]:
import os
import json
import logging
import multiprocessing
import sys
from pathlib import Path
import warnings
import re

import pandas as pd
import numpy as np
import mlflow
import mlflow.pyfunc
from mlflow.models import ModelSignature
from mlflow.types import Schema, ColSpec, DataType, ParamSpec, ParamSchema, TensorSpec
from mlflow.tracking import MlflowClient
from llama_cpp import Llama

# Add src directory to path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

from src.utils import load_config, configure_proxy

# Configure Settings

In [6]:
warnings.filterwarnings("ignore")

In [7]:
EXPERIMENT_NAME = "EvaluationExperiment"
RUN_NAME = "EvaluationRun"
MODEL_NAME = "EvaluationModel"

LLAMA_MODEL_PATH = "/home/jovyan/datafabric/meta-llama3.1-8b-Q8/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf"

# Load configuration
CONFIG_PATH = "../configs/config.yaml"
config = load_config(CONFIG_PATH)

# Configure proxy if specified
configure_proxy(config)

print("✅ Configuration loaded successfully")

✅ Configuration loaded successfully


# Verify Assets

In [8]:
def log_asset_status(asset_path: str, asset_name: str) -> None:
    """
    Logs the status of a given asset based on its existence.

    Parameters:
        asset_path (str): File or directory path to check.
        asset_name (str): Name of the asset for logging context.
    """
    if Path(asset_path).exists():
        logger.info(f"{asset_name} is properly configured.")
    else:
        logger.info(f"{asset_name} is not properly configured. Please ensure the required asset is correctly configured in your AI Studio project according to the README file.")

In [9]:
log_asset_status(
    asset_path=INPUT_PATH,
    asset_name="Input Data",
)

log_asset_status(
    asset_path=LLAMA_MODEL_PATH,
    asset_name="LLaMA Local model",
)

2025-08-05 11:56:25 - INFO - Input Data is properly configured.
2025-08-05 11:56:25 - INFO - LLaMA Local model is properly configured.


# Load and Validate Data

In [10]:
df = pd.read_csv(INPUT_PATH)

df.head(10)

Unnamed: 0,title,category,year,schools,abstract,country,State,Province,awards
0,Dynamic Response of a Human Neck Replica to Ax...,Energy: Physical,2014.0,set(),Purpose: A human neck replica was made to simu...,United States of America,MN,,['nan']
1,The Effect of Nutrient Solution Concentration ...,Physics and Astronomy,2014.0,set(),Studies comparing the mineral nutrition of hyd...,United States of America,UT,,['nan']
2,Do Air Root Pruning Pots Accelerate Success in...,Physics and Astronomy,2014.0,set(),The purpose of my project was to determine whi...,United States of America,LA,,['nan']
3,Insect-repelling Plants & New Organic Pesticide,Environmental Engineering,2014.0,set(),Organochlorine pesticides in agriculture are n...,United States of America,TX,,['nan']
4,How Do Different Factors Affect the Accuracy o...,Earth and Environmental Sciences,2014.0,set(),The purpose of this experiment is to determine...,United States of America,MN,,['nan']
5,Dye Sensitized Solar Cells: New Structures and...,Engineering Mechanics,2014.0,set(),Although fossil fuels have the capacity to pow...,United States of America,TX,,['Fourth Award of $500']
6,A Novel Method for Determination of Camera Pos...,Embedded Systems,2014.0,set(),The method proposed here solves for the pose o...,United States of America,MO,,['nan']
7,Observational Detection of Solar g-mode Oscill...,Microbiology,2014.0,set(),,United States of America,HI,,"['Third Award of $1,000']"
8,Synthesis of Periodic Mesoporous Organosilicas...,Plant Sciences,2014.0,set(),,United States of America,TX,,"['Second Award of $2,000']"
9,A Novel Mathematical Simulation to Study the D...,Materials Science,2014.0,set(),"Human Immunodeficiency Virus (HIV), the virus ...",United States of America,TX,,['nan']


In [11]:
# Validate required columns
missing_columns: list[str] = [
    col for col in [KEY_COLUMN, EVAL_COLUMN] if col not in df.columns
]
if missing_columns:
    raise KeyError(f"Missing required column(s): {', '.join(missing_columns)}")

# Ensure key column is of string type
df[KEY_COLUMN] = df[KEY_COLUMN].astype(str)

In [12]:
# Determine the number of rows to evaluate (at least 1)
num_rows_to_evaluate: int = max(int(len(df) * PERCENTAGE_ROWS_TO_BE_EVALUATED / 100), 1)

# Select the top rows for evaluation
df = df[:num_rows_to_evaluate]

# Define MLflow Class

In [13]:
class EvaluatorModel(mlflow.pyfunc.PythonModel):
    """
    A PythonModel using a local LLaMA model to evaluate texts by multiple criteria.
    """
    def load_context(self, context):
        """Load LLaMA model from artifacts with optimized configuration."""
        model_path = context.artifacts["llama_model_path"]
        self.llm = Llama(
            model_path=model_path,
            n_gpu_layers=-1,
            n_batch=128,
            n_ctx=8192,
            max_tokens=512,
            f16_kv=True,
            use_mmap=False,
            low_vram=True,
            rope_scaling=None,
            temperature=0.0,
            repeat_penalty=1.0,
            streaming=False,
            stop=None,
            seed=42,
            num_threads=multiprocessing.cpu_count(),
            verbose=False,
        )

    def predict(self, context, model_input: pd.DataFrame, params: dict) -> pd.DataFrame:
        """Evaluate texts using LLaMA model and return scores with total."""
        # Extract parameters
        key_col = params.get("key_column", KEY_COLUMN)
        eval_col = params.get("eval_column", EVAL_COLUMN)
        criteria = params.get("criteria", CRITERIA)
        if isinstance(criteria, str):
            criteria = json.loads(criteria)

        # Validate input
        for col in (key_col, eval_col):
            if col not in model_input.columns:
                raise KeyError(f"Input DataFrame missing column '{col}'")

        df = model_input.copy()
        df[key_col] = df[key_col].astype(str)

        # Helper functions
        def scale_score(raw: int, target: int) -> int:
            """Scale raw score (1-10) to target range."""
            scaled = round((raw / 10) * target)
            return min(max(scaled, 0), target)

        def extract_score(text: str) -> int:
            """Extract numeric score from LLM response text."""
            match = re.search(r"\b(10|[1-9])\b", text)
            return int(match.group(1)) if match else -1

        def eval_criterion(text: str, crit: str) -> int:
            """Evaluate text against criterion using LLM."""
            prompt = (
                f"Evaluate abstract by '{crit}', return integer 1-10 only.\n"
                f"Abstract:\n{text.strip()}\nScore:"
            )
            resp = self.llm(prompt)["choices"][0]["text"]
            return extract_score(resp)


        results = []
        for _, row in df.iterrows():
            scores = {crit: scale_score(eval_criterion(row[eval_col], crit), criteria[crit])
                      for crit in criteria}
            scores[key_col] = row[key_col]
            results.append(scores)

        scored_df = pd.DataFrame(results)
        # Merge & compute total
        merged = df.merge(scored_df, on=key_col)
        merged["TotalScore"] = merged[list(criteria)].sum(axis=1)
        return merged

    @classmethod
    def log_model(
        cls, 
        model_name: str, 
        llama_model_path: str, 
        config_path: str,
        experiment_name: str = EXPERIMENT_NAME
    ):
        """
        Logs and registers this model in MLflow.
        """
        # Define artifacts
        artifacts = {
            "llama_model_path": llama_model_path,
            "config_path": config_path,
            "demo": "../demo",
            }

        params_schema = ParamSchema([
            ParamSpec("key_column",  DataType.string,  'title'),
            ParamSpec("eval_column", DataType.string,  'abstract'),
            ParamSpec("criteria",    DataType.string,  '["Originality","Clarity","Relevance","Feasibility","Feasibility"]'),
        ])
        
        signature = ModelSignature(inputs=None, outputs=None, params=params_schema)

        mlflow.pyfunc.log_model(
            artifact_path=model_name,
            python_model=cls(),
            artifacts=artifacts,
            signature=signature,
            registered_model_name=model_name,
            pip_requirements='../requirements.txt'
        )
        logger.info(f"Model '{model_name}' logged and registered.")

# Log the Model to MLflow

In [14]:
%%time

mlflow.set_tracking_uri('/phoenix/mlflow')
mlflow.set_experiment(EXPERIMENT_NAME)

with mlflow.start_run(run_name=RUN_NAME) as run:
    run_id = run.info.run_id
    logger.info("Run ID: %s", run_id)

    EvaluatorModel.log_model(
        model_name=MODEL_NAME,
        llama_model_path=LLAMA_MODEL_PATH,
        config_path=CONFIG_PATH,
    )

    mlflow.register_model(
        model_uri=f"runs:/{run_id}/{MODEL_NAME}",
        name=MODEL_NAME
    )
    logger.info("Registered model: %s", MODEL_NAME)

2025/08/05 11:56:25 INFO mlflow.tracking.fluent: Experiment with name 'EvaluationExperiment' does not exist. Creating a new experiment.
2025-08-05 11:56:26 - INFO - Run ID: ad240e7bbe9b41d3ae4fb166d1584d91


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Successfully registered model 'EvaluationModel'.
Created version '1' of model 'EvaluationModel'.
2025-08-05 11:59:18 - INFO - Model 'EvaluationModel' logged and registered.
Registered model 'EvaluationModel' already exists. Creating a new version of this model...
Created version '2' of model 'EvaluationModel'.
2025-08-05 11:59:18 - INFO - Registered model: EvaluationModel


CPU times: user 1.43 s, sys: 26.4 s, total: 27.9 s
Wall time: 2min 52s


# Fetch the Latest Model Version from MLflow

In [15]:
# Load latest model
client = MlflowClient()
latest_version = client.get_latest_versions(MODEL_NAME, stages=["None"])[0].version
logger.info(f"Latest model version: {latest_version}")

2025-08-05 11:59:18 - INFO - Latest model version: 2


# Load the Model and Run Inference

In [16]:
%%time

model_uri = f"models:/{MODEL_NAME}/{latest_version}"
model = mlflow.pyfunc.load_model(model_uri)

CPU times: user 1.2 s, sys: 2.76 s, total: 3.96 s
Wall time: 48.4 s


In [17]:
%%time

# Sample input
sample = df
params = {
    "key_column": KEY_COLUMN,
    "eval_column": EVAL_COLUMN,
    "criteria": json.dumps(CRITERIA),
}
preds = model.predict(sample, params=params)
logger.info("Sample inference result: %s", preds)

2025-08-05 12:00:08 - INFO - Sample inference result:                                                title          category  \
0  Dynamic Response of a Human Neck Replica to Ax...  Energy: Physical   

     year schools                                           abstract  \
0  2014.0   set()  Purpose: A human neck replica was made to simu...   

                    country State Province   awards  Originality  \
0  United States of America    MN      NaN  ['nan']            0   

   ScientificRigor  Clarity  Relevance  Feasibility  Brevity  TotalScore  
0                4        2          0            1        1           8  


CPU times: user 1.95 s, sys: 113 ms, total: 2.07 s
Wall time: 2.14 s


# Display Evaluation Results

In [18]:
# Merge original data with evaluation results on the key column
final_df: pd.DataFrame = df.merge(preds, on=KEY_COLUMN)

# Compute total score by summing across all criteria
final_df["TotalScore"] = final_df[list(CRITERIA)].sum(axis=1)

# Sort the DataFrame by total score in descending order
final_df.sort_values(by="TotalScore", ascending=False, inplace=True)

# Preview the top 10 evaluated entries
final_df.head(10)

Unnamed: 0,title,category_x,year_x,schools_x,abstract_x,country_x,State_x,Province_x,awards_x,category_y,...,State_y,Province_y,awards_y,Originality,ScientificRigor,Clarity,Relevance,Feasibility,Brevity,TotalScore
0,Dynamic Response of a Human Neck Replica to Ax...,Energy: Physical,2014.0,set(),Purpose: A human neck replica was made to simu...,United States of America,MN,,['nan'],Energy: Physical,...,MN,,['nan'],0,4,2,0,1,1,8


# Save Evaluation Results

In [19]:
final_df.to_csv(OUTPUT_PATH, index=False)
logger.info(f"✅ Evaluation results successfully saved to: {OUTPUT_PATH}")

2025-08-05 12:00:09 - INFO - ✅ Evaluation results successfully saved to: ../data/outputs/Evaluated - 2025 ISEF Project Abstracts.csv - 2025-08-05 11-56-21


In [20]:
end_time: float = time.time()
elapsed_time: float = end_time - start_time
elapsed_minutes: int = int(elapsed_time // 60)
elapsed_seconds: float = elapsed_time % 60

logger.info(f"⏱️ Total execution time: {elapsed_minutes}m {elapsed_seconds:.2f}s")
logger.info("✅ Notebook execution completed successfully.")

2025-08-05 12:00:09 - INFO - ⏱️ Total execution time: 3m 48.03s
2025-08-05 12:00:09 - INFO - ✅ Notebook execution completed successfully.


Built with ❤️ using [**HP AI Studio**](https://hp.com/ai-studio).