<h1 style="text-align: center; font-size: 50px;">ISEF Evaluation with LLaMA</h1>

# Notebook Overview
- Imports
- Configurations
- Verify Assets
- Load Data
- Helper Functions
- Batch Evaluation Loop
- Sort TotalScore and scores

# Imports

In [None]:
%pip install -r ../requirements.txt --quiet

In [None]:
# 1. Imports & Setup
import time
import json
import logging
from pathlib import Path
import re
from typing import List, Dict, Any
import httpx
import os
import sys
import warnings
import torch

import pandas as pd
from tqdm.auto import tqdm
from llama_cpp import Llama, LlamaGrammar
import multiprocessing

# === Internal modules ===

# Expose the directory two levels above the working directory to the import
# system so that project-internal modules can be imported from this notebook.
src_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
if sys.path.count(src_path) == 0:
    sys.path.append(src_path)

# Configurations

In [None]:
# Suppress Python warnings
# NOTE(review): with no category argument this hides every warning for the
# rest of the kernel session, including deprecation notices from the imported
# libraries — consider narrowing the filter to the specific noisy category.
warnings.filterwarnings("ignore")

In [None]:
# Configuration Constants
# Number of abstracts sent to the model in a single prompt.
BATCH_SIZE = 5

# Instruction text prepended to every batch prompt. It names the five scoring
# criteria and shows the JSON layout the model is asked to produce.
# NOTE(review): the text asks for a JSON *object* with a "results" key, while
# GRAMMAR_URL below points at the "json_arr" grammar (top-level array). The
# post-processing cell looks for array elements carrying a "results" key, so
# the two appear to be reconciled by the model wrapping the object in an
# array — confirm before changing either side.
SYSTEM_INSTRUCTIONS = (
    "You are an expert evaluator. Score each abstract 1–10 on:\n"
    "- Originality\n"
    "- ScientificRigor\n"
    "- Clarity\n"
    "- Relevance\n"
    "- Feasibility\n\n"
    "Respond *only* with a valid JSON object of the form:\n"
    "{\n"
    '  "results": [\n'
    "    {\"BoothNumber\": \"...\", \"Originality\": 7, …}\n"
    "  ]\n"
    "}\n"
    "Do not include any other text, explanation, or markup."
)

# Remote GBNF grammar file fetched at run time (requires network access).
# NOTE(review): the URL tracks the `master` branch, so its content can change
# between runs — consider pinning a commit or vendoring the file.
GRAMMAR_URL = "https://raw.githubusercontent.com/ggerganov/llama.cpp/master/grammars/json_arr.gbnf"
# Location of the local GGUF model file.
# NOTE(review): absolute, environment-specific path — adjust per deployment.
MODEL_PATH = "/home/jovyan/datafabric/llama2-7b/ggml-model-f16-Q5_K_M.gguf"

In [None]:
%%time

# 2. Load & Configure Local LLaMA
# Alternative GGUF builds that have been tried with this notebook; point
# MODEL_PATH at one of them to switch models:
#   /home/jovyan/datafabric/llama2-7b/ggml-model-f16-Q5_K_M.gguf
#   /home/jovyan/local/llama3.1-8b/Meta-Llama-3.1-8B-Instruct-Q6_K.gguf
#   /home/jovyan/datafabric/Meta-Llama-3-8B-Instruct-Q8_0/Meta-Llama-3-8B-Instruct-Q8_0.gguf
local_model_path = MODEL_PATH

# Only load-time options belong in the constructor. Sampling options
# (max_tokens, temperature, repeat_penalty, stop, streaming) are per-call
# arguments in llama-cpp-python; when passed here they are swallowed by the
# constructor's **kwargs and have no effect, so they were removed. The actual
# sampling settings are supplied where the model is called.
# The thread-count keyword is `n_threads`; the previous `num_threads` spelling
# was likewise ignored, leaving the library default in place.
# NOTE(review): `rope_scaling="dynamic"`, `f16_kv` and `low_vram` were also
# dropped because they are not current constructor parameters — confirm
# against the installed llama-cpp-python version.
# NOTE(review): n_ctx=8192 may exceed the context length the selected model
# was trained with — verify for the model at MODEL_PATH.
llm = Llama(
    model_path=local_model_path,            # path to the local GGUF model
    # ─────── GPU & batching ───────
    n_gpu_layers=-1,                        # offload all layers to the GPU
    n_batch=512,                            # prompt-processing batch size
    # ─────── Context ───────
    n_ctx=8192,                             # context window in tokens
    # ─────── Memory ───────
    use_mmap=True,                          # memory-map the model file
    use_mlock=False,                        # do not pin pages in RAM
    # ─────── Execution ───────
    seed=42,                                # fixed seed for repeatable runs
    n_threads=multiprocessing.cpu_count(),  # CPU threads for non-offloaded work
    verbose=False,                          # suppress llama.cpp logging
)

CPU times: user 2.09 s, sys: 16 s, total: 18.1 s
Wall time: 8min 43s


In [None]:
# Notebook-wide logger that writes timestamped messages to the cell output.
logger = logging.getLogger("isef_evalation_llama_logger")
logger.setLevel(logging.INFO)
# Keep messages out of the root logger so they are not printed a second time.
logger.propagate = False

formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S")

# logging.getLogger returns the same object for the same name, so re-running
# this cell used to attach one more handler each time and every message was
# then emitted repeatedly. Attach the handler only when none is present.
if not logger.handlers:
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(formatter)
    logger.addHandler(stream_handler)

In [None]:
# Report which compute device PyTorch can see: CUDA when available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# Emit a start marker through the notebook logger.
logger.info('Notebook execution started.')

# Verify Assets

In [None]:
def log_asset_status(asset_path: str, asset_name: str, success_message: str, failure_message: str) -> None:
    """
    Log whether a required asset exists on disk.

    A present asset is reported at INFO level. A missing asset is reported at
    WARNING level so that the problem stands out from routine progress
    messages. Trailing whitespace is stripped from the message so an empty
    `success_message` or `failure_message` does not leave a dangling space.

    Parameters:
        asset_path (str): File or directory path to check.
        asset_name (str): Name of the asset for logging context.
        success_message (str): Text appended to the log line if the asset exists.
        failure_message (str): Text appended to the log line if the asset is missing.

    Returns:
        None. The outcome is only written to the module-level `logger`.
    """
    if Path(asset_path).exists():
        logger.info(f"{asset_name} is properly configured. {success_message}".rstrip())
    else:
        logger.warning(f"{asset_name} is not properly configured. {failure_message}".rstrip())


# Check that the file referenced by MODEL_PATH exists and log the outcome.
log_asset_status(
    MODEL_PATH,
    "LLaMA Local model",
    success_message="",
    failure_message="Please create and download the required assets in your project on AI Studio.",
)

# Load Data

In [None]:
# Source CSV with the project abstracts, and the destination for the ranked table.
input_path = "../2025 ISEF Project Abstracts.csv"
output_path = "../Sorted by Score - 2025 ISEF Project Abstracts.csv"

# Read the abstracts and cast BoothNumber to str so it can be used as a text key.
df = pd.read_csv(input_path).astype({"BoothNumber": str})

In [4]:
# Dimensions of the loaded table as (rows, columns).
df.shape

(10, 4)

In [5]:
# Preview the first rows to check that the expected columns were read.
df.head()

Unnamed: 0,BoothNumber,ParentCategory,ProjectTitle,AbstractText
0,ANIM001,Animal Sciences,Investigating the Synergistic Effects of High-...,The project targeted two specific nutrients ty...
1,ANIM002,Animal Sciences,Evaluating the Efficacy of Novel Carbon Dioxid...,"Honeybees are indispensable pollinators, contr..."
2,ANIM003,Animal Sciences,Circadian Evolution in Action: How Latitude Sh...,The circadian rhythm is a 24-hour biological c...
3,ANIM004T,Animal Sciences,Tube-Worm Hunters: Ecological Aspects of Ficop...,Non-native species pose a global threat to aqu...
4,ANIM005,Animal Sciences,PawPath: An IMU-Based Gait Detection and Disea...,"PawPath is a non-invasive, risk-free gait moni..."


# Helper Functions

In [None]:
#  Helper Functions
def chunk_list(lst: List[int], size: int) -> List[List[int]]:
    """
    Split a list into consecutive chunks of at most `size` items.

    Parameters:
        lst (List[int]): Items to split (here: DataFrame index labels).
        size (int): Maximum number of items per chunk; must be at least 1.

    Returns:
        List[List[int]]: The chunks in original order. The final chunk may be
        shorter than `size`; an empty input yields an empty list.

    Raises:
        ValueError: If `size` is smaller than 1. Without this check a size of
        0 fails inside `range()` with an unrelated message and a negative
        size silently returns no chunks at all.
    """
    if size < 1:
        raise ValueError(f"size must be a positive integer, got {size!r}")
    return [lst[start : start + size] for start in range(0, len(lst), size)]


# Download the "json_arr" GBNF grammar, which constrains generation to a
# top-level JSON array, and compile it for use with the model.
# The request follows redirects, is bounded by a timeout, and fails fast on a
# non-success status so that an HTTP error page is never handed to the
# grammar parser (which would otherwise fail with a misleading parse error).
grammar_response = httpx.get(GRAMMAR_URL, follow_redirects=True, timeout=30.0)
grammar_response.raise_for_status()
grammar_text = grammar_response.text
json_arr_grammar = LlamaGrammar.from_string(grammar_text)

def evaluate_batch(batch_df: pd.DataFrame) -> List[Dict[str, Any]]:
    """
    Ask the model to score one batch of abstracts and return the parsed JSON.

    The prompt is SYSTEM_INSTRUCTIONS followed by a JSON dump of the batch
    (BoothNumber and AbstractText per row). Generation is constrained by
    `json_arr_grammar` and run at temperature 0.

    Parameters:
        batch_df (pd.DataFrame): Rows to score; must provide the columns
            `BoothNumber` and `AbstractText`.

    Returns:
        List[Dict[str, Any]]: The decoded list when the reply is a JSON array,
        or the value under "results" (empty list if absent) when it is a JSON
        object.

    Raises:
        RuntimeError: If the reply decodes to something other than a list or dict.
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    # Assemble the per-abstract payload sent to the model.
    records = []
    for _, row in batch_df.iterrows():
        records.append(
            {"BoothNumber": str(row.BoothNumber), "AbstractText": row.AbstractText}
        )
    full_prompt = "\n\n".join([SYSTEM_INSTRUCTIONS, json.dumps(records, indent=2)])

    # Grammar-constrained generation with no token cap.
    completion: Dict[str, Any] = llm(
        full_prompt,
        grammar=json_arr_grammar,
        max_tokens=-1,
        temperature=0.0,
    )
    raw_text = completion["choices"][0]["text"]

    # Accept either a bare array or an object wrapping it under "results".
    parsed = json.loads(raw_text)
    if isinstance(parsed, dict):
        return parsed.get("results", [])
    if isinstance(parsed, list):
        return parsed
    raise RuntimeError(f"Unexpected JSON shape: {type(parsed)}\n{raw_text}")

# Batch Evaluation Loop

In [None]:
#  Batch Evaluation Loop
# Score the abstracts BATCH_SIZE rows at a time and collect whatever items the
# model returns for each batch into one flat list.
results: List[Dict[str, Any]] = []
start_time = time.time()

for batch_idxs in tqdm(
    chunk_list(df.index.tolist(), BATCH_SIZE),
    desc="Scoring batches",
    unit="batch"
):
    batch_df = df.loc[batch_idxs]
    batch_results = evaluate_batch(batch_df)
    results.extend(batch_results)

# start_time was previously recorded but never used; report the run duration
# and the amount of model output so long runs leave a trace in the log.
elapsed_seconds = time.time() - start_time
logger.info(
    f"Scored {len(df)} abstracts in {elapsed_seconds:.1f}s; "
    f"model returned {len(results)} top-level items."
)

Scoring batches:   0%|          | 0/2 [00:00<?, ?batch/s]

# Sort TotalScore and scores

In [9]:
# — 1. Flatten the model output into one record per project —
# Each item in `results` is normally a wrapper dict {"results": [...]}.
# Bare score records (dicts carrying "BoothNumber") are accepted as well, so a
# reply that omits the wrapper is not silently thrown away.
flat_results = []
for item in results:
    if not isinstance(item, dict):
        continue
    if "results" in item:
        inner = item["results"]
        if isinstance(inner, list):
            flat_results.extend(rec for rec in inner if isinstance(rec, dict))
    elif "BoothNumber" in item:
        flat_results.append(item)

score_cols = ["Originality", "ScientificRigor", "Clarity", "Relevance", "Feasibility"]

# — 2. Build scores DataFrame —
scores_df = pd.DataFrame(flat_results)
# Guarantee the key and score columns exist even when the model returned
# nothing usable; otherwise the column lookups below raise KeyError.
for col in ["BoothNumber"] + score_cols:
    if col not in scores_df.columns:
        scores_df[col] = pd.NA

# Ensure the key is a string, and coerce scores to numbers (non-numeric → NaN).
scores_df["BoothNumber"] = scores_df["BoothNumber"].astype(str)
scores_df[score_cols] = scores_df[score_cols].apply(pd.to_numeric, errors="coerce")
# Keep a single score row per project so the merge cannot duplicate abstracts.
scores_df = scores_df.drop_duplicates(subset="BoothNumber", keep="first")

# — 3. Merge back into the original table without modifying `df` —
combined = (
    df.assign(BoothNumber=df["BoothNumber"].astype(str))
    .merge(scores_df, on="BoothNumber", how="left")
)

# — 4. Compute TotalScore & sort —
# min_count leaves TotalScore empty unless all five scores are present, so an
# unscored project is not reported as a genuine total of 0.
combined["TotalScore"] = combined[score_cols].sum(axis=1, min_count=len(score_cols))
combined = (
    combined
    .sort_values("TotalScore", ascending=False, kind="stable")
    .reset_index(drop=True)
)

unscored_count = int(combined["TotalScore"].isna().sum())
if unscored_count:
    logger.warning(f"{unscored_count} project(s) have no complete set of scores.")

# — 5. Persist the ranked table to the configured output file —
combined.to_csv(output_path, index=False)
logger.info(f"Sorted results written to {output_path}")

# `combined` now holds the projects, all five scores and TotalScore, sorted.
combined

Unnamed: 0,BoothNumber,ParentCategory,ProjectTitle,AbstractText,Originality,ScientificRigor,Clarity,Relevance,Feasibility,TotalScore
0,ANIM010,Animal Sciences,Generating RNAi Pesticides to Specifically Tar...,Fire ants cause billions of dollars of economi...,9,9,9,9,9,45
1,ANIM002,Animal Sciences,Evaluating the Efficacy of Novel Carbon Dioxid...,"Honeybees are indispensable pollinators, contr...",9,9,9,9,8,44
2,ANIM006,Animal Sciences,Tailsense: Classifying Dogs' Emotions Using Ba...,Purpose\r\nDog barks and visual cues are main ...,8,9,9,8,9,43
3,ANIM005,Animal Sciences,PawPath: An IMU-Based Gait Detection and Disea...,"PawPath is a non-invasive, risk-free gait moni...",8,9,8,8,9,42
4,ANIM009,Animal Sciences,Behind the Banner: Antibiotic Resistance in Li...,The purpose of this study was to investigat...,7,9,9,8,8,41
5,ANIM004T,Animal Sciences,Tube-Worm Hunters: Ecological Aspects of Ficop...,Non-native species pose a global threat to aqu...,7,8,8,8,9,40
6,ANIM003,Animal Sciences,Circadian Evolution in Action: How Latitude Sh...,The circadian rhythm is a 24-hour biological c...,8,8,8,8,7,39
7,ANIM007,Animal Sciences,Investigating the Infection Rates of Hector's ...,Hector’s lantern fish (Lampanyctodes hectoris)...,6,8,8,7,8,37
8,ANIM001,Animal Sciences,Investigating the Synergistic Effects of High-...,The project targeted two specific nutrients ty...,4,6,7,5,8,30
9,ANIM008T,Animal Sciences,Full Observation System for Monitoring Animal ...,"In many farms around the world, production is ...",4,5,6,5,7,27


In [None]:
# Emit a completion marker through the notebook logger.
logger.info('Notebook execution completed.')

Built with ❤️ using Z by HP AI Studio.