# Run validation of LLM-judge evaluation

## Imports

In [1]:
import aisuite as ai
from pathlib import Path
from textentlib.utils import read_configuration
from textentlib.llm_utils import fetch_prompts
from textentlib.llm_utils import query_llm, serialize_llm_responses, LLMrequest, LLMresponse, gt_metadata_to_dataframe

## Read configuration

In [2]:
config = read_configuration(Path('../data/config.yaml'))
llms = config['validation']['models']
#base_path = Path('/Users/mromanel/Documents/UniGe-TextEnt/chrono-spatial-processing/')
base_path = Path('/home/users/r/romanelm/chrono-spatial-processing')
gt_path = base_path / config['validation']['groundtruth_path']
pregen_prompts_path = base_path / config['validation']['pregenerated_prompts_path']

In [3]:
print('\n'.join(llms))

ollama:phi4-mini:latest
ollama:gemma3:12b
ollama:mistral-small:24b
ollama:deepseek-r1:14b
ollama:deepseek-r1:32b
openai:o1-mini
openai:gpt-4o
deepseek:deepseek-reasoner
anthropic:claude-3-7-sonnet-20250219


## Get validation docs and related prompts

In [4]:
df_gt_metadata = gt_metadata_to_dataframe(Path(gt_path))
columns_to_keep = ['author', 'title', 'publication_date', 'document_length', 'keep_fine_tuning']
df_annotated_docs = df_gt_metadata[(df_gt_metadata.exclude == 0) & (df_gt_metadata.annotated == 1)][columns_to_keep]

In [5]:
df_validation_docs = df_annotated_docs[df_annotated_docs.keep_fine_tuning == 1]
validation_doc_ids = df_validation_docs.index.to_list()

In [6]:
validation_doc_ids

['bpt6k10901623',
 'bpt6k9807756q',
 'bpt6k852913n',
 'bpt6k5772699f',
 'bpt6k1090242p']

In [7]:
llm_requests = fetch_prompts(pregen_prompts_path, validation_doc_ids)

In [8]:
len(llm_requests)

15

In [9]:
llm_responses = []
llm_responses_path = Path('../data/validation/llm_responses')

client = ai.Client()
client.configure({"ollama" : {"timeout": 600}})

for model in llms:
    llm_responses += query_llm(client, model, llm_requests, llm_responses_path)

Skipping request for document bpt6k10901623[prompt-summary.txt] using model ollama:phi4-mini:latest as it already exists
Skipping request for document bpt6k10901623[prompt-excerpt.txt] using model ollama:phi4-mini:latest as it already exists
Skipping request for document bpt6k10901623[prompt-metadata.txt] using model ollama:phi4-mini:latest as it already exists
Skipping request for document bpt6k9807756q[prompt-summary.txt] using model ollama:phi4-mini:latest as it already exists
Skipping request for document bpt6k9807756q[prompt-excerpt.txt] using model ollama:phi4-mini:latest as it already exists
Skipping request for document bpt6k9807756q[prompt-metadata.txt] using model ollama:phi4-mini:latest as it already exists
Skipping request for document bpt6k5772699f[prompt-excerpt.txt] using model ollama:phi4-mini:latest as it already exists
Skipping request for document bpt6k5772699f[prompt-summary.txt] using model ollama:phi4-mini:latest as it already exists
Skipping request for document 

ValueError: OpenAI API key is missing. Please provide it in the config or set the OPENAI_API_KEY environment variable.

## Prepare data for human scoring