## Imports

In [None]:
import aisuite as ai

In [2]:
client = ai.Client()
client.configure({
  "ollama" : {
    "timeout": 600,
  }
})

## Test `aisuite` with dummy prompts

In [3]:
messages = [
    {"role": "system", "content": "Respond in Pirate English. Always try to include the phrase - No rum No fun."},
    {"role": "user", "content": "Tell me a joke about Captain Jack Sparrow"},
]

In [24]:
!ollama list

NAME               ID              SIZE      MODIFIED          
phi4:latest        ac896e5b8b34    9.1 GB    55 seconds ago       
gemma2:9b          ff02c3702f32    5.4 GB    41 minutes ago       
llama3.2:latest    a80c4f17acd5    2.0 GB    About an hour ago    
deepseek-r1:8b     28f8fd6cdc67    4.9 GB    2 hours ago          
llama3.3:latest    a6eb4748fd29    42 GB     3 weeks ago          


In [27]:
models = [
    #"ollama:deepseek-r1:8b",
    #"ollama:llama:3.3:latest",
    "ollama:llama3.2:latest",
    "ollama:gemma2:9b",
    "ollama:phi4:latest"
]

In [28]:
replies = {}

for selected_model in models:
    response = client.chat.completions.create(model=selected_model, messages=messages)
    replies[selected_model] = response.choices[0].message.content

In [22]:
for k,v in replies.items():
    print(f"Model: {k}; reply's length = {len(v)}")

Model: ollama:llama3.2:latest; reply's length = 243
Model: ollama:gemma2:9b; reply's length = 304


In [29]:
replies

{'ollama:llama3.2:latest': "Yer lookin' fer a joke about that scurvy dog, eh? Alright then, listen close:\n\nWhy did Captain Jack Sparrow bring a ladder aboard his ship?\n\nBecause he heard the drinks were on the house! Arrr, no rum, no fun!",
 'ollama:gemma2:9b': "Ahoy, matey! Ye want a tale 'bout ol' Jack Sparrow? \n\nGather 'round and listen close:\n\nWhy did Captain Jack Sparrow always carry two compasses? \n\nTo be sure he wasn't lost at sea...and to have one to point the way to the nearest grog stash! No rum, no fun, ye hear?  üçªüíÄ\n\n\n",
 'ollama:phi4:latest': 'Ahoy there, matey! Gather \'round for a tale o\' ol\' Cap\'n Jack Sparrow!\n\nSo, what happens when you mix Captain Jack Sparrow with a chicken?\n\nYou get... "Cluckin\' up the wrong ship!"\n\nArrr, no rum, no fun! But remember, ye never know where yer adventure will take ye next!'}

## Query LLMs with real TextEnt data

- for each document, load the pre-generated summary
- based on the summary, for each doc generate 3 prompts (metadata, metadata + incipit, metadata + summary)
- iterate over doc, iterate over prompts per doc, iterate over models, then query with triples (docu, model, prompt)

- start with a spacy document
- load the corresponding pre-generated summary
- define a `build_prompts` function that takes a `spacy_doc` as input and returns a list of tuples `('prompt-id', 'prompt-message')` 

In [98]:
import random
from pathlib import Path
from textentlib.prompting import pre_generate_prompts
from textentlib.utils import load_or_create_corpus, nlp_model_fr

In [99]:
SPACY_CORPUS_SERIALIZED_PATH = Path("../data/corpus_24022025.spacy")
PRE_GENERATED_PROMPTS_PATH = Path("../data/prompts/pregenerated")    
SAMPLE_SIZE = 50

In [100]:
spacy_corpus = load_or_create_corpus(SPACY_CORPUS_SERIALIZED_PATH)

Loaded serialize spacy corpus from ../data/corpus_24022025.spacy
Number of documents in the corpus: 594
Number of entities in the corpus: 287389
Number of tokens in the corpus: 12885306


In [101]:
docs = spacy_corpus.get_docs(nlp_model_fr.vocab)
docs = list(docs)

In [102]:
# TODO:
# - we may want to exclude documents in the validation set
# - we may want to exclude documents that are very long (> 150k tokens)
sampled_docs = random.sample(docs, SAMPLE_SIZE)

In [103]:
len(sampled_docs)

50

In [104]:
len(sampled_docs)

50

In [105]:
pre_generate_prompts(sampled_docs, PRE_GENERATED_PROMPTS_PATH)

Pre-generating prompts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 30.27it/s]







In [9]:
from pathlib import Path
from dataclasses import dataclass

@dataclass
class LLMrequest:
    prompt_id: str
    document_id: str
    prompt_path: Path
    prompt: str

@dataclass
class LLMresponse:
    document_id: str
    prompt_id: str
    prompt: str
    model_name: str
    response: str

In [10]:
import aisuite as ai

client = ai.Client()
client.configure({
  "ollama" : {
    "timeout": 600,
  }
})

In [73]:
import re
import json
import pandas as pd
from typing import Dict

import contextlib
import re
import json

JSON_PATTERN = re.compile(r"```json\n(.*?)```", re.DOTALL)
DIRECT_JSON_PATTERN = re.compile(r"\{[^}]*\}", re.DOTALL)


def try_extract_json_from_text(text: str) -> tuple[str, dict | None]:
    # function taken from https://danielvanstrien.xyz/posts/2025/deepseek/distil-deepseek-modernbert.html
    if match := JSON_PATTERN.search(text):
        json_results = match.group(1)
        with contextlib.suppress(json.JSONDecodeError):
            return text, json.loads(json_results)
    if match := DIRECT_JSON_PATTERN.search(text):
        json_text = match.group(0)
        with contextlib.suppress(json.JSONDecodeError):
            return text, json.loads(json_text)
    return text, None

def process_json_response(response_raw: str) -> Dict:
    # is response empty?
    # is response valid JSON?
    
    output_dict = {}
    output_dict['is_response_empty'] = True if response_raw.strip() == '' else False

    try:
        response_json = json.loads(response_raw)
        output_dict['is_response_valid_json'] = True
        output_dict.update(response_json)
    except json.JSONDecodeError:
        output_dict['is_response_valid_json'] = False
        _, response_json = try_extract_json_from_text(response_raw)
        if response_json:
            output_dict.update(response_json)
    return output_dict

def process_llm_responses(llm_responses_path: Path) -> pd.DataFrame:
    # each sub-folder contains the responses for a given document
    # we need to group the responses by model so that separate dataframes can be generated
    responses = []
    all_response_files = list(llm_responses_path.glob('*/*.txt'))
    for file_path in all_response_files:
        doc_id, prompt_id, model_id = file_path.name.replace('.txt', '').split('_')
        with file_path.open("r", encoding="utf-8") as file:
            response_raw = file.read()
        response = {
            "document_id": doc_id,
            "prompt_id": prompt_id,
            "model_id": model_id,
            "response_raw": response_raw
        }

        response_content = process_json_response(response_raw)
        response.update(response_content)
        responses.append(response)
    return pd.DataFrame(responses)

In [87]:
llm_responses_path = Path('../data/llm_responses')
data = process_llm_responses(llm_responses_path)

In [89]:
data[data['is_response_valid_json']==True]

Unnamed: 0,document_id,prompt_id,model_id,response_raw,is_response_empty,is_response_valid_json,period,period_reasoning,timeframe_start,timeframe_end,location,location_reasoning,location_qid
0,bpt6k15110748,prompt-excerpt,anthropic-claude-3-7-sonnet-20250219,"{\n ""period"": ""Ancient Greece or Persia"",\n...",False,True,Ancient Greece or Persia,The excerpt mentions a character with a royal ...,0550-01-01,0330-01-01,Ancient Persia (Achaemenid Empire),"The play is titled 'Panth√©e', which likely ref...",Q47246
1,bpt6k15110748,prompt-metadata,anthropic-claude-3-7-sonnet-20250219,"{\n ""period"": ""Antiquity, Achaemenid Empire...",False,True,"Antiquity, Achaemenid Empire",Panth√©e is likely based on the story of Panthe...,0550-01-01,0530-01-01,Ancient Persia (modern-day Iran),The story of Panthea takes place in the Persia...,Q794
4,bpt6k15110748,prompt-summary,anthropic-claude-3-7-sonnet-20250219,"{\n ""period"": ""Ancient Persian Empire (Acha...",False,True,Ancient Persian Empire (Achaemenid Dynasty),"The play prominently features Cyrus, who appea...",0559-01-01,0530-01-01,Persia (Ancient Persian Empire),Among the top 5 places mentioned are 'Perse' (...,Q47246
5,bpt6k15110748,prompt-excerpt,ollama-gemma2-9b,"{\n ""period"": ""16th-18th century Europe"",\n ...",False,True,16th-18th century Europe,The play is a French tragedy from the XVII cen...,1500-01-01,1800-12-31,France or a European kingdom influenced by Fre...,"The author is Tristan L'Hermite, a French play...",
7,bpt6k15110748,prompt-metadata,ollama-phi4-latest,"{\n ""period"": ""Ancient Greece"",\n ""perio...",False,True,Ancient Greece,"Tristan L'Hermite's play 'Panth√©e, trag√©die de...",-800-01-01,-146-12-31,Greece,Given that the play is a tragedy and likely in...,Q48
9,bpt6k15110748,prompt-summary,ollama-gemma2-9b,"{\n ""period"": ""17th century"",\n ""period_...",False,True,17th century,The metadata indicates the play was published ...,1600-01-01,1700-12-31,Perse,The text mentions Cyrus and the play likely dr...,Q895
11,bpt6k15110748,prompt-metadata,ollama-gemma2-9b,"{\n ""period"": ""Classical antiquity"",\n ""...",False,True,Classical antiquity,"The title 'Panth√©e' directly refers to Pan, a ...",1000-01-01,500-01-01,Ancient Greece,"The mention of Pan, a Greek god, strongly sugg...",Q394
12,bpt6k8569801,prompt-metadata,ollama-gemma2-9b,"{\n ""period"": ""17th century"",\n ""period_reas...",False,True,17th century,"The publication date is 1699, placing it withi...",1600-01-01,1700-01-01,Lyon,"The title of the play is 'Le Carnaval de Lyon,...",Q894
13,bpt6k8569801,prompt-excerpt,ollama-gemma2-9b,"{\n ""period"": ""17th century"",\n ""period_...",False,True,17th century,The metadata indicates the play was published ...,1600-01-01,1700-01-01,Lyon,The title of the play is 'Le Carnaval de Lyon'...,Q24683
15,bpt6k8569801,prompt-metadata,ollama-phi4-latest,"{\n ""period"": ""Late 17th century France"",\n...",False,True,Late 17th century France,"The play was published in 1699, and it is titl...",1698-01-01,1700-12-31,"Lyon, France",The title 'Le Carnaval de Lyon' directly sugge...,Q1524


In [80]:
data[(data['document_id'] == 'bpt6k15110748') & (data['prompt_id'] == 'prompt-summary')]

Unnamed: 0,document_id,prompt_id,model_id,response_raw,is_response_empty,is_response_valid_json,period,period_reasoning,timeframe_start,timeframe_end,location,location_reasoning,location_qid
3,bpt6k15110748,prompt-summary,ollama-llama3.2-latest,,True,False,,,,,,,
4,bpt6k15110748,prompt-summary,anthropic-claude-3-7-sonnet-20250219,"{\n ""period"": ""Ancient Persian Empire (Acha...",False,True,Ancient Persian Empire (Achaemenid Dynasty),"The play prominently features Cyrus, who appea...",0559-01-01,0530-01-01,Persia (Ancient Persian Empire),Among the top 5 places mentioned are 'Perse' (...,Q47246
9,bpt6k15110748,prompt-summary,ollama-gemma2-9b,"{\n ""period"": ""17th century"",\n ""period_...",False,True,17th century,The metadata indicates the play was published ...,1600-01-01,1700-12-31,Perse,The text mentions Cyrus and the play likely dr...,Q895
10,bpt6k15110748,prompt-summary,ollama-phi4-latest,"{\n ""period"": ""Achaemenid Empire, particula...",False,False,,,,,,,


In [69]:
data[(data['is_response_empty'] == False) & (data['is_response_valid_json'] == False)]

Unnamed: 0,document_id,prompt_id,model_id,response_raw,is_response_empty,is_response_valid_json,period,period_reasoning,timeframe_start,timeframe_end,location,location_reasoning,location_qid
6,bpt6k15110748,prompt-excerpt,ollama-phi4-latest,"{\n ""period"": ""17th century France"",\n ""...",False,False,,,,,,,
10,bpt6k15110748,prompt-summary,ollama-phi4-latest,"{\n ""period"": ""Achaemenid Empire, particula...",False,False,,,,,,,
14,bpt6k8569801,prompt-excerpt,ollama-phi4-latest,"{\n ""period"": ""Late 17th century France"",\n...",False,False,,,,,,,
27,btv1b8622118r,prompt-summary,ollama-phi4-latest,"{\n ""period"": ""17th century Europe"",\n ""...",False,False,,,,,,,


In [15]:
llm_requests = []

for subdir in Path('../data/prompts/pregenerated').iterdir():
    for file in subdir.iterdir():
        doc_id, prompt_id = file.name.split('_')
        prompt_id = prompt_id.split('.')[0]
        prompt = file.read_text()
        llm_requests.append(LLMrequest(prompt_id, doc_id, file, prompt))

In [16]:
print(len(llm_requests))

147


In [27]:
llm_responses = []
llm_responses_path = Path('../data/llm_responses')

models = [
    #"ollama:deepseek-r1:8b",
    #"ollama:llama:3.3:latest",
    "deepseek:deepseek-reasoner",
    "anthropic:claude-3-7-sonnet-20250219",
    #"ollama:llama3.2:latest",
    #"ollama:gemma2:9b",
    #"ollama:phi4:latest"
]

for model in models:
    llm_responses += query_llm(model, llm_requests[:3], llm_responses_path)

serialize_llm_responses(llm_responses, Path(llm_responses_path))

Processing prompt prompt-summary for document bpt6k15110748 using model anthropic:claude-3-7-sonnet-20250219
Processing prompt prompt-metadata for document bpt6k15110748 using model anthropic:claude-3-7-sonnet-20250219
Processing prompt prompt-excerpt for document bpt6k15110748 using model anthropic:claude-3-7-sonnet-20250219


In [28]:
for r in llm_responses:
    print(f'Model: {r.model_name}; Prompt: {r.prompt}')
    print(f'Response: {r.response}')

Model: anthropic:claude-3-7-sonnet-20250219; Prompt: Look at the following JSON object describing a theatre play in French (XVII century); the `metadata` property contains basic information about the play (author, title, publication date), while the `context` property contains information about the people and places that are most frequently mentioned in the play (such as label, mention frequency, and salient sentences where it appears).

INPUT:
```json
{
  "metadata": {
    "author": "Tristan L'Hermite",
    "title": "Panth√©e, trag√©die de M. de Tristan",
    "publication_date": "1639",
    "document_id": "bpt6k15110748"
  },
  "context": {
    "people": {
      "top_1_person": {
        "entity": {
          "label": "CYRUS",
          "frequency": 3
        },
        "related_sentences": [
          "PANTH√âE\n Seigneur, votre bont√© s'est acquise Abradate J'ai d√©p√™ch√© des miens pour lui faire savoir Qu'elles sont vos vertus, et quel est son devoir: S'il n'a chang√© d'esprit j'o

## `astropy` detour

In [148]:
import numpy as np
from astropy.time import Time

In [240]:
Range = namedtuple('Range', ['start', 'end'])

In [199]:
r1 = Range(start=Time(-480, format='jyear'), end=Time(-430, format='jyear'))

In [244]:
r2 = Range(start=Time(-300, format='jyear'), end=Time(210, format='jyear'))

In [245]:
latest_start = max(r1.start, r2.start)
earliest_end = min(r1.end, r2.end)
delta = (earliest_end - latest_start)

In [246]:
delta.jd // 365

np.float64(-131.0)

In [243]:
len(np.arange(r1.start.jd, r1.end.jd)) // 365

50

In [247]:
len(np.arange(r2.start.jd, r2.end.jd)) // 365

510

## Querying LLMs with hand-picked documents

- seed docs: find IDs of documents to process (n=5)
- find all pre-gen prompts for the seed docs
- query LLMs and serialize responses 

In [1]:
import random
from typing import List, Dict, Tuple
from pathlib import Path
from textentlib.llm_utils import query_llm, serialize_llm_responses, LLMrequest, LLMresponse

In [2]:
import aisuite as ai

client = ai.Client()
client.configure({
  "ollama" : {
    "timeout": 600,
  }
})

In [3]:
seed_document_ids = [
    "bpt6k9807756q", # ancient greece
    "bpt6k852913n", # early modern period
    "bpt6k1090242p", # ancient rome
    "bpt6k5772699f", # biblical times
    "bpt6k10901623", # middle ages
]

In [4]:
def fetch_prompts(input_path: Path, keep_document_ids: List[str]) -> List[LLMrequest]:
    """
    Fetches pre-generated prompts from the specified directory and returns a list of LLMrequest objects.

    Args:
        input_path (Path): The directory path where the prompts are located.
        keep_document_ids (List[str]): A list of document IDs to filter which files to keep.

    Returns:
        List[LLMrequest]: A list of LLMrequest objects containing the prompt ID, document ID, file path, and prompt text.
    """
    requests = []
    for file in input_path.glob(f"*/*.txt"):
        doc_id, prompt_id = file.name.split('_')
        if doc_id in keep_document_ids:
            prompt = file.read_text()
            requests.append(LLMrequest(prompt_id, doc_id, file, prompt))
    return requests

In [5]:
llm_requests = fetch_prompts(Path('../data/prompts/pregenerated'), seed_document_ids)

In [6]:
len(llm_requests)

15

In [7]:
llm_requests

[LLMrequest(prompt_id='prompt-excerpt.txt', document_id='bpt6k10901623', prompt_path=PosixPath('../data/prompts/pregenerated/bpt6k10901623/bpt6k10901623_prompt-excerpt.txt'), prompt='Look at the following JSON object describing a theatre play in French (XVII century); the `metadata` property contains basic information about the play (author, title, publication date), while the `excerpt` property contains an excerpt of 400 words sampled from around the middle of the document.\n\nINPUT:\n```json\n{\n  "metadata": {\n    "author": "Boisrobert, Fran√ßois de",\n    "title": "Th√©odore, Reyne de Hongrie, tragi-com√©die",\n    "publication_date": "1658",\n    "document_id": "bpt6k10901623"\n  },\n  "excerpt": "re; Oui j\'ai piti√© de vous, Prince, et je vous promets, Si vous vous repentez, de n\'y penser jamais, Je me reprocherai cette ardeur enrag√©e, Comme si on l\'avais bizarrement song√©e, Revenez donc √† vous, ouvrez, ouurez les yeux, Et voyez o√π vous porte un d√©sir furieux, Seriez-vou

In [None]:
llm_responses = []
llm_responses_path = Path('../data/llm_responses')

# I have no API access to `openai:o1` and `openai:o3`
# add perhaps a Mistral model?
# phi4-mini would be nice
models = [
    "openai:o1-mini",
    "openai:gpt-4o",
    "deepseek:deepseek-reasoner",
    "anthropic:claude-3-7-sonnet-20250219",
    "ollama:phi4-mini:latest", # replaced `ollama:phi4:latest`  
    "ollama:gemma2:9b", # should replace with gemma3 (`ollama:gemma3:12b`) 
    #"ollama:deepseek-r1:8b",
]

for model in models:
    llm_responses += query_llm(client, model, llm_requests, llm_responses_path)

Processing prompt prompt-excerpt.txt for document bpt6k10901623 using model openai:o1


LLMError: An error occurred: Error code: 404 - {'error': {'message': 'The model `o1` does not exist or you do not have access to it.', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}

## Prepare data for evaluation

 - read all GT annotations into a dataframe, with document_id as the index
 - read all LLM responses into a dataframe, with document_id as the index
    - append `prompt`, `model` and `document_id` ‚Äì which are all coming from the filename
 - merge the two dfs, using `gt_` as a prefix for the GT annotations dataframe, and `pred_` as a prefix for the LLM responses dataframe. 

In [None]:
import pandas as pd
from pathlib import Path

def llm_responses_to_dataframe(responses_base_path: Path) -> pd.DataFrame:
    df = process_llm_responses(responses_base_path)
    df.drop(columns=['response_raw', 'timeframe_reasoning'], inplace=True)
    
    # fusion timeframe_start and timeframe_end into a single column
    df['timeframe'] = df['timeframe_start'].astype(str) + ', ' + df['timeframe_end'].astype(str)
    df.drop(columns=['timeframe_start', 'timeframe_end'], inplace=True)

    # create a unique response ID
    df['response_id'] = df['document_id'].astype(str) + '$' + df['prompt_id'].astype(str) + '$' + df['model_id'].astype(str)
    df.set_index('response_id', inplace=True, drop=True)
    
    # rename only selected columns
    prediction_columns = ['period', 'period_reasoning', 'location', 'location_reasoning', 'location_qid', 'timeframe']
    cols = df.columns[df.columns.str.contains('|'.join(prediction_columns))]
    df.rename(columns={col: 'pred_' + col for col in df.columns if col in cols}, inplace=True)
    return df

def gt_annotations_to_dataframe(gt_base_path: Path, filename: str = 'textent-annotations - groundtruth-annotations.tsv') -> pd.DataFrame:
    df = pd.read_csv(gt_base_path / filename, sep='\t').set_index('document_id')
    df.drop(columns=['Unnamed: 11', 'Unnamed: 12', 'author', 'title', 'Anthology'], inplace=True)
    df['timeframe'] = df['timeframe_start'].astype(str) + ', ' + df['timeframe_end'].astype(str)
    df.drop(columns=['timeframe_start', 'timeframe_end'], inplace=True)
    return df.add_prefix('gt_')

def gt_metadata_to_dataframe(gt_base_path: Path, filename: str = 'textent-annotations - sample-metadata.tsv') -> pd.DataFrame:
    return pd.read_csv(gt_base_path / filename, sep='\t').set_index('document_id')

In [105]:
df_llm_responses = llm_responses_to_dataframe(Path('../data/llm_responses'))

In [106]:
df_llm_responses.tail()

Unnamed: 0_level_0,document_id,prompt_id,model_id,is_response_empty,is_response_valid_json,pred_period,pred_period_reasoning,pred_location,pred_location_reasoning,pred_location_qid,pred_timeframe
response_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
bpt6k9807756q$prompt-summary$anthropic-claude-3-7-sonnet-20250219,bpt6k9807756q,prompt-summary,anthropic-claude-3-7-sonnet-20250219,False,True,"Ancient Greece, Hellenistic period","The play mentions Pyrrhus, who is likely Pyrrh...",Sparta,"The title of the play is 'Zelonide, princesse ...",Q5690,"-323, -272"
bpt6k9807756q$prompt-metadata$deepseek-deepseek-reasoner,bpt6k9807756q,prompt-metadata,deepseek-deepseek-reasoner,False,True,Classical Greece,"The play's title references Sparta, a prominen...",Sparta,The title explicitly states 'princesse de Spar...,Q5699,"-800, -300"
bpt6k9807756q$prompt-metadata$ollama-gemma2-9b,bpt6k9807756q,prompt-metadata,ollama-gemma2-9b,False,True,Ancient Greek,"The title mentions 'Zelonide, princesse de Spa...",Greece,Sparta is located in Greece.,Q182,"-500, 146"
bpt6k9807756q$prompt-metadata$anthropic-claude-3-7-sonnet-20250219,bpt6k9807756q,prompt-metadata,anthropic-claude-3-7-sonnet-20250219,False,True,Ancient Sparta,"The title 'Zelonide, princesse de Sparte' expl...",Sparta,"The title directly states 'Zelonide, princesse...",Q5690,"-800, -146"
bpt6k9807756q$prompt-excerpt$anthropic-claude-3-7-sonnet-20250219,bpt6k9807756q,prompt-excerpt,anthropic-claude-3-7-sonnet-20250219,False,True,Ancient Greece,"The excerpt mentions 'Sparte' (Sparta), an anc...",Sparta,The title of the play explicitly mentions 'Zel...,Q5690,"-900, -146"


In [71]:
df_gt_annotations = gt_annotations_to_dataframe(Path('../data/groundtruth/'))

In [72]:
df_gt_annotations

Unnamed: 0_level_0,gt_period,gt_period_reason,gt_location,gt_location_reason,gt_location_QID,gt_timeframe
document_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
bpt6k5745752g,,,,,,"nan, nan"
bpt6k5752605t,Early modern,"- it's a comedy - typical characters: Valet, l...",Paris,- Paris is mentioned - French names,Q90,"1600, 1691"
bpt6k852919x,Early modern,- it's a comedy - There is a pr√©v√¥t - Publicat...,Poitiers,- Poitiers is mentioned - French names,Q6616,"1600, 1664"
bpt6k12804007,Early modern,- it's a comedy - Moli√®re is the author - Publ...,Paris,- Paris is mentioned - French names,Q90,"1600, 1662"
bpt6k5772310n,Ancient Greece,- This is a tragedy - Greek myth,Naxos,- Famous myth of Ariane,Q188527,"-1000, -700"
...,...,...,...,...,...,...
bpt6k62514727,Early modern,- Name of the characters - it's a comedy - Pub...,Paris,- La sc√®ne est √† Paris,Q90,"1600, 1662"
bpt6k12804415,Early modern,- Name of the characters - it's a comedy - Pub...,Paris,- Place Maubert is mentioned - Le Louvre is me...,Q90,"1600, 1663"
bpt6k8528543,Early modern,- Name of the characters - it's a tragi-comedy...,Paris,- Place Maubert is mentioned,Q90,"1600, 1656"
bpt6k5772699f,Biblical times,- History of Belshazzar,babylon,- La sc√®ne est √† Babylone,Q5684,"-580, -539"


In [17]:
df_gt_metadata = gt_metadata_to_dataframe(Path('../data/groundtruth/'))

In [18]:
df_gt_metadata.info()

<class 'pandas.core.frame.DataFrame'>
Index: 70 entries, bpt6k5745752g to bpt6k1090242p
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   author            65 non-null     object 
 1   title             70 non-null     object 
 2   annotated         70 non-null     bool   
 3   exclude           70 non-null     bool   
 4   keep_fine_tuning  70 non-null     bool   
 5   publication_date  70 non-null     int64  
 6   document_length   70 non-null     int64  
 7   link              60 non-null     object 
 8   link_OCR          70 non-null     object 
 9   notes             0 non-null      float64
dtypes: bool(3), float64(1), int64(2), object(4)
memory usage: 4.6+ KB


In [19]:
df_gt_metadata.head()

Unnamed: 0_level_0,author,title,annotated,exclude,keep_fine_tuning,publication_date,document_length,link,link_OCR,notes
document_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
bpt6k5745752g,"Boissin de Gallardon, Jean",Les trag√©dies et histoires saintes de Jean Boi...,False,True,False,1618,338338,http://catalogue.bnf.fr/ark:/12148/cb30122385g,https://github.com/TextEnt/chrono-spatial-proc...,
bpt6k5752605t,"Hauteroche, No√´l Lebreton",Les bourgeoises de qualit√© . Comedie. Par Mr d...,True,False,False,1691,103493,http://catalogue.bnf.fr/ark:/12148/cb30582284s,https://github.com/TextEnt/chrono-spatial-proc...,
bpt6k852919x,,Le Mariage de Fine-Epice. Com√©die,True,False,False,1664,134035,http://catalogue.bnf.fr/ark:/12148/cb39325270g,https://github.com/TextEnt/chrono-spatial-proc...,
bpt6k12804007,Moli√®re,"Sganarelle ou le Cocu imaginaire, com√©die avec...",True,False,False,1662,67710,http://catalogue.bnf.fr/ark:/12148/cb38650865b,https://github.com/TextEnt/chrono-spatial-proc...,
bpt6k5772310n,"Corneille, Thomas","Ariane , trag√©die. Par T. Corneille",True,False,False,1672,76240,http://catalogue.bnf.fr/ark:/12148/cb30272162g,https://github.com/TextEnt/chrono-spatial-proc...,


In [35]:
columns_to_keep = ['author', 'title', 'publication_date', 'document_length', 'keep_fine_tuning']

In [36]:
# filter out documents that are marked as to be excluded or that were not annotated
df_annotated_docs = df_gt_metadata[(df_gt_metadata.exclude == 0) & (df_gt_metadata.annotated == 1)][columns_to_keep]

In [44]:
df_sample_docs = df_annotated_docs[df_annotated_docs.keep_fine_tuning == 1]

In [45]:
df_sample_docs

Unnamed: 0_level_0,author,title,publication_date,document_length,keep_fine_tuning
document_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
bpt6k10901623,"Boisrobert, Fran√ßois de","Th√©odore, Reyne de Hongrie, tragi-com√©die",1658,80779,True
bpt6k9807756q,"Genest, Charles-Claude","Zelonide, princesse de Sparte . Tragedie",1682,79661,True
bpt6k852913n,N√©el,"L'illusion grotesque ou le point n√©cromancien,...",1678,39981,True
bpt6k5772699f,"Charenton, De","La mort de Baltazar, roy de Babilone , trag√©di...",1662,82484,True
bpt6k1090242p,,"Tite et Titus, ou Critique sur les Berenices, ...",1673,58275,True


In [60]:
df_sample_gt = df_sample_docs.join(df_gt_annotations, how='inner')

In [61]:
df_sample_gt.head()

Unnamed: 0_level_0,author,title,publication_date,document_length,keep_fine_tuning,gt_period,gt_period_reason,gt_timeframe_start,gt_timeframe_end,gt_location,gt_location_reason,gt_location_QID
document_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
bpt6k10901623,"Boisrobert, Fran√ßois de","Th√©odore, Reyne de Hongrie, tragi-com√©die",1658,80779,True,Middle Ages,- Mention the Battle of Varna (Varne),1444,1444,Sz√©kesfeh√©rv√°r,- name of the town: Albe royale,Q130212
bpt6k9807756q,"Genest, Charles-Claude","Zelonide, princesse de Sparte . Tragedie",1682,79661,True,Ancient Greece,- Siege of Sparta by Pyrrhus of Epirus,-272,-272,Sparta,- Sparta is mentioned,Q5690
bpt6k852913n,N√©el,"L'illusion grotesque ou le point n√©cromancien,...",1678,39981,True,Early modern,- Name of the characters - it's a comedy - Pub...,1600,1678,Ch√¢lons-en-Champagne,- Ch√¢lons-en-Champagne is mentioned,Q47135
bpt6k5772699f,"Charenton, De","La mort de Baltazar, roy de Babilone , trag√©di...",1662,82484,True,Biblical times,- History of Belshazzar,-580,-539,babylon,- La sc√®ne est √† Babylone,Q5684
bpt6k1090242p,,"Tite et Titus, ou Critique sur les Berenices, ...",1673,58275,True,Ancient Rome,- Titus emperor - Date of his reign,79,81,Mount Parnassus,- La sc√®ne est au Parnasse,Q105261


In [67]:
df_sample_gt

Unnamed: 0_level_0,author,title,publication_date,document_length,keep_fine_tuning,gt_period,gt_period_reason,gt_location,gt_location_reason,gt_location_QID,gt_timeframe
document_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
bpt6k10901623,"Boisrobert, Fran√ßois de","Th√©odore, Reyne de Hongrie, tragi-com√©die",1658,80779,True,Middle Ages,- Mention the Battle of Varna (Varne),Sz√©kesfeh√©rv√°r,- name of the town: Albe royale,Q130212,"1444, 1444"
bpt6k9807756q,"Genest, Charles-Claude","Zelonide, princesse de Sparte . Tragedie",1682,79661,True,Ancient Greece,- Siege of Sparta by Pyrrhus of Epirus,Sparta,- Sparta is mentioned,Q5690,"-272, -272"
bpt6k852913n,N√©el,"L'illusion grotesque ou le point n√©cromancien,...",1678,39981,True,Early modern,- Name of the characters - it's a comedy - Pub...,Ch√¢lons-en-Champagne,- Ch√¢lons-en-Champagne is mentioned,Q47135,"1600, 1678"
bpt6k5772699f,"Charenton, De","La mort de Baltazar, roy de Babilone , trag√©di...",1662,82484,True,Biblical times,- History of Belshazzar,babylon,- La sc√®ne est √† Babylone,Q5684,"-580, -539"
bpt6k1090242p,,"Tite et Titus, ou Critique sur les Berenices, ...",1673,58275,True,Ancient Rome,- Titus emperor - Date of his reign,Mount Parnassus,- La sc√®ne est au Parnasse,Q105261,"79, 81"


In [111]:
df_eval_data = df_llm_responses.merge(df_sample_gt, left_on='document_id', right_index=True)

In [None]:
df_eval_data['score_period_string'] = None
df_eval_data['score_period_timeframe'] = None
#df_eval_data['score_period_reasoning'] = None
df_eval_data['score_location_string'] = None
df_eval_data['score_location_qid'] = None
#df_eval_data['score_location_reasoning'] = None

In [None]:
display_columns = [
'prompt_id',
 'model_id',
 'document_id',
'author',
 'title',
 'publication_date',
 'document_length',
 'keep_fine_tuning',
 'gt_period',
 'pred_period',
 'score_period_string',
 'gt_timeframe',
 'pred_timeframe',
 'score_period_timeframe',
 'gt_period_reason',
 'pred_period_reasoning',
 #'score_period_reasoning',
 'gt_location',
 'pred_location',
 'score_location_string',
 'gt_location_QID',
 'pred_location_qid',
 'score_location_qid',
 'gt_location_reason',
 'pred_location_reasoning',
 #'score_location_reasoning'
 ]

In [131]:
df_eval_data[display_columns].to_csv('../data/evaluation_data.csv', sep='\t')