In [13]:

import os
import pandas as pd
from pathlib import Path
from bert_score import score

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.vectorstores import InMemoryVectorStore

from qanda import QandA

In [16]:
FILE_PATH = Path("jsondata/Rodier-Finding.jsonl")
GEN_MODEL = "gemma3"  # Can swap to "llama3.2" or "phi4-mini"
EMBED_MODEL = "mxbai-embed-large"
VDB = InMemoryVectorStore
TOP_K = 3

PROMPT = ChatPromptTemplate.from_template(
    """Context information is below.
    \n---------------------\n
    {context}
    \n---------------------\n
    Given the context information and not prior knowledge, answer the query.\n
    Query: {input}\n
    Answer:\n""",
)

In [18]:
qanda = QandA(gen_model=GEN_MODEL,
              embed_model=EMBED_MODEL, 
              vdb=VDB,
              file_path=FILE_PATH,
              top_k=TOP_K,
              prompt=PROMPT)

Initializing, please wait...
Loading jsondata\Rodier-Finding.jsonl
Question Answer chain ready.


In [20]:
QUESTIONS = [
    "Who is the coroner?", 
    "Who is the deceased?", 
    "What was the cause of death?"
]

for i, question in enumerate(QUESTIONS, 1):
    answer = qanda.ask(question)
    print(f"Question {i}: {question}")
    print(f"Answer: {answer}\n")

Question 1: Who is the coroner?
Answer: Sarah Helen Linton, Deputy State Coroner.

Question 2: Who is the deceased?
Answer: Frank Edward Rodier is the deceased.

Question 3: What was the cause of death?
Answer: The cause of death remains unascertained. The report states, “his cause of death must remain unascertained.” It does, however, acknowledge the possibility that injuries sustained from the rocks contributed to his death.



In [21]:
verbose_answers = []
sources_list = []

for i, question in enumerate(QUESTIONS, 1):
    answer, sources = qanda.ask(question, verbose=True)
    verbose_answers.append(answer)
    sources_list.append(sources)
    
    print(f"Question {i}: {question}")
    print(f"Answer: {answer}\n")
    print("Sources:")
    for src in sources:
        print(f"  Source {src['source']}:")
        print(f"    Text: {src['text'][:200]}...")  # Truncate for readability
        print(f"    Page: {src['page']}")
        print(f"    Document: {src['document']}\n")
    print("-" * 50)

Question 1: Who is the coroner?
Answer: Sarah Helen Linton, Deputy State Coroner.

Sources:
  Source 1:
    Text: Counsel Appearing:
Senior Constable C Robertson assisted the Coroner .
Case(s) referred to in decision(s):
Nil...
    Page: 1
    Document: data/Rodier-Finding.pdf

  Source 2:
    Text: [2024] WACOR 35
Coroners Act 1996 (Section 26(1))...
    Page: 2
    Document: data/Rodier-Finding.pdf

  Source 3:
    Text: [2024] WACOR 35
JURISDICTION
CORONER'S COURT OF WESTERN AUSTRALIA
ACT
CORONERS ACT 1996
CORONER
SARAH HELEN LINTON, DEPUTY STATE CORONER
HEARD
14 AUGUST 2024
DELIVERED
14 AUGUST 2024
FILE NO/S
CORC 32...
    Page: 1
    Document: data/Rodier-Finding.pdf

--------------------------------------------------
Question 2: Who is the deceased?
Answer: Frank Edward Rodier

Sources:
  Source 1:
    Text: IS DEATH ESTABLISHED?
17. As is clear from the above; I am satisfied beyond reasonable doubt that Frank Rodier is deceased and that he died on 25 1975 in the sea after he was

In [22]:
def calculate_bertscore_df(df):
    """
    Computes BERTScore metrics for a DataFrame with 'CORRECT_ANSWER' and 'LLM_ANSWER' columns.
    
    Args:
        df (pd.DataFrame): Input DataFrame.
    
    Returns:
        pd.DataFrame: Augmented with BERT_PRECISION, BERT_RECALL, BERT_F1 columns.
    """
    references = df['CORRECT_ANSWER'].tolist()
    candidates = df['LLM_ANSWER'].tolist()
    
    precision, recall, f1 = score(candidates, references, lang="en", verbose=True)
    
    df['BERT_PRECISION'] = precision.tolist()
    df['BERT_RECALL'] = recall.tolist()
    df['BERT_F1'] = f1.tolist()
    
    return df

In [23]:
import pandas as pd
from bert_score import score
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

QUESTIONS = [
    "Who is the coroner?", 
    "Who is the deceased?", 
    "What was the cause of death?"
]
GEN_MODEL = "gemma3" 
verbose_answers = [
    "Sarah Helen Linton",  # Example from basic query
    "Frank Edward Rodier",
    "The cause of death was unascertained."
]  

CORRECT_ANSWERS = [
    "Sarah Helen Linton",  # Coroner
    "Frank Edward Rodier",  # Deceased
    "unascertained"         # Cause of death
]

def calculate_bertscore_df(df):
    """
    Computes BERTScore metrics for a DataFrame with 'CORRECT_ANSWER' and 'LLM_ANSWER' columns.
    
    Args:
        df (pd.DataFrame): Input DataFrame.
    
    Returns:
        pd.DataFrame: Augmented with BERT_PRECISION, BERT_RECALL, BERT_F1 columns.
    """
    references = df['CORRECT_ANSWER'].tolist()
    candidates = df['LLM_ANSWER'].tolist()
    
    precision, recall, f1 = score(candidates, references, lang="en", verbose=True)
    
    df['BERT_PRECISION'] = precision.tolist()
    df['BERT_RECALL'] = recall.tolist()
    df['BERT_F1'] = f1.tolist()
    
    return df

data = {
    'FILENAME': ['Rodier-Finding'] * len(QUESTIONS),
    'MODEL': [GEN_MODEL] * len(QUESTIONS),
    'QUESTION': QUESTIONS,
    'CORRECT_ANSWER': CORRECT_ANSWERS,
    'LLM_ANSWER': verbose_answers  # Use verbose answers for evaluation
}

df = pd.DataFrame(data)
scores_df = calculate_bertscore_df(df)

# Display the results
print("Evaluation Results:")
display(scores_df)  # Use display() for nicer Jupyter output

# Optional: Save to CSV for reporting
scores_df.to_csv('evaluation_results.csv', index=False)
print("\nResults saved to 'evaluation_results.csv'")

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/1 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/1 [00:00<?, ?it/s]

done in 1.11 seconds, 2.71 sentences/sec
Evaluation Results:


Unnamed: 0,FILENAME,MODEL,QUESTION,CORRECT_ANSWER,LLM_ANSWER,BERT_PRECISION,BERT_RECALL,BERT_F1
0,Rodier-Finding,gemma3,Who is the coroner?,Sarah Helen Linton,Sarah Helen Linton,1.0,1.0,1.0
1,Rodier-Finding,gemma3,Who is the deceased?,Frank Edward Rodier,Frank Edward Rodier,1.0,1.0,1.0
2,Rodier-Finding,gemma3,What was the cause of death?,unascertained,The cause of death was unascertained.,0.84594,0.869418,0.857518



Results saved to 'evaluation_results.csv'


In [11]:
!where python

C:\Users\aasis\AppData\Local\Programs\Python\Python313\python.exe
C:\Users\aasis\AppData\Local\Microsoft\WindowsApps\python.exe
