### 1. Import

In [None]:
import os
import json

from langchain_openai import ChatOpenAI
from ragas.llms import LangchainLLMWrapper

from document_processor import TextProcessor, ImageProcessor, PageImageProcessor, ImageTextualSummaryProcessor
from multimodal_rag import MultimodalRAG
from embedder import OpenAIEmbedder, ColPaliEmbedder
from pdf_to_qa import generate_qa_for_pdf
from evaluation import evaluate_generation, compute_mrr_at_k, compute_recall_at_k, compute_precision_at_k, compute_f1_score
from datasets import load_dataset
from chartQAloader import generate_chartQA_pdf_and_json

### 2. Configuration

In [None]:
# Knowledge-base PDF to index. Uncomment one of the alternatives to switch.
# PDF_FILE = "knowledge/subset_riksbanken.pdf"
# PDF_FILE = "knowledge/ChartQA_Evaluation_Set.pdf"
PDF_FILE = "knowledge/subset_ChartQA_Evaluation_Set.pdf"

# Alternative processors (currently disabled):
# text_processor = TextProcessor(OpenAIEmbedder())
# image_processor = ImageProcessor(ColPaliEmbedder())
# page_image_processor = PageImageProcessor(ColPaliEmbedder())

# Per the original note: select no=3 for the ChartQA embeddings,
# 1 (or 2) for the other embeddings.
image_textual_summary_processor = ImageTextualSummaryProcessor(
    OpenAIEmbedder(),
    no=3,
)

# The RAG pipeline is built from the list of active processors and the PDF.
processors = [image_textual_summary_processor]
rag = MultimodalRAG(processors, PDF_FILE)

# LLM used for the Ragas evaluation.
evaluator_chat_model = ChatOpenAI(model="gpt-4o-mini")
evaluator_llm = LangchainLLMWrapper(evaluator_chat_model)

### 3. Generate Dataset

#### ChartQA

In [None]:
# Fetch the test split of ChartQA from Hugging Face.
dataset = load_dataset("lmms-lab/ChartQA", split="test")

# Keep only the first 10 entries for the subset run.
subset_dataset = dataset.select(range(10))

# Write the ChartQA PDF and its JSON QA file.
# Full-dataset variant (disabled):
# generate_chartQA_pdf_and_json(dataset, pdf_output_path='knowledge/ChartQA_Evaluation_Set.pdf', json_output_path='json_files/QA_ChartQA_Evaluation_Set.json')
generate_chartQA_pdf_and_json(
    subset_dataset,
    pdf_output_path="knowledge/subset_ChartQA_Evaluation_Set.pdf",
    json_output_path="json_files/QA_subset_ChartQA_Evaluation_Set.json",
)

#### CoinQA

In [None]:
# Expected location of the QA file: "json_files/QA_<pdf name>.json".
pdf_name = os.path.basename(PDF_FILE)
qa_filepath = "json_files/QA_" + pdf_name.replace(".pdf", ".json")

# Generate the QA file only when it is not already on disk.
if not os.path.exists(qa_filepath):
    qa_path = generate_qa_for_pdf(PDF_FILE)
    print(f"Generated new QA file: {qa_path}")
else:
    qa_path = qa_filepath
    print(f"Using existing QA file: {qa_path}")

# Load the question/answer entries.
with open(qa_path, "r", encoding="utf-8") as f:
    qa_data = json.load(f)

### 4. Answering the QA

In [None]:
# Reload the QA entries so this cell can be run on its own once qa_path is set.
with open(qa_path, 'r', encoding='utf-8') as f:
    qa_data = json.load(f)

# One dict per question: query, retrieved contexts, generated and true answer.
rag_generated_answers = []

# Cache file for the generated answers, named after the QA file.
# NOTE: the path depends only on the QA file name, so changing the processor
# or top_k does not invalidate an existing cache file; delete it to regenerate.
rag_answers_path = "json_files/rag_generated_answers_" + os.path.basename(qa_path)

if os.path.exists(rag_answers_path):
    # Use a context manager so the file handle is closed after loading.
    with open(rag_answers_path, 'r', encoding='utf-8') as f:
        rag_generated_answers = json.load(f)
    print(f"Using existing RAG generated answers file: {rag_answers_path}")
else:
    for qa in qa_data:
        query = qa["question"]
        reference = qa["answer"]

        # Retrieve the top 5 documents, then answer the query from them.
        relevant_docs = rag.get_most_relevant_docs(query, top_k=5)
        response = rag.generate_answer(query, relevant_docs)
        rag_generated_answers.append(
            {
                "query": query,
                "retrieved_contexts": relevant_docs,
                "generated_answer": response,
                "true_answer": reference,
            }
        )

    # Save to the same path the cache check above looks at, so the next run
    # finds the file.
    output_dataset_file = rag_answers_path

    with open(output_dataset_file, 'w', encoding='utf-8') as f:
        json.dump(rag_generated_answers, f, ensure_ascii=False, indent=4)
    print(f"Generated new RAG generated answers file: {output_dataset_file}")

### 5. Evaluate Retrieval
Choose whether to evaluate a single question or the entire retrieval process.

In [None]:
# Per-question ground-truth pages and retrieved pages, in the same order as
# rag_generated_answers.
all_real_pages, all_retrieved_pages = [], []

# Index the QA entries by question text once instead of rescanning qa_data for
# every answer. setdefault keeps the first entry when a question is repeated,
# which matches a first-match linear search.
qa_item_by_question = {}
for item in qa_data:
    qa_item_by_question.setdefault(item["question"], item)

for rag_answer in rag_generated_answers:
    query = rag_answer["query"]
    if query not in qa_item_by_question:
        # Fail with a descriptive error rather than an opaque StopIteration.
        raise KeyError(f"No QA entry found for question: {query!r}")

    real_page = qa_item_by_question[query]["page_number"]
    retrieved_pages = [doc["page_number"] for doc in rag_answer["retrieved_contexts"]]

    # Normalise a single integer page to a one-element list; any other value
    # is stored as-is.
    all_real_pages.append([real_page] if isinstance(real_page, int) else real_page)
    all_retrieved_pages.append(retrieved_pages)

# Function to test a specific question by index
def test_question(index):
    """Print one evaluated question and return its page data.

    Args:
        index: 1-based position of the question in ``rag_generated_answers``.

    Returns:
        A ``(real_page, retrieved_pages)`` tuple taken from ``all_real_pages``
        and ``all_retrieved_pages`` at that position, or ``None`` (after
        printing a message) when ``index`` is out of range.
    """
    total = len(rag_generated_answers)
    if index < 1 or index > total:
        # Report the actual valid range instead of a hard-coded upper bound.
        print(f"Invalid index. Please select a number between 1 and {total}.")
        return None

    # Convert the 1-based index to a 0-based list position.
    position = index - 1
    rag_answer = rag_generated_answers[position]
    real_page = all_real_pages[position]
    retrieved_pages = all_retrieved_pages[position]

    print(f"Question: {rag_answer['query']}")
    print(f"True Answer: {rag_answer['true_answer']}")
    print(f"Generated Answer: {rag_answer['generated_answer']}")
    print(f"Real Page(s): {real_page}")
    print(f"Retrieved Pages: {retrieved_pages}")
    return real_page, retrieved_pages

# Example usage: inspect a single question (1-based index; here question 5)
real_page, retrieved_pages = test_question(5)

# Or test everything
# real_page, retrieved_pages = all_real_pages, all_retrieved_pages

# Each metric is printed at every cutoff before moving on to the next metric.
metric_reports = (
    ("MRR", compute_mrr_at_k),
    ("Recall", compute_recall_at_k),
    ("Precision", compute_precision_at_k),
    ("F1 Score", compute_f1_score),
)

for label, compute_metric in metric_reports:
    for k in (3, 5):
        score = compute_metric(retrieved_pages, real_page, k)
        print(f"{label}@{k}: {score:.2f}")

### 6. Evaluate Generation

In [None]:
# Evaluate the generated answers with the evaluator LLM and show the result.
faithfulness_and_relevance = evaluate_generation(
    rag_generated_answers,
    evaluator_llm,
)
print(faithfulness_and_relevance)