### 1. Imports

In [1]:
import os
import json

from langchain_openai import ChatOpenAI
from ragas.llms import LangchainLLMWrapper
from datasets import load_dataset

from document_processor import TextProcessor, ImageProcessor, PageImageProcessor, ImageTextualSummaryProcessor
from multimodal_rag import MultimodalRAG
from embedder import OpenAIEmbedder, ColPaliEmbedder
from pdf_to_qa import generate_qa_for_pdf, generate_chartQA_pdf_and_json
from evaluation import evaluate_generation, compute_mrr_at_k, compute_recall_at_k, compute_precision_at_k, compute_f1_score, compute_map_at_k

### 2. Configuration

In [None]:
# Experiment settings: the QA dataset to evaluate and the retrieval cut-off.
dataset = "CoinQA"
k = 10

# Source PDF for each supported dataset, keyed by the upper-cased dataset name.
_PDF_BY_DATASET = {
    "COINQA": "knowledge/subset_riksbanken.pdf",
    "CHARTQA": "knowledge/subset_ChartQA.pdf",
}

_dataset_key = dataset.upper()
if _dataset_key not in _PDF_BY_DATASET:
    raise ValueError("Dataset not supported")
PDF_FILE = _PDF_BY_DATASET[_dataset_key]

# Document processors passed to the RAG pipeline below.
text_processor = TextProcessor(OpenAIEmbedder())
image_processor = ImageProcessor(ColPaliEmbedder(), dataset)
# Alternative processors, currently disabled:
#page_image_processor = PageImageProcessor(ColPaliEmbedder())
#image_textual_summary_processor = ImageTextualSummaryProcessor(OpenAIEmbedder(), dataset)

active_processors = [text_processor, image_processor]
rag = MultimodalRAG(active_processors, PDF_FILE)

# For Ragas evaluation
evaluator_chat_model = ChatOpenAI(model="gpt-4o-mini")
evaluator_llm = LangchainLLMWrapper(evaluator_chat_model)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### 3. Generate Dataset

In [3]:
# The QA JSON path is derived from the PDF file name; an existing file is reused as-is.
pdf_basename = os.path.basename(PDF_FILE)
qa_filepath = "json_files/QA_" + pdf_basename.replace('.pdf', '.json')

if os.path.exists(qa_filepath):
    print(f"Using existing QA file: {qa_filepath}")
else:
    # No cached QA file: build one with the generator that matches the dataset.
    dataset_key = dataset.upper()

    if dataset_key == "COINQA":
        generate_qa_for_pdf(PDF_FILE, json_output_path=qa_filepath)
        print(f"Generated new CoinQA file: {qa_filepath}")

    elif dataset_key == "CHARTQA":
        # Only the first 20 test examples are used for the ChartQA subset.
        chartqa = load_dataset('lmms-lab/ChartQA', split='test')
        subset_chartqa = chartqa.select(range(20))

        generate_chartQA_pdf_and_json(
            subset_chartqa,
            pdf_output_path=PDF_FILE,
            json_output_path=qa_filepath,
        )
        print(f"Generated new ChartQA file: {qa_filepath}")

Using existing QA file: json_files/QA_subset20_riksbanken.json


### 4. Answer the QA Questions

In [4]:
# Load the question/answer pairs from the QA file.
with open(qa_filepath, 'r', encoding='utf-8') as f:
    qa_data = json.load(f)

# One record per question: query, retrieved contexts, generated answer, reference answer.
rag_generated_answers = []

# Cache file for the generated answers; the name combines the QA file name and rag.name.
rag_answers_path = "json_files/rag_generated_answers_" + os.path.basename(qa_filepath).replace('.json', f'_{rag.name}.json')

if os.path.exists(rag_answers_path):
    # Reuse cached answers; the context manager guarantees the handle is closed.
    with open(rag_answers_path, 'r', encoding='utf-8') as f:
        rag_generated_answers = json.load(f)
    print(f"Using existing RAG generated answers file: {rag_answers_path}")
else:
    for qa in qa_data:
        query = qa["question"]
        reference = qa["answer"]

        # Retrieve the top-k documents, then answer the question from them.
        relevant_docs = rag.get_most_relevant_docs(query, top_k=k)
        response = rag.generate_answer(query, relevant_docs)
        rag_generated_answers.append(
            {
                "query": query,
                "retrieved_contexts": relevant_docs,
                "generated_answer": response,
                "true_answer": reference,
            }
        )

    # Save the answers to the cache path checked above, so the next run finds them.
    # `output_dataset_file` is kept as an alias in case other cells refer to it.
    output_dataset_file = rag_answers_path
    os.makedirs(os.path.dirname(output_dataset_file), exist_ok=True)
    with open(output_dataset_file, 'w', encoding='utf-8') as f:
        json.dump(rag_generated_answers, f, ensure_ascii=False, indent=4)
    print(f"Generated new RAG generated answers file: {output_dataset_file}")

TypeError: expected Tensor as element 0 in argument 0, but got list

### 5. Evaluate Retrieval

In [None]:
# Parallel lists, one entry per generated answer:
#   all_real_pages      - reference page(s) taken from the QA file, always stored as a list
#   all_retrieved_pages - page numbers of the contexts retrieved for that question
all_real_pages = []
all_retrieved_pages = []

for answer_record in rag_generated_answers:
    # First QA entry whose question equals this record's query.
    # next() raises StopIteration when no entry matches.
    matching_pages = (
        qa_item["page_number"]
        for qa_item in qa_data
        if qa_item["question"] == answer_record["query"]
    )
    gold_page = next(matching_pages)

    context_pages = []
    for context in answer_record["retrieved_contexts"]:
        context_pages.append(context["page_number"])

    # A single int page is wrapped in a list; any other value is stored unchanged.
    if isinstance(gold_page, int):
        gold_page = [gold_page]

    all_real_pages.append(gold_page)
    all_retrieved_pages.append(context_pages)

# Function to test a specific question by index
def test_question(index):
    if index < 1 or index > len(rag_generated_answers):
        print("Invalid index. Please select a number between 1 and 5.")
        return

    rag_answer = rag_generated_answers[index - 1]
    real_page = all_real_pages[index - 1]
    retrieved_pages = all_retrieved_pages[index - 1]

    print(f"Question: {rag_answer['query']}")
    print(f"True Answer: {rag_answer['true_answer']}")
    print(f"Generated Answer: {rag_answer['generated_answer']}")
    print(f"Real Page(s): {real_page}")
    print(f"Retrieved Pages: {retrieved_pages}")
    return real_page, retrieved_pages

# Example usage: inspect a single question (1-based index; 5 selects the fifth question)
# NOTE(review): test_question returns the page lists of ONE question, whereas the
# "test everything" path below passes one list per question - confirm the metric
# functions accept both shapes before enabling this line.
#real_page, retrieved_pages = test_question(5)

# Or test everything
real_page, retrieved_pages = all_real_pages, all_retrieved_pages

# Retrieval metrics, all computed at the same cut-off k.
mrr = compute_mrr_at_k(retrieved_pages, real_page, k)
print(f"MRR@{k}: {mrr:.2f}")
recall = compute_recall_at_k(retrieved_pages, real_page, k)
print(f"Recall@{k}: {recall:.2f}")
precision = compute_precision_at_k(retrieved_pages, real_page, k)
print(f"Precision@{k}: {precision:.2f}")
f1_score = compute_f1_score(retrieved_pages, real_page, k)
print(f"F1 Score@{k}: {f1_score:.2f}")
# Stored as map_at_k so the built-in map() stays usable in later cells.
map_at_k = compute_map_at_k(retrieved_pages, real_page, k)
print(f"mAP Score: {map_at_k:.2f}")

### 6. Evaluate Generation

In [None]:
# Generation results are cached under results/; the file name combines the QA file
# name and rag.name.
results_folder = "results"
os.makedirs(results_folder, exist_ok=True)
results_file_path = os.path.join(results_folder, os.path.basename(qa_filepath).replace('.json', f'_{rag.name}.json'))

# Check if the results file already exists
if os.path.exists(results_file_path):
    print(f"Results already exist: {results_file_path}")
    print("Loading existing results...")
    # Read the file once: echo the raw text, then parse that same text.
    with open(results_file_path, 'r', encoding='utf-8') as f:
        cached_text = f.read()
    print(cached_text)
    try:
        faithfulness_and_relevance = json.loads(cached_text)
    except json.JSONDecodeError:
        print(f"Error: The file {results_file_path} contains invalid JSON or is empty.")
        faithfulness_and_relevance = None
else:
    # Evaluate generation
    faithfulness_and_relevance = str(evaluate_generation(rag_generated_answers, evaluator_llm))

    # Replace single quotes with double quotes so the text can be parsed as JSON.
    # NOTE(review): this assumes the string form of the result is a flat dict-style
    # repr without apostrophes or non-JSON tokens - confirm against evaluate_generation.
    json_string = faithfulness_and_relevance.replace("'", '"')

    # Check now that the text will reload, instead of discovering it on the next run.
    try:
        json.loads(json_string)
    except json.JSONDecodeError as exc:
        print(f"Warning: results are not valid JSON and will not reload from {results_file_path}: {exc}")

    with open(results_file_path, 'w', encoding='utf-8') as f:
        f.write(json_string)  # Write string instead of using json.dump()
    print(f"Results saved to: {results_file_path}")
    print(f"Results saved to: {results_file_path}")