# Evaluation Notebook

This notebook demonstrates how to compute retrieval evaluation metrics for the cross-language information retrieval (CLIR) system.

## Metrics:
- Precision@K
- Recall@K
- F1-Score@K
- Mean Reciprocal Rank (MRR)


In [None]:
import sys
from pathlib import Path
sys.path.append(str(Path().resolve().parent))

from src.evaluation import RetrievalEvaluator
from src.retrieval import DocumentRetriever
import pandas as pd
import matplotlib.pyplot as plt


## Initialize Evaluator


In [None]:
# Create the evaluator that the evaluation cells below call.
evaluator = RetrievalEvaluator()

# Report how many documents the evaluator's retriever currently holds.
doc_count = len(evaluator.retriever.documents)
print(f"Loaded {doc_count} documents")


## Example Evaluation

For demonstration, we'll evaluate a single query against a hand-picked list of document indices that are assumed to be relevant.


In [None]:
# Single-query demo: a question about the Prime Minister, evaluated with
# top_k=5. Relevance is hand-labelled: the document at index 1 is assumed
# to be the relevant one.
query = "Who is the Prime Minister of India?"
relevant_indices = [1]

metrics = evaluator.evaluate_query(query, relevant_indices, top_k=5)

# Print one "name: value" line per metric, rounded to three decimals.
print("Evaluation Metrics:")
for name, score in metrics.items():
    print(f"{name}: {score:.3f}")


## Batch Evaluation


In [None]:
# Example batch evaluation.
# Each entry pairs a query string with the list of document indices that are
# assumed to be relevant to it (hand-labelled for this demo).
test_queries = [
    ("Who is the Prime Minister of India?", [1]),
    ("What is the capital of India?", [2]),
    ("What is the official language of India?", [4]),
]

# Evaluate all queries with top_k=5; the result is a mapping of metric name
# to value (presumably averaged across the queries -- see evaluate_batch).
avg_metrics = evaluator.evaluate_batch(test_queries, top_k=5)

print("Average Metrics:")
for metric, value in avg_metrics.items():
    print(f"{metric}: {value:.3f}")

# Save to CSV as a single-row table, one column per metric.
# The output directory is created first: DataFrame.to_csv does not create
# missing parent directories, so on a fresh checkout without ../results the
# write would otherwise fail with OSError.
metrics_path = Path("../results/performance_metrics.csv")
metrics_path.parent.mkdir(parents=True, exist_ok=True)

df = pd.DataFrame([avg_metrics])
df.to_csv(metrics_path, index=False)
print("\nMetrics saved to results/performance_metrics.csv")
