In [None]:
%load_ext autoreload

In [None]:
from pathlib import Path

from config import config
from data_processing import (
    load_and_process_csv,
    load_documents_from_csv,
    split_documents,
)
from embeddings import initialize_embeddings
from evaluation import evaluate_rag_pipeline, generate_test_dataset
from rag_chain import RAGOutput
from retrieval import build_retriever
from utils import print_separator

In [None]:
# from pubmed_scraper import PubMedScraper

# scraper = PubMedScraper(email = "olandechris@gmail.com")

# data = scraper.search_with_llm(query = "Find me 50 papers about Covid 19 from 2019 to 2025")

In [None]:
def prepare_data(input_csv: str, output_csv: str | Path | None = None) -> Path:
    """
    Prepare and process the input data.

    Prints a section banner and the input location, then delegates the actual
    work to ``load_and_process_csv(input_csv, output_csv)``.

    Args:
        input_csv: Path to input CSV file
        output_csv: Path to save processed CSV (optional). When omitted,
            ``tests.csv`` inside ``config.paths.data_dir`` is used.

    Returns:
        Path to the processed CSV file (``output_csv`` normalised to a
        ``Path``).
    """
    print_separator("DATA PREPARATION")
    if output_csv is None:
        output_csv = Path(config.paths.data_dir) / "tests.csv"

    # Load and process CSV
    print(f"Loading data from {input_csv}")
    # Only the output location is reported to the caller, so the value
    # returned by load_and_process_csv is deliberately not kept.
    load_and_process_csv(input_csv, output_csv)

    return Path(output_csv)

In [None]:
def build_rag_system(csv_path: str):
    """
    Build the complete RAG system.

    Steps, in order: initialize the embeddings from ``config.model``, load the
    documents from ``csv_path``, split them, build a retriever over the
    chunks using ``config.retriever``, and create the RAG chain on top of that
    retriever.

    Args:
        csv_path: Path to processed CSV file

    Returns:
        Tuple of (rag_chain, splitted_documents, embeddings)
    """
    print_separator("BUILDING RAG SYSTEM")
    print("Initializing Embeddings ...")

    embeddings = initialize_embeddings(
        model_name=config.model.embedding_model,
        cache_dir=config.model.embedding_cache_dir,
    )

    # Load and split the documents
    print("Loading documents...")
    documents = load_documents_from_csv(csv_path)

    print("Splitting documents...")
    splitted_documents = split_documents(documents, embeddings)
    print(f"Created {len(splitted_documents)} document chunks")

    # Build Retriever
    print("Building retriever")
    retriever = build_retriever(splitted_documents, embeddings, config.retriever)

    # Initialize RAG chain (announced once; the original printed this
    # progress message twice in a row).
    print("Initializing RAG chain...")
    rag_chain = RAGOutput(
        prompt_name="rlm/rag-prompt",
        retriever=retriever,
        llm_model=config.model.deepseek_model,
    )
    # create_chain() is called for its effect on rag_chain; its return value
    # is not used here.
    rag_chain.create_chain()

    print("RAG system built successfully")
    return rag_chain, splitted_documents, embeddings

In [None]:
def run_evaluation(
    rag_chain: RAGOutput,
    test_dataset_path: str | Path | None = None,
    results_path: str | Path | None = None,
):
    """
    Evaluate the RAG chain against a test dataset and return the results.

    Both paths fall back to files inside ``config.paths.rag_eval_dir`` when
    they are not supplied, and are handed to ``evaluate_rag_pipeline`` as
    strings.

    Args:
        rag_chain: Configured RAG chain
        test_dataset_path: Test dataset CSV; defaults to
            ``generated_testset.csv`` in the evaluation directory
        results_path: Where results are saved; defaults to
            ``results_deepseek_fastembed.csv`` in the evaluation directory

    Returns:
        Whatever ``evaluate_rag_pipeline`` returns (its ``.shape`` is printed).
    """
    print_separator("EVALUATION")

    # Resolve defaults lazily so config is only consulted when needed.
    dataset_csv = (
        config.paths.rag_eval_dir / "generated_testset.csv"
        if test_dataset_path is None
        else test_dataset_path
    )
    output_csv = (
        config.paths.rag_eval_dir / "results_deepseek_fastembed.csv"
        if results_path is None
        else results_path
    )

    pipeline_kwargs = {
        "rag_chain": rag_chain,
        "input_csv_path": str(dataset_csv),
        "output_csv_path": str(output_csv),
        "question_column": "user_input",
    }
    evaluation = evaluate_rag_pipeline(**pipeline_kwargs)

    print(f"Evaluation complete. Results shape: {evaluation.shape}")
    return evaluation

In [None]:
def generate_synthetic_testset(
    splitted_documents, embeddings, llm, testset_size: int = 10
):
    """
    Build a synthetic test dataset by delegating to ``generate_test_dataset``.

    The output location is fixed to ``generated_testset.csv`` inside
    ``config.paths.rag_eval_dir`` and is passed along as a string.

    Args:
        splitted_documents: Split documents used as the source material
        embeddings: Embeddings model
        llm: Language model
        testset_size: Number of test samples requested

    Returns:
        The dataset returned by ``generate_test_dataset`` (its length is
        printed as the sample count).
    """
    print_separator("GENERATING TEST DATASET")

    testset_csv = config.paths.rag_eval_dir / "generated_testset.csv"
    generator_kwargs = {
        "documents": splitted_documents,
        "embeddings": embeddings,
        "llm": llm,
        "testset_size": testset_size,
        "output_path": str(testset_csv),
    }
    generated = generate_test_dataset(**generator_kwargs)

    sample_count = len(generated)
    print(f"Test dataset generated with {sample_count} samples")
    return generated

In [None]:
# df = scraper.search_with_llm(query = "Find papers about the impact of Gaza war on children")

## Evaluate the RAG - add this section to evaluation.py

In [None]:
# evaluation_llm = LangchainLLMWrapper(llm)
# evaluation_embeddings = LangchainEmbeddingsWrapper(embeddings)

In [None]:
# @dataclass
# class RAGEvaluator:
#     """RAG Evaluation pipeline for model-embedding pairs."""

#     max_workers: int = 1
#     timeout: int = 180
#     generative_models: list[str] = field(default_factory=lambda: ["deepseek-chat"])
#     embedding_models: list[str] = field(default_factory=lambda: ["fastembed"])
#     metrics: list = field(
#         default_factory=lambda: [
#             ContextRecall(),
#             ContextPrecision(),
#             AnswerSimilarity(),
#             ContextEntityRecall(),
#             NoiseSensitivity(),
#             Faithfulness(),
#         ]
#     )

#     def __post_init__(self):
#         """Initialize RunConfig after dataclass initialization."""
#         self.run_config = RunConfig(max_workers=self.max_workers, timeout=self.timeout)

#     def parse_contexts(self, data: pd.DataFrame) -> pd.DataFrame:
#         """Parse retrieved_contexts from string to list."""
#         if "retrieved_contexts" in data.columns:
#             data["retrieved_contexts"] = data["retrieved_contexts"].apply(
#                 ast.literal_eval
#             )
#         return data

#     def prepare_dataset(self, data: pd.DataFrame) -> EvaluationDataset:
#         """Prepare evaluation dataset from dataframe."""
#         eval_data = data[
#             ["user_input", "reference", "response", "retrieved_contexts"]
#         ].to_dict(orient="records")
#         return EvaluationDataset.from_list(eval_data)

#     def run_evaluation(
#         self, input_csv_path: str, evaluation_embeddings
#     ) -> pd.DataFrame:
#         """
#         Run evaluation on input data.

#         Args:
#             input_csv_path: Path to input CSV file
#             evaluation_embeddings: Embeddings to use for evaluation

#         Returns:
#             DataFrame with evaluation results
#         """
#         data = pd.read_csv(input_csv_path)
#         data = self.parse_contexts(data)
#         eval_dataset = self.prepare_dataset(data)

#         evaluator_llm = LangchainLLMWrapper(ChatDeepSeek(model="deepseek-chat"))

#         results = evaluate(
#             dataset=eval_dataset,
#             metrics=self.metrics,
#             llm=evaluator_llm,
#             embeddings=evaluation_embeddings,
#             run_config=self.run_config,
#         )

#         return results.to_pandas()

#     def evaluate_all_models(self, evaluation_embeddings):
#         """
#         Evaluate all model-embedding pairs.

#         Args:
#             evaluation_embeddings: Embeddings to use for evaluation
#         """
#         for model, embedding in zip(self.generative_models, self.embedding_models):
#             model_pair = f"{model}_{embedding}"
#             output_csv_path = f"RAGEvaluation/evaluation_results_{model_pair}.csv"

#             if os.path.exists(output_csv_path):
#                 print(f"Loading existing results for {model_pair}")
#                 df = pd.read_csv(output_csv_path)
#             else:
#                 print(f"Running evaluation for {model_pair}")
#                 input_csv_path = "RAGEvaluation/results_deepseek_fastembed.csv"

#                 df = self.run_evaluation(input_csv_path, evaluation_embeddings)
#                 df.to_csv(output_csv_path, index=False)

In [None]:
# evaluator = RAGEvaluator(max_workers=1, timeout=180)
# evaluator.evaluate_all_models(evaluation_embeddings)

In [None]:
# df = pd.read_csv("RAGEvaluation/results_deepseek_fastembed.csv")

["<1-hop>\n\nArticle: Family systems approach to attachment relations, war trauma, and mental health among Palestinian children and parents.<b>Background</b>: Trauma affects the family unit as a whole; however, most existing research uses individual or, at most, dyadic approaches to analyse families with histories of trauma. <b>Objective</b>: This study aims to identify potentially distinct family types according to attachment, parenting, and sibling relations, to analyse how these family types differ with respect to war trauma, and to explore how children's mental health and cognitive processing differ across these family types. <b>Method:</b> Participants included Palestinian mothers and fathers (<i>N</i>\xa0=\xa0325) and their children (one per family; 49.4% girls; 10-13\xa0years old; mean\xa0±\xa0<i>SD</i> age\xa0=\xa011.35 ± 0.57 years) after the Gaza War of 2008-2009. Both parents reported their exposure to war trauma, secure attachment availability, and parenting practices, as w