# Chain of Verification Tests
This notebook loads `Streamlit_app/llm_extractor_pipeline .py` (note the space before `.py`; it matches the path used in the import cell) and drives the `llm_extractor` helper with the `chain_of_verification` flag enabled.
It focuses on validating the multi-step reasoning logic without needing to talk to a live Ollama server.

## Notebook structure
1. Import the pipeline module directly from its path.
2. Provide lightweight stand-ins for the filtered-data input, the Ollama model loader, and the conversational history wrapper (`RunnableWithMessageHistory`).
3. Feed canned responses into `llm_extractor(..., chain_of_verification=True)`, which is expected to route them through `_run_chain_of_verification`, and inspect the resulting dataframe payload.

In [None]:
from pathlib import Path
import importlib.util
import sys

# Locate the pipeline script relative to the current working directory, so the
# notebook has to be started from the directory that contains "Streamlit_app".
# The space before ".py" is part of the file name used by this notebook.
PROJECT_ROOT = Path.cwd()
PIPELINE_PATH = PROJECT_ROOT / "Streamlit_app" / "llm_extractor_pipeline .py"

# Fail early with an actionable message instead of an error raised from deep
# inside the import machinery.
if not PIPELINE_PATH.is_file():
    raise FileNotFoundError(
        f"Pipeline module not found at {PIPELINE_PATH}; "
        "start the notebook from the project root."
    )

spec = importlib.util.spec_from_file_location("llm_pipeline", PIPELINE_PATH)
if spec is None or spec.loader is None:
    raise ImportError(f"Cannot build an import spec for {PIPELINE_PATH}")

llm_pipeline = importlib.util.module_from_spec(spec)
# Register the module before executing it (same order as the importlib
# "import a source file directly" recipe).
sys.modules["llm_pipeline"] = llm_pipeline
try:
    spec.loader.exec_module(llm_pipeline)
except BaseException:
    # Do not leave a half-initialised module registered if execution fails.
    sys.modules.pop("llm_pipeline", None)
    raise

# Notebook-style sanity check: evaluating the attribute raises AttributeError
# if the module does not define `llm_extractor`.
llm_pipeline.llm_extractor


In [None]:
import pandas as pd
from contextlib import contextmanager
from unittest.mock import patch
from langchain_core.messages import SystemMessage, HumanMessage

class SimpleFilteredData:
    """Minimal stand-in for the ``filtered_data`` argument of ``llm_extractor``.

    Wraps a single text and exposes only ``collect()`` and ``to_pandas()``.

    NOTE(review): this was originally described as mimicking a Spark
    dataframe; the ``collect().to_pandas()`` chain looks closer to a Polars
    LazyFrame -- confirm against the pipeline module.
    """

    def __init__(self, text: str):
        # The text is kept as-is and only wrapped into a frame on demand.
        self._text = text

    def collect(self) -> "SimpleFilteredData":
        """Return this same object so that ``collect().to_pandas()`` chains."""
        return self

    def to_pandas(self) -> pd.DataFrame:
        """Return a one-row pandas DataFrame with a single ``text`` column."""
        single_row = {"text": self._text}
        return pd.DataFrame([single_row])

@contextmanager
def mock_chain_of_verification(responses):
    """Temporarily swap the pipeline's conversation runnable and model loader.

    While the context is active:

    * ``llm_pipeline.RunnableWithMessageHistory`` is replaced by a stub class
      whose ``invoke`` returns the canned ``responses`` one per call, in order;
    * ``llm_pipeline.load_model_ollama`` is replaced by a function that accepts
      any arguments and returns a bare placeholder ``object()``.

    Both attributes are restored when the context exits.

    Args:
        responses: Iterable of values to hand back from successive ``invoke``
            calls. It is materialised once on entry, so a one-shot iterator or
            generator is safe even if the stub is instantiated several times.

    Raises:
        RuntimeError: from the stub's ``invoke`` when it is called more times
            than there are canned responses.
    """
    # Snapshot once, up front: calling list(responses) inside __init__ would
    # exhaust a one-shot iterable on the first instantiation and leave every
    # later stub instance without any response.
    canned = tuple(responses)

    class DummyConversation:
        """Stub that replays the canned responses; each instance starts at 0."""

        def __init__(self, *_args, **_kwargs):
            # Constructor arguments are accepted and ignored.
            self._responses = list(canned)
            self._cursor = 0

        def invoke(self, payload, config=None):
            """Return the next canned response; ``payload``/``config`` are ignored."""
            if self._cursor >= len(self._responses):
                raise RuntimeError("No more canned responses left for the mock conversation.")
            value = self._responses[self._cursor]
            self._cursor += 1
            return value

    def fake_model_loader(*_args, **_kwargs):
        # Return a placeholder object in place of a real model.
        return object()

    with patch.object(llm_pipeline, "RunnableWithMessageHistory", DummyConversation):
        with patch.object(llm_pipeline, "load_model_ollama", fake_model_loader):
            yield


In [None]:
# Short French clinical note used as the single document under test.
clinical_note = """Patient de 58 ans consultant pour douleur thoracique constrictive depuis 2 heures.
Pas de fièvre mais légère dyspnée à l'effort et antécédent de tabagisme arrêté depuis 5 ans."""

# Prompt pair handed to llm_extractor. The prompt texts are runtime data and
# are kept verbatim; adjacent literals are only split for readability.
# NOTE(review): "{cr_medical}" is presumably a template placeholder that the
# pipeline fills with the note text -- confirm in the pipeline module.
messages = [
    SystemMessage(
        content=(
            "Tu es un assistant clinique qui extrait "
            "des concepts médicaux structuré en JSON."
        )
    ),
    HumanMessage(
        content=(
            "Identifie quatre concepts clé du texte fourni, "
            "renvoie uniquement la liste JSON conforme au schéma "
            "concept/context/presence. "
            "Le compte rendu: {cr_medical}"
        )
    ),
]

# Wrap the note in the lightweight dataframe stand-in defined earlier.
filtered_data = SimpleFilteredData(clinical_note)


In [None]:
# Three canned model outputs, replayed in this order by the mocked conversation.
# NOTE(review): the names suggest draft -> verification -> final answer; which
# pipeline step consumes which response should be confirmed in the pipeline.

# Draft answer: a JSON list preceded by a sentence of prose.
initial_response = """Voici la structure demandée :
[
  {"concept": "douleur thoracique", "context": "Episode aigu décrit dans l'introduction", "presence": true},
  {"concept": "dyspnée", "context": "Essoufflement à l'effort mentionné paragraphe 2", "presence": true},
  {"concept": "fièvre", "context": "Absence de fièvre signalée explicitement", "presence": false},
  {"concept": "tabagisme", "context": "Arrêt depuis 5 ans", "presence": false}
]
"""

# Free-text verification commentary; it contains no JSON.
analysis_response = """Je vérifie concept par concept :
- Douleur thoracique confirmée par la première phrase.
- Dyspnée décrite comme légère.
- Fièvre explicitement absente => présence = false.
- Tabagisme arrêté => concept présent mais absence actuelle.
Toutes les entrées respectent le schéma demandé."""

# Final answer: the same four concepts with reworded contexts.
final_response = """Reproduction stricte :
[
  {"concept": "douleur thoracique", "context": "Douleur constrictive aiguë", "presence": true},
  {"concept": "dyspnée", "context": "Essoufflement d'effort léger", "presence": true},
  {"concept": "fièvre", "context": "Aucune élévation thermique constatée", "presence": false},
  {"concept": "tabagisme", "context": "Arrêt du tabac il y a 5 ans", "presence": false}
]
"""

# Order matters: the stub returns one entry per `invoke` call.
canned_responses = [initial_response, analysis_response, final_response]

# Model and sampling settings forwarded unchanged to llm_extractor.
generation_settings = {
    "model_name": "llama3.1",
    "num_ctx": 4096,
    "max_output": 512,
    "temperature": 0.2,
    "top_p": 0.9,
    "top_k": 40,
    "max_retries": 1,
}

with mock_chain_of_verification(canned_responses):
    result_df = llm_pipeline.llm_extractor(
        messages=messages,
        filtered_data=filtered_data,
        chain_of_verification=True,
        **generation_settings,
    )

# Last expression of the cell: the notebook renders the returned dataframe.
result_df


In [None]:
# Inspect the first (index label 0) row of the extractor output.
# NOTE(review): "concept_extracted" is assumed to hold a list of concept dicts
# and "brut_response" the raw model output / verification log -- confirm
# against the pipeline module.
concepts = pd.DataFrame(result_df.loc[0, "concept_extracted"])
logs = result_df.loc[0, "brut_response"]
print("Concepts structurés :")
display(concepts)
# The leading newline must be written as an escape: a literal line break inside
# a single-quoted string is a SyntaxError.
print("\nJournal de la chaîne de vérification :")
print(logs)
