In [1]:
import os
import pandas as pd
import requests
import tempfile
from dotenv import load_dotenv
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import DocArrayInMemorySearch
import giskard
from giskard.rag import KnowledgeBase, generate_testset
import nest_asyncio
# Patch asyncio so that an event loop can be re-entered.
# NOTE(review): presumably required because the giskard calls below run async code
# while the notebook kernel already has a running loop — confirm.
nest_asyncio.apply()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load environment variables from a .env file, if one is found.
# The value displayed under this cell is load_dotenv()'s return value.
load_dotenv()

True

In [3]:
# Set up Ollama + embedding
# Point giskard at the local Ollama server for both its LLM and its embedding model.
# The server URL is defined once and reused so the two endpoints cannot drift apart.
api_base = "http://localhost:11434"
giskard.llm.set_llm_model("ollama/mistral", api_base=api_base, disable_structured_output=True)
giskard.llm.set_embedding_model("ollama/nomic-embed-text", api_base=api_base)

In [4]:
# Load the Excel file with 'id' and 'link' columns
df_links = pd.read_excel("dataset.xlsx")
# Normalise header names (surrounding whitespace, letter case) so the lookups
# below do not depend on how the spreadsheet header was typed.
df_links.columns = df_links.columns.str.strip().str.lower()

# Fail fast with a readable message instead of a KeyError deep inside the
# processing loop when the spreadsheet does not have the expected layout.
missing_columns = {"id", "link"} - set(df_links.columns)
if missing_columns:
    raise ValueError(
        f"dataset.xlsx is missing required column(s): {sorted(missing_columns)}"
    )


In [5]:
# HuggingFace sentence-transformers model used to embed the PDF chunks
# that go into the in-memory vector store.
EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-MiniLM-L3-v2"
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-MiniLM-L3-v2")


In [10]:
import traceback

# Seconds to wait on the remote server before a download is treated as failed.
# Without a timeout a single unresponsive host would stall the whole run.
DOWNLOAD_TIMEOUT_S = 60
# Directory that receives one testset file per processed PDF.
TESTSET_DIR = "testsets"

# Created once up front instead of on every iteration.
os.makedirs(TESTSET_DIR, exist_ok=True)

# For every (id, link) row: download the PDF, split it into chunks, build a
# vector store and a giskard knowledge base from the chunks, generate a small
# testset and save it. A failure on one row is reported and the loop moves on.
for drug_id, pdf_url in zip(df_links["id"], df_links["link"]):
    print(f"\n📄 Processing {drug_id}...")

    temp_pdf_path = None
    try:
        # Download PDF
        response = requests.get(pdf_url, timeout=DOWNLOAD_TIMEOUT_S)
        response.raise_for_status()

        # delete=False: the loader reopens the file by path after this handle is
        # closed, so the file must outlive the `with` block. It is removed in
        # the `finally` clause below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            # Record the path before writing so cleanup still happens if the write fails.
            temp_pdf_path = tmp_file.name
            tmp_file.write(response.content)

        # Load and split PDF into chunks
        loader = UnstructuredPDFLoader(temp_pdf_path)
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
        documents = loader.load_and_split(text_splitter)

        if not documents:
            # Nothing to embed or cluster; report it clearly via the handler below.
            raise ValueError("no text could be extracted from the PDF")
        if len(documents) < 2:
            print(f"[{drug_id}] ⚠️ Warning: only {len(documents)} chunk(s) extracted — may cause clustering issues.")

        # Embed documents
        doc_vectorstore = DocArrayInMemorySearch.from_documents(documents, embedding=embedding_model)

        # Create knowledge base
        df_docs = pd.DataFrame([d.page_content for d in documents], columns=["text"])
        doc_knowledge_base = KnowledgeBase(df_docs)

        # Generate 3 test questions
        doc_testset = generate_testset(
            doc_knowledge_base,
            num_questions=3,
            agent_description="A chatbot answering questions about medicine drugs"
        )

        # Save testset
        out_path = f"{TESTSET_DIR}/testset_{drug_id}.jsonl"
        doc_testset.save(out_path)
        print(f"[✓] Saved testset for {drug_id} to {out_path}")

        # Publish the three objects used by the later cells only once the whole
        # row has succeeded, so they always describe the same PDF. Rebinding them
        # one by one would leave a mix of two PDFs after a mid-row failure.
        vectorstore, knowledge_base, testset = doc_vectorstore, doc_knowledge_base, doc_testset

    except Exception as e:
        print(f"[✗] Failed for {drug_id}: {e}")
        traceback.print_exc()

    finally:
        # Remove the downloaded PDF; the extracted chunks are already in memory.
        if temp_pdf_path is not None:
            try:
                os.remove(temp_pdf_path)
            except OSError as cleanup_error:
                # Best effort: a file that cannot be removed must not abort the run.
                print(f"[{drug_id}] ⚠️ Could not remove temporary file {temp_pdf_path}: {cleanup_error}")



📄 Processing 1...




2025-05-16 14:06:16,478 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.


  warn(


2025-05-16 14:07:45,028 pid:22696 MainThread giskard.rag  INFO     Found 4 topics in the knowledge base.


Generating questions:  67%|██████▋   | 2/3 [01:20<00:43, 43.70s/it]

2025-05-16 14:11:43,503 pid:22696 MainThread giskard.rag  ERROR    Encountered error in question generation: 'question'. Skipping.
2025-05-16 14:11:43,504 pid:22696 MainThread giskard.rag  ERROR    'question'
Traceback (most recent call last):
  File "c:\Users\dunca\OneDrive\Desktop\Disertation\LLM\venv\lib\site-packages\giskard\rag\question_generators\base.py", line 59, in generate_questions
    yield self.generate_single_question(knowledge_base, *args, **kwargs, seed_document=doc)
  File "c:\Users\dunca\OneDrive\Desktop\Disertation\LLM\venv\lib\site-packages\giskard\rag\question_generators\simple_questions.py", line 108, in generate_single_question
    question=generated_qa["question"],
KeyError: 'question'


Generating questions:  67%|██████▋   | 2/3 [03:58<01:59, 119.24s/it]

[✓] Saved testset for 1 to testsets/testset_1.jsonl

📄 Processing 2...





2025-05-16 14:11:51,770 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.




2025-05-16 14:13:33,434 pid:22696 MainThread giskard.rag  INFO     Found 5 topics in the knowledge base.


Generating questions: 100%|██████████| 3/3 [02:16<00:00, 45.39s/it]


[✓] Saved testset for 2 to testsets/testset_2.jsonl

📄 Processing 3...
2025-05-16 14:15:56,533 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.




2025-05-16 14:17:00,197 pid:22696 MainThread giskard.rag  INFO     Found 3 topics in the knowledge base.


Generating questions: 100%|██████████| 3/3 [01:38<00:00, 32.70s/it]


[✓] Saved testset for 3 to testsets/testset_3.jsonl

📄 Processing 4...
2025-05-16 14:18:47,133 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.




2025-05-16 14:20:17,013 pid:22696 MainThread giskard.rag  INFO     Found 4 topics in the knowledge base.


Generating questions: 100%|██████████| 3/3 [02:01<00:00, 40.52s/it]


[✓] Saved testset for 4 to testsets/testset_4.jsonl

📄 Processing 5...
2025-05-16 14:22:20,886 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.


  warn(
Disconnection_distance = inf has removed 0 edges.
It has fully disconnected 2 vertices.
You might consider using find_disconnected_points() to find and remove these points from your data.
Use umap.utils.disconnected_vertices() to identify them.
  warn(
Traceback (most recent call last):
  File "C:\Users\dunca\AppData\Local\Temp\ipykernel_22696\902400888.py", line 34, in <module>
    testset = generate_testset(
  File "c:\Users\dunca\OneDrive\Desktop\Disertation\LLM\venv\lib\site-packages\giskard\rag\testset_generation.py", line 63, in generate_testset
    _ = knowledge_base.topics
  File "c:\Users\dunca\OneDrive\Desktop\Disertation\LLM\venv\lib\site-packages\giskard\rag\knowledge_base.py", line 240, in topics
    self._topics_inst = self._find_topics()
  File "c:\Users\dunca\OneDrive\Desktop\Disertation\LLM\venv\lib\site-packages\giskard\rag\knowledge_base.py", line 251, in _find_topics
    clustering = hdbscan.fit(self._reduced_embeddings)
  File "c:\Users\dunca\OneDrive\Deskt

[✗] Failed for 5: zero-size array to reduction operation maximum which has no identity

📄 Processing 6...
2025-05-16 14:22:35,080 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.




2025-05-16 14:25:44,726 pid:22696 MainThread giskard.rag  INFO     Found 9 topics in the knowledge base.


Generating questions: 100%|██████████| 3/3 [02:02<00:00, 40.91s/it]


[✓] Saved testset for 6 to testsets/testset_6.jsonl

📄 Processing 7...
2025-05-16 14:27:59,510 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.




2025-05-16 14:31:45,091 pid:22696 MainThread giskard.rag  INFO     Found 10 topics in the knowledge base.


Generating questions: 100%|██████████| 3/3 [01:57<00:00, 39.23s/it]


[✓] Saved testset for 7 to testsets/testset_7.jsonl

📄 Processing 8...
2025-05-16 14:33:44,732 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.


  warn(
Disconnection_distance = inf has removed 0 edges.
It has fully disconnected 2 vertices.
You might consider using find_disconnected_points() to find and remove these points from your data.
Use umap.utils.disconnected_vertices() to identify them.
  warn(
Traceback (most recent call last):
  File "C:\Users\dunca\AppData\Local\Temp\ipykernel_22696\902400888.py", line 34, in <module>
    testset = generate_testset(
  File "c:\Users\dunca\OneDrive\Desktop\Disertation\LLM\venv\lib\site-packages\giskard\rag\testset_generation.py", line 63, in generate_testset
    _ = knowledge_base.topics
  File "c:\Users\dunca\OneDrive\Desktop\Disertation\LLM\venv\lib\site-packages\giskard\rag\knowledge_base.py", line 240, in topics
    self._topics_inst = self._find_topics()
  File "c:\Users\dunca\OneDrive\Desktop\Disertation\LLM\venv\lib\site-packages\giskard\rag\knowledge_base.py", line 251, in _find_topics
    clustering = hdbscan.fit(self._reduced_embeddings)
  File "c:\Users\dunca\OneDrive\Deskt

[✗] Failed for 8: zero-size array to reduction operation maximum which has no identity

📄 Processing 9...
2025-05-16 14:33:57,120 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.




2025-05-16 14:36:44,758 pid:22696 MainThread giskard.rag  INFO     Found 8 topics in the knowledge base.


Generating questions: 100%|██████████| 3/3 [01:55<00:00, 38.63s/it]


[✓] Saved testset for 9 to testsets/testset_9.jsonl

📄 Processing 10...
2025-05-16 14:38:43,727 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.
2025-05-16 14:38:46,746 pid:22696 MainThread giskard.rag  INFO     Found 1 topics in the knowledge base.


  warn(
Generating questions: 100%|██████████| 3/3 [02:27<00:00, 49.30s/it]


[✓] Saved testset for 10 to testsets/testset_10.jsonl

📄 Processing 11...
2025-05-16 14:41:20,774 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.




2025-05-16 14:42:24,778 pid:22696 MainThread giskard.rag  INFO     Found 3 topics in the knowledge base.


Generating questions: 100%|██████████| 3/3 [01:47<00:00, 35.73s/it]


[✓] Saved testset for 11 to testsets/testset_11.jsonl

📄 Processing 12...
2025-05-16 14:44:19,678 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.




2025-05-16 14:45:56,411 pid:22696 MainThread giskard.rag  INFO     Found 5 topics in the knowledge base.


Generating questions: 100%|██████████| 3/3 [01:30<00:00, 30.26s/it]


[✓] Saved testset for 12 to testsets/testset_12.jsonl

📄 Processing 13...
2025-05-16 14:47:34,933 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.




2025-05-16 14:49:05,363 pid:22696 MainThread giskard.rag  INFO     Found 4 topics in the knowledge base.


Generating questions: 100%|██████████| 3/3 [02:10<00:00, 43.35s/it]


[✓] Saved testset for 13 to testsets/testset_13.jsonl

📄 Processing 14...
2025-05-16 14:51:17,493 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.


  warn(
Disconnection_distance = inf has removed 0 edges.
It has fully disconnected 2 vertices.
You might consider using find_disconnected_points() to find and remove these points from your data.
Use umap.utils.disconnected_vertices() to identify them.
  warn(
Traceback (most recent call last):
  File "C:\Users\dunca\AppData\Local\Temp\ipykernel_22696\902400888.py", line 34, in <module>
    testset = generate_testset(
  File "c:\Users\dunca\OneDrive\Desktop\Disertation\LLM\venv\lib\site-packages\giskard\rag\testset_generation.py", line 63, in generate_testset
    _ = knowledge_base.topics
  File "c:\Users\dunca\OneDrive\Desktop\Disertation\LLM\venv\lib\site-packages\giskard\rag\knowledge_base.py", line 240, in topics
    self._topics_inst = self._find_topics()
  File "c:\Users\dunca\OneDrive\Desktop\Disertation\LLM\venv\lib\site-packages\giskard\rag\knowledge_base.py", line 251, in _find_topics
    clustering = hdbscan.fit(self._reduced_embeddings)
  File "c:\Users\dunca\OneDrive\Deskt

[✗] Failed for 14: zero-size array to reduction operation maximum which has no identity

📄 Processing 15...
2025-05-16 14:51:32,634 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.




2025-05-16 14:54:38,640 pid:22696 MainThread giskard.rag  INFO     Found 9 topics in the knowledge base.


Generating questions: 100%|██████████| 3/3 [02:16<00:00, 45.44s/it]


[✓] Saved testset for 15 to testsets/testset_15.jsonl

📄 Processing 16...
2025-05-16 14:57:06,785 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.




2025-05-16 15:00:47,798 pid:22696 MainThread giskard.rag  INFO     Found 10 topics in the knowledge base.


Generating questions: 100%|██████████| 3/3 [01:31<00:00, 30.62s/it]


[✓] Saved testset for 16 to testsets/testset_16.jsonl

📄 Processing 17...
2025-05-16 15:02:21,674 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.


  warn(
Disconnection_distance = inf has removed 0 edges.
It has fully disconnected 2 vertices.
You might consider using find_disconnected_points() to find and remove these points from your data.
Use umap.utils.disconnected_vertices() to identify them.
  warn(
Traceback (most recent call last):
  File "C:\Users\dunca\AppData\Local\Temp\ipykernel_22696\902400888.py", line 34, in <module>
    testset = generate_testset(
  File "c:\Users\dunca\OneDrive\Desktop\Disertation\LLM\venv\lib\site-packages\giskard\rag\testset_generation.py", line 63, in generate_testset
    _ = knowledge_base.topics
  File "c:\Users\dunca\OneDrive\Desktop\Disertation\LLM\venv\lib\site-packages\giskard\rag\knowledge_base.py", line 240, in topics
    self._topics_inst = self._find_topics()
  File "c:\Users\dunca\OneDrive\Desktop\Disertation\LLM\venv\lib\site-packages\giskard\rag\knowledge_base.py", line 251, in _find_topics
    clustering = hdbscan.fit(self._reduced_embeddings)
  File "c:\Users\dunca\OneDrive\Deskt

[✗] Failed for 17: zero-size array to reduction operation maximum which has no identity

📄 Processing 18...
2025-05-16 15:02:33,594 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.




2025-05-16 15:05:22,350 pid:22696 MainThread giskard.rag  INFO     Found 8 topics in the knowledge base.


Generating questions: 100%|██████████| 3/3 [02:11<00:00, 43.82s/it]


[✓] Saved testset for 18 to testsets/testset_18.jsonl

📄 Processing 19...
2025-05-16 15:07:36,587 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.
2025-05-16 15:07:39,640 pid:22696 MainThread giskard.rag  INFO     Found 1 topics in the knowledge base.


  warn(
Generating questions: 100%|██████████| 3/3 [02:12<00:00, 44.11s/it]


[✓] Saved testset for 19 to testsets/testset_19.jsonl

📄 Processing 20...
2025-05-16 15:09:58,479 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.




2025-05-16 15:11:04,829 pid:22696 MainThread giskard.rag  INFO     Found 3 topics in the knowledge base.


Generating questions: 100%|██████████| 3/3 [01:47<00:00, 36.00s/it]


[✓] Saved testset for 20 to testsets/testset_20.jsonl

📄 Processing 21...
2025-05-16 15:13:00,622 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.




2025-05-16 15:14:38,693 pid:22696 MainThread giskard.rag  INFO     Found 5 topics in the knowledge base.


Generating questions: 100%|██████████| 3/3 [01:48<00:00, 36.18s/it]


[✓] Saved testset for 21 to testsets/testset_21.jsonl

📄 Processing 22...
2025-05-16 15:16:34,583 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.




2025-05-16 15:18:17,026 pid:22696 MainThread giskard.rag  INFO     Found 5 topics in the knowledge base.


Generating questions: 100%|██████████| 3/3 [02:19<00:00, 46.60s/it]


[✓] Saved testset for 22 to testsets/testset_22.jsonl

📄 Processing 23...
2025-05-16 15:20:41,195 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.


  warn(


2025-05-16 15:21:40,380 pid:22696 MainThread giskard.rag  INFO     Found 3 topics in the knowledge base.


Generating questions: 100%|██████████| 3/3 [01:32<00:00, 30.94s/it]


[✓] Saved testset for 23 to testsets/testset_23.jsonl

📄 Processing 24...
2025-05-16 15:23:19,652 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.




2025-05-16 15:25:03,580 pid:22696 MainThread giskard.rag  INFO     Found 5 topics in the knowledge base.


Generating questions: 100%|██████████| 3/3 [02:02<00:00, 40.89s/it]


[✓] Saved testset for 24 to testsets/testset_24.jsonl

📄 Processing 25...
2025-05-16 15:27:11,424 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.


  warn(


2025-05-16 15:28:07,504 pid:22696 MainThread giskard.rag  INFO     Found 3 topics in the knowledge base.


Generating questions: 100%|██████████| 3/3 [01:34<00:00, 31.48s/it]


[✓] Saved testset for 25 to testsets/testset_25.jsonl

📄 Processing 26...
2025-05-16 15:29:44,549 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.
2025-05-16 15:29:49,873 pid:22696 MainThread giskard.rag  INFO     Found 1 topics in the knowledge base.


  warn(
Generating questions: 100%|██████████| 3/3 [01:48<00:00, 36.13s/it]


[✓] Saved testset for 26 to testsets/testset_26.jsonl

📄 Processing 27...
2025-05-16 15:31:42,033 pid:22696 MainThread giskard.rag  INFO     Finding topics in the knowledge base.
2025-05-16 15:31:47,395 pid:22696 MainThread giskard.rag  INFO     Found 1 topics in the knowledge base.


  warn(
Generating questions: 100%|██████████| 3/3 [01:43<00:00, 34.45s/it]

[✓] Saved testset for 27 to testsets/testset_27.jsonl





In [11]:
from langchain.prompts import PromptTemplate

# Prompt skeleton for the question-answering chain: {context} and {question}
# are the two placeholders filled in when the prompt is formatted.
template = """
Answer the question based on the context below. If you can't
answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)
# Sanity check: render the template with dummy values to inspect the final text.
print(prompt.format(context="Here is some context", question="Here is a question"))


Answer the question based on the context below. If you can't
answer the question, reply "I don't know".

Context: Here is some context

Question: Here is a question



In [12]:
from langchain_ollama import OllamaLLM,OllamaEmbeddings 
from langchain_core.output_parsers import StrOutputParser
from operator import itemgetter
import os
from dotenv import load_dotenv

# Repeats the call from the setup cell.
load_dotenv()

# Ollama model that plays the role of the agent under evaluation.
MODEL = "llama2"
model = OllamaLLM(model=MODEL)


def format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    Without this step the list of Document objects is interpolated into the
    prompt through its repr, which adds metadata and escape sequences around
    the actual passage text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# NOTE: `vectorstore` is whatever the per-PDF loop assigned last, so this chain
# retrieves from a single PDF only.
chain = (
    {
        # question -> retrieved chunks -> plain-text context
        "context": itemgetter("question") | vectorstore.as_retriever() | format_docs,
        "question": itemgetter("question"),
    }
    | prompt
    | model
    | StrOutputParser()
)

In [13]:
def answer_fn(question, history=None):
    """Answer a single question by running it through the module-level `chain`.

    Args:
        question: The question text handed to the chain under the "question" key.
        history: Accepted as part of the signature but not used by this function.

    Returns:
        Whatever `chain.invoke` returns for the question.
    """
    payload = {"question": question}
    return chain.invoke(payload)

In [17]:
from giskard.rag import evaluate

# Evaluate the agent on the generated questions. `testset` and `knowledge_base`
# are the objects left behind by the per-PDF loop, not all saved testsets.
report = evaluate(
    answer_fn,
    testset=testset,
    knowledge_base=knowledge_base,
)

Asking questions to the agent: 100%|██████████| 3/3 [03:39<00:00, 73.19s/it]
CorrectnessMetric evaluation: 100%|██████████| 3/3 [00:41<00:00, 13.92s/it]


In [18]:
# Render the evaluation report in the notebook.
display(report)

In [19]:
# Correctness score broken down by question type (see the table below).
report.correctness_by_question_type()

Unnamed: 0_level_0,correctness
question_type,Unnamed: 1_level_1
complex,1.0
distracting element,1.0
simple,0.0


In [20]:
# List the questions the agent got wrong, with the evaluator's stated reason.
report.get_failures()

Unnamed: 0_level_0,question,reference_answer,reference_context,conversation_history,metadata,agent_answer,correctness,correctness_reason
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
aaa3bd93-8110-47e9-b186-b2db1ea2434d,For which patient group is Disulfiram not reco...,"Disulfiram is contraindicated when used alone,...",Document 2: Disulfiram is absorbed slowly from...,[],"{'question_type': 'simple', 'seed_document_id'...","Based on the context provided, Disulfiram is n...",False,The agent provided an answer based on a specif...
