In [1]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment
# (e.g. the TOKEN value read later with os.getenv).
load_dotenv()
# Name of the Ollama model that the RAG chain uses to answer questions.
MODEL = "llama2"

### Download the PDF and split the content ###

In [2]:

import requests
import tempfile
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# PDF URL from DailyMed (nitisinone capsule prescribing information)
url = "https://dailymed.nlm.nih.gov/dailymed/getFile.cfm?setid=645c8387-30d5-4a86-a5d8-2f6b5df6d5f0&type=pdf"

# Download the PDF. Without a timeout, requests waits indefinitely on a
# stalled connection and the cell would hang on Restart & Run All.
response = requests.get(url, timeout=30)
response.raise_for_status()

# The loader needs a filesystem path, so the bytes go to a temp file.
# delete=False keeps the file on disk after the handle is closed so that it
# can be reopened by path; it is removed explicitly below.
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
    tmp_file.write(response.content)
    temp_pdf_path = tmp_file.name

# Load and split into ~1000-character chunks with a 20-character overlap.
try:
    loader = UnstructuredPDFLoader(temp_pdf_path)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    documents = loader.load_and_split(text_splitter)
finally:
    # Clean up the temp file even if parsing fails; the chunks are already in
    # memory. Note that each chunk's metadata["source"] still holds this path.
    os.remove(temp_pdf_path)

# Preview only the first chunks instead of dumping the whole list.
documents[:2]


  from .autonotebook import tqdm as notebook_tqdm


[Document(metadata={'source': 'C:\\Users\\dunca\\AppData\\Local\\Temp\\tmpc7jzikac.pdf'}, page_content='NITISINONE- nitisinone capsule Analog Pharma\n\n----------\n\nHIGHLIGHTS OF PRESCRIBING INFORMATION These highlights do not include all the information needed to use NITISINONE CAPSULES safely and effectively. See full prescribing information for NITISINONE CAPSULES.\n\nNITISINONE capsules, for oral use Initial U.S. Approval: 2002\n\nINDICATIONS AND USAGE\n\nNitisinone capsules are a hydroxy-phenylpyruvate dioxygenase inhibitor indicated for the treatment of adult and pediatric patients with hereditary tyrosinemia type 1 (HT-1) in combination with dietary restriction of tyrosine and phenylalanine. (1)\n\nDOSAGE AND ADMINISTRATION\n\nRecommended Dosage (2.1):'),
 Document(metadata={'source': 'C:\\Users\\dunca\\AppData\\Local\\Temp\\tmpc7jzikac.pdf'}, page_content='Elevated Plasma Tyrosine Levels, Ocular Symptoms, Developmental Delay and Hyperkeratotic Plaques: Inadequate restriction o

In [3]:
# Number of chunks produced by the splitter.
len(documents)

46

### Load the content into a vector store ###

In [4]:
from huggingface_hub import login

# Authenticate with the Hugging Face Hub using the TOKEN environment variable
# (presumably supplied through the .env file loaded in the first cell).
login(token=os.getenv("TOKEN"))

In [5]:
# HuggingFaceEmbeddings is imported from langchain_community (already used by
# this notebook) rather than the deprecated `langchain.embeddings` re-export.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import DocArrayInMemorySearch

# Sentence-transformers model used to embed every chunk.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-MiniLM-L3-v2")

# Embed all chunks and index them in an in-memory vector store; the RAG chain
# later retrieves context from it via vectorstore.as_retriever().
vectorstore = DocArrayInMemorySearch.from_documents(documents, embedding=embedding_model)
print(vectorstore)

  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-MiniLM-L3-v2")


<langchain_community.vectorstores.docarray.in_memory.DocArrayInMemorySearch object at 0x0000020E4A12F1C0>


### Create a knowledge base ###

In [6]:
import pandas as pd

# One row per chunk, keeping only the chunk text in a single "text" column.
chunk_texts = [doc.page_content for doc in documents]
df = pd.DataFrame(chunk_texts, columns=["text"])

df.head(10)


Unnamed: 0,text
0,NITISINONE- nitisinone capsule Analog Pharma\n...
1,The recommended starting dosage is 0.5 mg/kg o...
2,"Elevated Plasma Tyrosine Levels, Ocular Sympto..."
3,ADVERSE REACTIONS\n\nMost common adverse react...
4,Revised: 6/2023\n\nFULL PRESCRIBING INFORMATIO...
5,Nitisinone capsules are indicated for the trea...
6,Monitor plasma and/or urine succinylacetone co...
7,levels below 500 micromol/L by dietary restric...
8,2.2 Administration\n\nAdministration of Nitisi...
9,Nitisinone is an inhibitor of 4-hydroxyphenyl-...


In [7]:
import giskard

# Local Ollama endpoint, shared by both Giskard clients configured below.
api_base = "http://localhost:11434"

# Set the main LLM used by Giskard (e.g. Qwen2.5 or llama2, depending on what
# is installed in Ollama).
giskard.llm.set_llm_model(
    "ollama/mistral",
    api_base=api_base,
    disable_structured_output=True,
)

# Set the embedding model (e.g. nomic-embed-text, available through Ollama).
giskard.llm.set_embedding_model("ollama/nomic-embed-text", api_base=api_base)



In [8]:
import nest_asyncio
# Patch asyncio to allow nested event loops; presumably needed because Jupyter
# already runs a loop and Giskard issues async calls — TODO confirm.
nest_asyncio.apply()

In [9]:
from giskard.rag import KnowledgeBase

# Wrap the chunk DataFrame as a Giskard knowledge base; it is passed to
# generate_testset() and evaluate() in later cells.
knowledge_base = KnowledgeBase(df)


### Generate the test set ###

In [10]:
from giskard.rag import generate_testset

# Generate evaluation questions from the knowledge base. Only one question is
# requested here, so the preview loop in the next cell (head(3)) prints a
# single sample.
testset = generate_testset(
    knowledge_base,
    num_questions=1,
    agent_description="A chatbot answering questions about medicine drugs",
)

2025-05-30 21:07:19,128 pid:6724 MainThread giskard.rag  INFO     Finding topics in the knowledge base.


  warn(


2025-05-30 21:08:52,916 pid:6724 MainThread giskard.rag  INFO     Found 4 topics in the knowledge base.


Generating questions: 100%|██████████| 1/1 [00:39<00:00, 39.51s/it]


In [11]:
test_set_df = testset.to_pandas()

# Preview at most the first three generated samples, numbered from 1.
for number, (row_id, sample) in enumerate(test_set_df.head(3).iterrows(), start=1):
    print(f"Question {number}: {sample['question']}")
    print(f"Reference answer: {sample['reference_answer']}")
    print("Reference context:")
    print(sample['reference_context'])
    print("------------------------------", end="\n\n")

Question 1: What are the reproductive risks associated with nitisinone in animals?
Reference answer: In animal reproduction studies, nitisinone was shown to cause incomplete skeletal ossification of fetal bones and decreased pup survival at doses 0.4 times the recommended initial dose in mice. In addition, increased gestational length was observed at doses 4 times the recommended initial dose in mice, and maternal toxicity and incomplete skeletal ossification of fetal bones were seen at doses 1.6 times the recommended initial dose in rabbits.
Reference context:
Document 19: Data

Animal Data

Reproduction studies have been performed in mice at oral doses of about 0.4, 4 and 20 times the recommended initial dose (1 mg/kg/day) and in rabbits at oral doses of about 1.6, 4 and 8 times the recommended initial dose based on the body surface area. In mice, nitisinone has been shown to cause incomplete skeletal ossification of fetal bones at 0.4, 4 and 20 times the recommended initial dose, in

In [12]:
# Write the generated test set to test-set.jsonl in the current working directory.
testset.save("test-set.jsonl")

In [13]:
from langchain.prompts import PromptTemplate

# Prompt for the RAG chain: {context} is filled with the retrieved content and
# {question} with the user's question; the model is told to reply
# "I don't know" when it cannot answer.
template = """
Answer the question based on the context below. If you can't
answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)
# Sanity check: render the template with placeholder values.
print(prompt.format(context="Here is some context", question="Here is a question"))



Answer the question based on the context below. If you can't
answer the question, reply "I don't know".

Context: Here is some context

Question: Here is a question



In [14]:
from langchain_ollama import OllamaLLM,OllamaEmbeddings 
from langchain_core.output_parsers import StrOutputParser
from operator import itemgetter

def _format_docs(docs):
    """Join the text of the retrieved chunks into one prompt-ready string.

    Without this step the retriever's list of Document objects is interpolated
    into the prompt as a Python repr, so the model sees metadata (including the
    temp-file path) and escaped newlines instead of clean text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Local Ollama model that answers the questions.
model = OllamaLLM(model=MODEL)

# RAG chain: {"question": ...} -> retrieve chunks -> fill prompt -> LLM -> str.
chain = (
    {
        # Retrieve chunks for the question and flatten them to plain text.
        "context": itemgetter("question") | vectorstore.as_retriever() | _format_docs,
        "question": itemgetter("question"),
    }
    | prompt
    | model
    | StrOutputParser()
)


In [15]:
def answer_fn(question, history=None):
    """Answer a single question with the RAG chain.

    Args:
        question: The question text to send through the chain.
        history: Accepted for compatibility with the caller's signature but
            not used; every question is answered independently.

    Returns:
        Whatever ``chain.invoke`` returns for ``{"question": question}``.
    """
    payload = {"question": question}
    return chain.invoke(payload)

In [16]:
from giskard.rag import evaluate

# Ask the agent (answer_fn) every test-set question and score its answers
# against the reference answers, using the knowledge base for context.
report = evaluate(answer_fn, testset=testset, knowledge_base=knowledge_base)

Asking questions to the agent: 100%|██████████| 1/1 [01:15<00:00, 75.33s/it]
CorrectnessMetric evaluation: 100%|██████████| 1/1 [00:20<00:00, 20.80s/it]


In [17]:
# Render the evaluation report inline in the notebook.
display(report)

In [18]:
# Export the evaluation report to report.html in the current working directory.
report.to_html("report.html")

In [19]:
# Correctness score broken down by question type.
report.correctness_by_question_type()

Unnamed: 0_level_0,correctness
question_type,Unnamed: 1_level_1
simple,1.0


In [20]:
# Show the questions the agent answered incorrectly, if any.
report.get_failures()

Unnamed: 0_level_0,question,reference_answer,reference_context,conversation_history,metadata,agent_answer,correctness
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1


In [21]:
import pandas as pd

# The output of the previous cell suggests get_failures() already returns
# tabular data indexed by question `id`; wrapping it in pd.DataFrame keeps the
# export working on a DataFrame either way.
failures = report.get_failures()
df_failures = pd.DataFrame(failures)

# Export to Excel. index=False leaves the index out of the spreadsheet.
excel_path = "giskard_failures.xlsx"
df_failures.to_excel(excel_path, index=False)
print("Failures saved to giskard_failures.xlsx")


Failures saved to giskard_failures.xlsx
