In [1]:
import os
from dotenv import load_dotenv

# Load variables from a .env file (if any) into the process environment.
load_dotenv()

# Credentials come from the environment; each is None when its variable is unset.
ASI_ONE_KEY = os.environ.get("ASI_ONE_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Model name sent in the chat-completions payload by call_asi_one.
MODEL = "asi1-mini"

In [3]:
import pandas as pd
import requests
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


# Seconds to wait on a server before giving up on one download. Without a timeout,
# requests waits indefinitely and a single unresponsive host would stall the whole cell.
DOWNLOAD_TIMEOUT_S = 30

os.makedirs("pdfs_manual", exist_ok=True)

df = pd.read_csv("medications_dataset.csv")
links = df['link'].tolist()

# Paths of the PDFs that were fetched and written successfully.
pdf_files = []

for i, link in enumerate(links):
    # The file name is derived from the row position, so doc_<i>.pdf maps back to row i of the CSV.
    filename = f"pdfs_manual/doc_{i}.pdf"
    try:
        r = requests.get(link, timeout=DOWNLOAD_TIMEOUT_S)
        if r.ok:
            with open(filename, "wb") as f:
                f.write(r.content)
            pdf_files.append(filename)
            print(f"Downloaded {filename}")
        else:
            print(f"Failed to download {link}")
    except Exception as e:
        # Best effort: a bad link, a timeout, or a write error must not abort the remaining downloads.
        print(f"Error downloading {link}: {e}")

# Splitter shared by all PDFs: chunks of up to 1000 characters with a 200-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Accumulates the chunks of every PDF that could be parsed.
all_documents = []

for pdf_path in pdf_files:
    try:
        chunks = PyPDFLoader(pdf_path).load_and_split(text_splitter)
        all_documents.extend(chunks)
        print(f"Parsed {pdf_path} with {len(chunks)} chunks.")
    except Exception as err:
        # Best effort: report the unreadable file and carry on with the rest.
        print(f"Error parsing {pdf_path}: {err}")


Downloaded pdfs_manual/doc_0.pdf
Downloaded pdfs_manual/doc_1.pdf
Downloaded pdfs_manual/doc_2.pdf
Downloaded pdfs_manual/doc_3.pdf
Downloaded pdfs_manual/doc_4.pdf
Downloaded pdfs_manual/doc_5.pdf
Downloaded pdfs_manual/doc_6.pdf
Downloaded pdfs_manual/doc_7.pdf
Downloaded pdfs_manual/doc_8.pdf
Downloaded pdfs_manual/doc_9.pdf
Downloaded pdfs_manual/doc_10.pdf
Downloaded pdfs_manual/doc_11.pdf
Downloaded pdfs_manual/doc_12.pdf
Downloaded pdfs_manual/doc_13.pdf
Downloaded pdfs_manual/doc_14.pdf
Downloaded pdfs_manual/doc_15.pdf
Downloaded pdfs_manual/doc_16.pdf
Downloaded pdfs_manual/doc_17.pdf
Downloaded pdfs_manual/doc_18.pdf
Downloaded pdfs_manual/doc_19.pdf
Downloaded pdfs_manual/doc_20.pdf
Downloaded pdfs_manual/doc_21.pdf
Downloaded pdfs_manual/doc_22.pdf
Downloaded pdfs_manual/doc_23.pdf
Downloaded pdfs_manual/doc_24.pdf
Downloaded pdfs_manual/doc_25.pdf
Downloaded pdfs_manual/doc_26.pdf
Downloaded pdfs_manual/doc_27.pdf
Downloaded pdfs_manual/doc_28.pdf
Downloaded pdfs_manual/d

In [3]:
from huggingface_hub import login

# Log in to the Hugging Face Hub with the token held in the TOKEN environment variable
# (None is passed when the variable is unset).
hf_token = os.environ.get("TOKEN")
login(token=hf_token)

In [4]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import DocArrayInMemorySearch

# Sentence-transformers model used to embed the PDF chunks.
EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-MiniLM-L3-v2"

embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Embed every chunk and index it in an in-memory vector store for retrieval.
vectorstore = DocArrayInMemorySearch.from_documents(
    all_documents,
    embedding=embedding_model,
)
print(vectorstore)


  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-MiniLM-L3-v2")


<langchain_community.vectorstores.docarray.in_memory.DocArrayInMemorySearch object at 0x77f9c08050a0>


In [5]:
import pandas as pd

# One row per PDF chunk, with the chunk text in a single "text" column.
# The name `df` is kept because the KnowledgeBase cell further down reads it.
chunk_texts = [document.page_content for document in all_documents]
df = pd.DataFrame({"text": chunk_texts})

df.head(100)

Unnamed: 0,text
0,"ATRIPLA ACCESS- efavirenz, emtricitabine, and ..."
1,discontinuation of ATRIPLA. Closely monitor he...
2,agents for the treatment of HIV-1 infection in...
3,2.5\n)\nDOSAGE FORMS AND STRENGTHS\nTablets: 6...
4,WARNINGS AND PRECAUTIONS\nRash: Discontinue if...
...,...
95,± 3.1 µg∙hr/mL. The mean steady state plasma t...
96,Tenofovir DF:\n \nFollowing oral administratio...
97,normal renal function of 243 ± 33 mL/min (mean...
98,among the racial groups studied.\nEmtricitabin...


In [6]:
import giskard

# Local Ollama endpoint; only the commented-out Ollama alternatives below refer to it.
api_base = "http://localhost:11434"

# Models Giskard is configured with in this notebook.
GISKARD_LLM_MODEL = "gpt-3.5-turbo"
GISKARD_EMBEDDING_MODEL = "text-embedding-3-small"

# Set the main LLM. Ollama alternative (e.g. Qwen2.5 or llama2, depending on what is installed in Ollama):
# giskard.llm.set_llm_model("ollama/mistral", api_base="http://localhost:11434", disable_structured_output=True)
giskard.llm.set_llm_model(GISKARD_LLM_MODEL)

# Set the embedding model. Ollama alternative (e.g. nomic-embed-text, available through Ollama):
# giskard.llm.set_embedding_model("ollama/nomic-embed-text", api_base=api_base)
giskard.llm.set_embedding_model(GISKARD_EMBEDDING_MODEL)

In [7]:
import nest_asyncio
# Patch asyncio so an already-running event loop can be re-entered.
# NOTE(review): presumably needed because the notebook kernel runs its own loop while
# the Giskard calls below use asyncio internally — confirm.
nest_asyncio.apply()

In [8]:
from giskard.rag import KnowledgeBase

# Build the Giskard knowledge base from `df`, the one-column ("text") frame of PDF chunks
# created in the preceding DataFrame cell.
knowledge_base = KnowledgeBase(df)

In [22]:
from giskard.rag import generate_testset

# Number of questions to generate and the agent description handed to the generator.
NUM_TEST_QUESTIONS = 3
AGENT_DESCRIPTION = "A chatbot answering questions about medicine drugs"

# Generate evaluation questions from the knowledge base.
testset = generate_testset(
    knowledge_base,
    num_questions=NUM_TEST_QUESTIONS,
    agent_description=AGENT_DESCRIPTION,
)

Generating questions:   0%|          | 0/3 [00:00<?, ?it/s]

In [23]:
test_set_df = testset.to_pandas()

# Print the first three generated samples: question, reference answer and reference context.
preview_rows = test_set_df.head(3)
for position, (sample_id, sample) in enumerate(preview_rows.iterrows(), start=1):
    print(f"Question {position}: {sample['question']}")
    print(f"Reference answer: {sample['reference_answer']}")
    print("Reference context:")
    print(sample['reference_context'])
    print("------------------------------", end="\n\n")

Question 1: What should you do if you are breastfeeding and taking ATRIPLA?
Reference answer: If you are breastfeeding or plan to breastfeed, you should not breastfeed because of the risk of passing HIV-1 to your baby while taking ATRIPLA.
Reference context:
Document 167: ATRIPLA during pregnancy. The purpose of this registry is to collect information
about the health of you and your baby. Talk to your healthcare provider about
how you can take part in this registry.
are breastfeeding or plan to breastfeed. ATRIPLA can pass into your breast milk. Do
not breastfeed because of the risk of passing HIV-1 to your baby.
Tell your healthcare provider about all the medicines you take
, including
prescription and over-the-counter medicines, vitamins and herbal supplements.
Keep a list of your medicines and show it to your healthcare provider and pharmacist
when you get a new medicine. 
ATRIPLA and some medicines may interact with each other causing serious side effects.
You can ask your healthc

In [24]:
# Persist the generated test set to test-set.jsonl (e.g. so it can be reused without regenerating).
testset.save("test-set.jsonl")

In [25]:
from langchain.prompts import PromptTemplate

# Prompt skeleton for the RAG chain: values are substituted into the {context} and
# {question} placeholders, and the model is told to reply "I don't know" when it cannot answer.
template = """
Answer the question based on the context below. If you can't
answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)
# Render once with dummy values to check the final prompt text.
print(prompt.format(context="Here is some context", question="Here is a question"))


Answer the question based on the context below. If you can't
answer the question, reply "I don't know".

Context: Here is some context

Question: Here is a question



In [26]:
import requests
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser



def call_asi_one(prompt):
    """Send a prompt to the chat-completions endpoint at api.asi1.ai and return the reply text.

    Args:
        prompt: The prompt as a ``str``, or any object exposing ``to_string()``
            (such as the value the prompt template emits inside the chain).

    Returns:
        The ``content`` of the first choice's message, or ``"No response"`` when the
        response body carries no usable choice, message or content.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status. Failing loudly
            keeps an auth or quota error from being recorded as a model answer.
        requests.RequestException: On connection errors or when the timeout expires.
        ValueError: If the response body is not valid JSON.
    """
    if hasattr(prompt, "to_string"):
        prompt = prompt.to_string()  # Convert PromptValue to str

    # Upper bound, in seconds, on how long a single completion request may block.
    request_timeout_s = 60

    url = "https://api.asi1.ai/v1/chat/completions"
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {ASI_ONE_KEY}'
    }
    payload = {
        "model": MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.2,
        "max_tokens": 1000
    }
    response = requests.post(url, headers=headers, json=payload, timeout=request_timeout_s)
    response.raise_for_status()

    # `or` also covers an explicit null / empty list, which would otherwise break the [0] lookup.
    choices = response.json().get("choices") or [{}]
    message = choices[0].get("message") or {}
    content = message.get("content")
    return "No response" if content is None else content
# Assemble the RAG chain: the incoming {"question": ...} dict is fanned out into
# "context" (the question passed through the vector-store retriever) and "question"
# (passed through unchanged), rendered by the prompt template, sent to call_asi_one,
# and finally parsed to a plain string.
question_of = itemgetter("question")
retriever = vectorstore.as_retriever()

chain = (
    {"context": question_of | retriever, "question": question_of}
    | prompt
    | call_asi_one
    | StrOutputParser()
)

In [15]:
# Manual smoke test: run a single question through the full RAG chain.
chain.invoke({"question": "What is ATRIPLA?"})



In [27]:
def answer_fn(question, history=None):
    """Answer a single question with the RAG chain.

    Args:
        question: The question text to answer.
        history: Accepted for interface compatibility but ignored; every question
            is answered independently.

    Returns:
        Whatever ``chain.invoke`` returns for the question.
    """
    chain_input = {"question": question}
    return chain.invoke(chain_input)


In [28]:
from giskard.rag import evaluate

# Ask the agent (answer_fn) every question in the test set and evaluate its answers;
# the knowledge base is passed along to the evaluation as well.
report = evaluate(answer_fn, testset=testset, knowledge_base=knowledge_base)
display(report)

Asking questions to the agent:   0%|          | 0/3 [00:00<?, ?it/s]

CorrectnessMetric evaluation:   0%|          | 0/3 [00:00<?, ?it/s]

In [29]:
# Write the evaluation report to report.html.
report.to_html("report.html")

In [30]:
# Correctness score broken down by question type.
report.correctness_by_question_type()

Unnamed: 0_level_0,correctness
question_type,Unnamed: 1_level_1
complex,1.0
distracting element,0.0
simple,1.0


In [31]:
# Show the test questions the agent got wrong, together with the evaluator's reasoning.
report.get_failures()

Unnamed: 0_level_0,question,reference_answer,reference_context,conversation_history,metadata,agent_answer,correctness,correctness_reason
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
82a55fb0-eda8-4180-89ae-92696b2f79d2,Are there any contraindications for using ATRI...,The active ingredients in ATRIPLA are efaviren...,Document 176: Medicines are sometimes prescrib...,[],"{'question_type': 'distracting element', 'seed...","Yes, there are contraindications for using ATR...",False,The agent provided information about contraind...


In [32]:
import pandas as pd

# Collect the failed test cases from the evaluation report.
# NOTE(review): the rendered output of the previous cell suggests get_failures() returns
# a DataFrame indexed by `id`, not a list of dicts — confirm.
failures = report.get_failures()

# Wrapping in pd.DataFrame accepts either a DataFrame or a list of dictionaries.
df_failures = pd.DataFrame(failures)

# Save to Excel.
# NOTE(review): index=False omits the index from the file; if the index holds the
# question `id`, that id is not written — confirm this is intended.
df_failures.to_excel("giskard_failures.xlsx", index=False)
print("Failures saved to giskard_failures.xlsx")

Failures saved to giskard_failures.xlsx
