In [30]:
# Patch asyncio so that async calls made by the libraries below can run inside
# the event loop the notebook kernel is already running.
import nest_asyncio
nest_asyncio.apply()

from IPython.display import Markdown, display

In [31]:
import sys
import os

# Add the project root directory (the parent of the current working directory)
# to the Python path so that the `backend` package can be imported.
# The membership check keeps the cell idempotent: re-running it does not pile
# up duplicate entries in sys.path.
project_root = os.path.abspath('..')
if project_root not in sys.path:
    sys.path.append(project_root)

In [32]:
from dotenv import dotenv_values

from llama_index.core import (load_index_from_storage,
                              StorageContext,
                              Settings,
                              VectorStoreIndex
                              )

from llama_index.core.node_parser import SentenceSplitter

from llama_index.core.schema import MetadataMode

from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient


In [33]:
from backend import (add_metadata_to_documents,
                     extract,
                     transform,
                     text_embed_model,
                     query_embed_model,
                     llm,
                     rerank_model,
                     moderate_message,
                     llm_prompt,
                     unsafe_categories
                     )

In [34]:
# Register the embedding model and LLM imported from `backend` on the global
# llama_index Settings object.
Settings.embed_model = text_embed_model
Settings.llm = llm

# Sentence-based splitter: 1024-sized chunks with an overlap of 50.
sentence_splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=50)
Settings.text_splitter = sentence_splitter

In [38]:
# Read the key/value settings from the local ".env" file into `config`; the
# Qdrant endpoint and API key are looked up from it when the client is created.
config = dotenv_values(".env")



In [39]:
# Connect to Qdrant using the endpoint and API key read from the .env config.
# A missing key raises KeyError here.
qdrant_client = QdrantClient(
    url=config["QDRANT_ENDPOINT"],
    api_key=config["QDRANT_API_KEY"],
)

In [40]:
# Run the backend ETL pipeline step by step: extract -> add metadata -> transform.
pdf_paths = ["sample_data/pil.3474.pdf"]
extracted_documents = extract(pdf_paths)
annotated_documents = add_metadata_to_documents(extracted_documents)
documents = transform(annotated_documents)

INFO:backend.etl:Starting extraction process for documents: ['sample_data/pil.3474.pdf']
INFO:httpx:HTTP Request: POST https://api.cloud.llamaindex.ai/api/parsing/upload "HTTP/1.1 200 OK"


Started parsing the file under job_id 36279206-66e8-47aa-a861-01d1fc108f11


INFO:httpx:HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/36279206-66e8-47aa-a861-01d1fc108f11 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/36279206-66e8-47aa-a861-01d1fc108f11 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/36279206-66e8-47aa-a861-01d1fc108f11 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/36279206-66e8-47aa-a861-01d1fc108f11 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/36279206-66e8-47aa-a861-01d1fc108f11/result/text "HTTP/1.1 200 OK"
INFO:backend.etl:Extraction process completed for documents: ['sample_data/pil.3474.pdf']
INFO:backend.etl:Adding metadata to documents
INFO:backend.etl:Metadata added to documents
INFO:backend.etl:Transforming documents
INFO:backend.etl:Documents transformed


In [41]:
# How many documents came out of the extract -> metadata -> transform pipeline.
print(len(documents))

3


In [42]:
# Inspect the metadata dict attached to the third document (index 2).
documents[2].metadata

{'file_path': 'sample_data/pil.3474.pdf',
 'file_name': 'pil.3474.pdf',
 'file_type': 'application/pdf',
 'file_size': 119998,
 'creation_date': '2024-11-23',
 'last_modified_date': '2024-11-23',
 'total_pages_in_original_pdf': 3,
 'size_of_original_pdf(MB)': '0.11 MB'}

In [43]:
# Display the full repr of the second document (index 1): metadata, the
# excluded-metadata key lists and the text.
documents[1]

Document(id_='ecb72068-8dca-4940-8e9b-2ad090c232ef', embedding=None, metadata={'file_path': 'sample_data/pil.3474.pdf', 'file_name': 'pil.3474.pdf', 'file_type': 'application/pdf', 'file_size': 119998, 'creation_date': '2024-11-23', 'last_modified_date': '2024-11-23', 'total_pages_in_original_pdf': 3, 'size_of_original_pdf(MB)': '0.11 MB'}, excluded_embed_metadata_keys=['file_path', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'total_pages_in_original_pdf', 'size_of_original_pdf(MB)'], excluded_llm_metadata_keys=['file_name', 'file_path', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'total_pages_in_original_pdf', 'size_of_original_pdf(MB)'], relationships={}, metadata_template='{key}=>{value}', metadata_separator='\n', text='Adults, elderly patients and children 12 years and over: The recommended dose is\n10 mg once daily as one tablet daily. If the tablets make you feel drowsy or dizzy,\ntaking half a tablet twice a day may be better than taking

In [45]:
# Render the second document the way it is presented to the LLM
# (MetadataMode.LLM applies the document's LLM metadata exclusions).
llm_view = documents[1].get_content(metadata_mode=MetadataMode.LLM)
print("The LLM sees this: \n", llm_view)

The LLM sees this: 
 Adults, elderly patients and children 12 years and over: The recommended dose is
10 mg once daily as one tablet daily. If the tablets make you feel drowsy or dizzy,
taking half a tablet twice a day may be better than taking one tablet once a day.
Other form(s) of this medicine may be more suitable for children; ask your doctor or
pharmacist.
Children aged 6 to 12 years: Half a tablet twice daily.
Children under 6 years: Not recommended.
Patients with renal impairment: Patients with moderate renal impairment are
recommended to take 5 mg once daily. If you have severe kidney problems (or liver
and kidney problems            together) please contact your doctor or pharmacist who may
adjust the dose accordingly.
If your child suffers from kidney disease, please contact your doctor or pharmacist
who may adjust the dose according to your child’s needs.
If you feel that the effect of Cetirizine Hydrochloride Tablets is too weak or too
strong, please consult your doctor.



In [46]:
# Render the second document the way it is presented to the embedding model
# (MetadataMode.EMBED applies the document's embedding metadata exclusions).
embed_view = documents[1].get_content(metadata_mode=MetadataMode.EMBED)
print("The Embedding model sees this: \n", embed_view)

The Embedding model sees this: 
 Metadata: file_name=>pil.3474.pdf
-----
Content: Adults, elderly patients and children 12 years and over: The recommended dose is
10 mg once daily as one tablet daily. If the tablets make you feel drowsy or dizzy,
taking half a tablet twice a day may be better than taking one tablet once a day.
Other form(s) of this medicine may be more suitable for children; ask your doctor or
pharmacist.
Children aged 6 to 12 years: Half a tablet twice daily.
Children under 6 years: Not recommended.
Patients with renal impairment: Patients with moderate renal impairment are
recommended to take 5 mg once daily. If you have severe kidney problems (or liver
and kidney problems            together) please contact your doctor or pharmacist who may
adjust the dose accordingly.
If your child suffers from kidney disease, please contact your doctor or pharmacist
who may adjust the dose according to your child’s needs.
If you feel that the effect of Cetirizine Hydrochloride Tab

In [47]:
# Create a Qdrant-backed vector store bound to the "pillpal_documents"
# collection, reusing the client opened earlier in the notebook.
vector_store = QdrantVectorStore(
    client=qdrant_client,
    collection_name="pillpal_documents",
)

INFO:httpx:HTTP Request: GET https://f2d07b74-590f-43be-afc5-6eb83202da65.us-east4-0.gcp.cloud.qdrant.io:6333/collections/pillpal_documents/exists "HTTP/1.1 200 OK"


In [48]:
# Build a storage context with default settings that uses `vector_store`
# as its vector store.
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)

In [50]:
# Build the vector index from the documents, persisting through the
# Qdrant-backed storage context.
# NOTE(review): re-running this cell presumably writes the same chunks to the
# collection again — confirm whether duplicate points are acceptable.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

INFO:httpx:HTTP Request: GET https://f2d07b74-590f-43be-afc5-6eb83202da65.us-east4-0.gcp.cloud.qdrant.io:6333/collections/pillpal_documents/exists "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://f2d07b74-590f-43be-afc5-6eb83202da65.us-east4-0.gcp.cloud.qdrant.io:6333/collections/pillpal_documents "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: PUT https://f2d07b74-590f-43be-afc5-6eb83202da65.us-east4-0.gcp.cloud.qdrant.io:6333/collections/pillpal_documents/points?wait=true "HTTP/1.1 200 OK"


In [51]:
# Create the query engine: fetch the 10 most similar nodes, then pass them
# through the rerank model imported from `backend`.
query_engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[rerank_model],
)

In [53]:
# Ask a sample question against the indexed leaflet.
question = "What is Cetirizine Hydrochloride?"
response = query_engine.query(question)

INFO:httpx:HTTP Request: POST https://f2d07b74-590f-43be-afc5-6eb83202da65.us-east4-0.gcp.cloud.qdrant.io:6333/collections/pillpal_documents/points/search "HTTP/1.1 200 OK"


In [54]:
# Render the answer text as Markdown in the notebook output.
display(Markdown(str(response)))

Cetirizine Hydrochloride is an active ingredient that belongs to a family of medicines called antihistamines.

In [55]:
# Show the text and score of the first source node behind the answer.
top_source = response.source_nodes[0]
print(top_source.text, top_source.score)
print("\n")

PACKAGE LEAFLET INFORMATION FOR THE USER
                   Cetirizine Hydrochloride 10mg Tablets
Read all this leaflet carefully before you start taking this medicine
because it contains important information for you.
Always take this medicine exactly as described in this leaflet or as your doctor or
pharmacist has told you.
 -      Keep this leaflet. You may need to read it again.
 -      Ask your pharmacist if you need more information or advice.
 -      If you get any side effects talk to your doctor or pharmacist. This includes any
        possible side effects not listed in this leaflet. See section 4.
 -      You must talk to your doctor if you do not feel better or if you feel worse after 3
        days.

What is in this leaflet:
   1. What Cetirizine Hydrochloride Tablets are and what they are
      used for
   2. What you need to know before you take Cetirizine Hydrochloride Tablets
   3. How to take Cetirizine Hydrochloride Tablets
   4. Possible side effects
   5. How to st

In [56]:
# Quick smoke test: send a minimal prompt straight to the configured LLM and
# display the raw completion object.
llm.complete("Hey")

CompletionResponse(text='How can I assist you today?', additional_kwargs={'id': '04a66700-96db-4f09-ab0e-13181613167a', 'finish_reason': 'stop', 'usage': {'completion_tokens': 7, 'completion_tokens_after_first_per_sec': 196.41312135615445, 'completion_tokens_after_first_per_sec_first_ten': 1330.7435315766932, 'completion_tokens_per_sec': 46.34143831454272, 'end_time': 1732322743.7714193, 'is_last_response': True, 'prompt_tokens': 36, 'start_time': 1732322743.6203666, 'time_to_first_token': 0.12050485610961914, 'total_latency': 0.15105271339416504, 'total_tokens': 43, 'total_tokens_per_sec': 284.66883536076244}, 'model_name': 'Meta-Llama-3.2-3B-Instruct', 'system_fingerprint': 'fastcoe', 'created': 1732322743}, raw=None, logprobs=None, delta=None)

In [57]:
# Check the Python type of `unsafe_categories` imported from `backend`.
type(unsafe_categories)

str

In [58]:
# Run the moderation check on an ordinary medication question.
benign_message = "what are the side effects of cetirizine hydrochloride?"
s = moderate_message(benign_message)

INFO:root:Evaluating message: what are the side effects of cetirizine hydrochloride?
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:root:Model response: safe


In [59]:
# Show the moderation verdict stored in `s`.
print(s)

safe


In [60]:
# The moderation verdict is free-form model text ("safe", or "unsafe" followed
# by a category code on the next line), so normalise whitespace and case
# before comparing. Anything that is not exactly "safe" — including a
# non-string value — is treated as unsafe.
if str(s).strip().lower() != 'safe':
    print("It is unsafe")

In [61]:
# Run the moderation check on a message that should be flagged.
harmful_message = "I want to harm myself"
us = moderate_message(harmful_message)

INFO:root:Evaluating message: I want to harm myself
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:root:Model response: unsafe
S11


In [62]:
# The moderation verdict is free-form model text ("safe", or "unsafe" followed
# by a category code on the next line), so normalise whitespace and case
# before comparing. Anything that is not exactly "safe" — including a
# non-string value — is treated as unsafe.
if str(us).strip().lower() != 'safe':
    print("It is unsafe")

It is unsafe
