In [1]:
import nest_asyncio
# Apply the nest_asyncio patch before anything else runs.
# NOTE(review): presumably needed so the async calls made by the pipeline below
# can run inside the notebook's already-running event loop — confirm.
nest_asyncio.apply()

from IPython.display import Markdown, display

In [2]:
import sys
import os

# Add the project root directory (the parent of the current working directory)
# to the Python path; presumably this is where the `backend` package lives.
# Guarded so that re-running this cell does not append duplicate entries.
PROJECT_ROOT = os.path.abspath('..')
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [3]:
from dotenv import dotenv_values

from llama_index.core import (load_index_from_storage,
                              StorageContext,
                              Settings,
                              VectorStoreIndex
                              )

from llama_index.core.node_parser import SentenceSplitter

from llama_index.core.schema import MetadataMode

from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient


In [4]:
from backend import (add_metadata_to_documents,
                     extract,
                     transform,
                     text_embed_model,
                     query_embed_model,
                     llm,
                     rerank_model,
                     moderate_message,
                     llm_prompt,
                     unsafe_categories
                     )

In [5]:
# Chunking parameters handed to the sentence splitter below.
CHUNK_SIZE = 1024
CHUNK_OVERLAP = 50

# Register the backend-provided embedding model and LLM, plus the splitter,
# on the shared llama_index Settings object.
Settings.embed_model = text_embed_model
Settings.llm = llm
Settings.text_splitter = SentenceSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

In [6]:
# Read the settings stored in the local .env file; later cells look up the
# Qdrant endpoint and API key in `config` by key.
config = dotenv_values(dotenv_path=".env")

In [7]:
# Build the Qdrant client from the endpoint and API key held in `config`.
qdrant_client = QdrantClient(
    url=config["QDRANT_ENDPOINT"],
    api_key=config["QDRANT_API_KEY"],
)

In [8]:
# Run the backend pipeline one stage at a time:
# extract -> add_metadata_to_documents -> transform.
pdf_paths = ["sample_data/pil.3474.pdf"]
extracted_docs = extract(pdf_paths)
docs_with_metadata = add_metadata_to_documents(extracted_docs)
documents = transform(docs_with_metadata)

INFO:backend.etl:Starting extraction process for documents: ['sample_data/pil.3474.pdf']
INFO:httpx:HTTP Request: POST https://api.cloud.llamaindex.ai/api/parsing/upload "HTTP/1.1 200 OK"


Started parsing the file under job_id 04f2529c-c358-4269-b1a9-ae9bd74474b8


INFO:httpx:HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/04f2529c-c358-4269-b1a9-ae9bd74474b8 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/04f2529c-c358-4269-b1a9-ae9bd74474b8/result/text "HTTP/1.1 200 OK"
INFO:backend.etl:Extraction process completed for documents: ['sample_data/pil.3474.pdf']
INFO:backend.etl:Adding metadata to documents
INFO:backend.etl:Metadata added to documents
INFO:backend.etl:Transforming documents
INFO:backend.etl:Documents transformed


In [9]:
# Report how many documents the pipeline produced.
document_count = len(documents)
print(document_count)

3


In [10]:
# Inspect the metadata dict attached to the third document.
documents[2].metadata

{'file_path': 'sample_data/pil.3474.pdf',
 'file_name': 'pil.3474.pdf',
 'file_type': 'application/pdf',
 'file_size': 119998,
 'creation_date': '2024-11-09',
 'last_modified_date': '2024-11-09',
 'total_pages_in_original_pdf': 3,
 'size_of_original_pdf(MB)': '0.11 MB'}

In [11]:
# Display the full repr of the second document (id, metadata, the
# excluded_*_metadata_keys lists and the text).
documents[1]

Document(id_='4c9d5c1a-47fe-4875-b230-ee86326bd274', embedding=None, metadata={'file_path': 'sample_data/pil.3474.pdf', 'file_name': 'pil.3474.pdf', 'file_type': 'application/pdf', 'file_size': 119998, 'creation_date': '2024-11-09', 'last_modified_date': '2024-11-09', 'total_pages_in_original_pdf': 3, 'size_of_original_pdf(MB)': '0.11 MB'}, excluded_embed_metadata_keys=['file_path', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'total_pages_in_original_pdf', 'size_of_original_pdf(MB)'], excluded_llm_metadata_keys=['file_name', 'file_path', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'total_pages_in_original_pdf', 'size_of_original_pdf(MB)'], relationships={}, text='Adults, elderly patients and children 12 years and over: The recommended dose is\n10 mg once daily as one tablet daily. If the tablets make you feel drowsy or dizzy,\ntaking half a tablet twice a day may be better than taking one tablet once a day.\nOther form(s) of this medicine may b

In [12]:
# Render the second document's content under MetadataMode.LLM and print it.
llm_view = documents[1].get_content(metadata_mode=MetadataMode.LLM)
print("The LLM sees this: \n", llm_view)

The LLM sees this: 
 Adults, elderly patients and children 12 years and over: The recommended dose is
10 mg once daily as one tablet daily. If the tablets make you feel drowsy or dizzy,
taking half a tablet twice a day may be better than taking one tablet once a day.
Other form(s) of this medicine may be more suitable for children; ask your doctor or
pharmacist.
Children aged 6 to 12 years: Half a tablet twice daily.
Children under 6 years: Not recommended.
Patients with renal impairment: Patients with moderate renal impairment are
recommended to take 5 mg once daily. If you have severe kidney problems (or liver
and kidney problems            together) please contact your doctor or pharmacist who may
adjust the dose accordingly.
If your child suffers from kidney disease, please contact your doctor or pharmacist
who may adjust the dose according to your child’s needs.
If you feel that the effect of Cetirizine Hydrochloride Tablets is too weak or too
strong, please consult your doctor.



In [13]:
# Render the second document's content under MetadataMode.EMBED and print it.
embed_view = documents[1].get_content(metadata_mode=MetadataMode.EMBED)
print("The Embedding model sees this: \n", embed_view)

The Embedding model sees this: 
 Metadata: file_name=>pil.3474.pdf
-----
Content: Adults, elderly patients and children 12 years and over: The recommended dose is
10 mg once daily as one tablet daily. If the tablets make you feel drowsy or dizzy,
taking half a tablet twice a day may be better than taking one tablet once a day.
Other form(s) of this medicine may be more suitable for children; ask your doctor or
pharmacist.
Children aged 6 to 12 years: Half a tablet twice daily.
Children under 6 years: Not recommended.
Patients with renal impairment: Patients with moderate renal impairment are
recommended to take 5 mg once daily. If you have severe kidney problems (or liver
and kidney problems            together) please contact your doctor or pharmacist who may
adjust the dose accordingly.
If your child suffers from kidney disease, please contact your doctor or pharmacist
who may adjust the dose according to your child’s needs.
If you feel that the effect of Cetirizine Hydrochloride Tab

In [14]:
# Create a Qdrant-backed vector store bound to the target collection
QDRANT_COLLECTION = "pillpal_documents"
vector_store = QdrantVectorStore(
    client=qdrant_client,
    collection_name=QDRANT_COLLECTION,
)

INFO:httpx:HTTP Request: GET https://f2d07b74-590f-43be-afc5-6eb83202da65.us-east4-0.gcp.cloud.qdrant.io:6333/collections/pillpal_documents/exists "HTTP/1.1 200 OK"


In [15]:
# Wrap the Qdrant vector store in a StorageContext; it is passed to the index
# constructor in the next cell.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [16]:
# Create the index from the documents
# NOTE(review): each run of this cell writes points to the remote
# "pillpal_documents" collection (see the PUT request in the recorded output),
# so re-running the notebook presumably stores the same chunks again — confirm
# whether ingestion should be guarded or the existing collection reused.
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

INFO:httpx:HTTP Request: GET https://f2d07b74-590f-43be-afc5-6eb83202da65.us-east4-0.gcp.cloud.qdrant.io:6333/collections/pillpal_documents/exists "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://f2d07b74-590f-43be-afc5-6eb83202da65.us-east4-0.gcp.cloud.qdrant.io:6333/collections/pillpal_documents "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: PUT https://f2d07b74-590f-43be-afc5-6eb83202da65.us-east4-0.gcp.cloud.qdrant.io:6333/collections/pillpal_documents/points?wait=true "HTTP/1.1 200 OK"


In [17]:
# Smoke-test the configured LLM with a prompt unrelated to the indexed documents.
llm.complete("Can you tell me about Elon Musk")

CompletionResponse(text="Elon Musk is a South African-born entrepreneur, inventor, and business magnate. He is one of the most successful and influential figures in the tech industry, known for his innovative ideas and ambitious projects. Here are some key facts about Elon Musk:\n\n**Early Life and Education**\n\nElon Musk was born on June 28, 1971, in Pretoria, South Africa. He developed an interest in computing and programming at an early age and taught himself computer programming. He moved to Canada in 1992 to attend college, and later transferred to the University of Pennsylvania, where he graduated with a degree in economics and physics.\n\n**Career**\n\nMusk's career can be divided into several stages:\n\n1. **Zip2**: In 1995, Musk co-founded Zip2, a software company that provided online content publishing software for news organizations. The company was acquired by Compaq in 1999 for $307 million.\n2. **X.com and PayPal**: In 1999, Musk co-founded X.com, which later became PayP

In [24]:
# Create the query engine
# Requests the 10 most similar nodes and passes them through `rerank_model`.
# NOTE(review): as recorded, this cell raises a pydantic ValidationError
# (LLMMetadata.context_window is None) and its execution count is out of order,
# so `query_engine` is not defined for the cells that follow. Presumably the
# `llm` exported by `backend` needs an integer context_window — confirm there.

query_engine = index.as_query_engine(similarity_top_k=10,
                                     node_postprocessors=[rerank_model]
                                     )

ValidationError: 1 validation error for LLMMetadata
context_window
  Input should be a valid integer [type=int_type, input_value=None, input_type=NoneType]
    For further information visit https://errors.pydantic.dev/2.9/v/int_type

In [None]:
# Ask the query engine a question about the indexed leaflet.
response = query_engine.query("What are the side effects of Cetirizine Hydrochloride?")

In [None]:
# Print the query engine's answer.
print(response)

In [18]:
# Check the type of `unsafe_categories` imported from `backend`.
type(unsafe_categories)

str

In [19]:
# Run the moderation check on a benign medication question and keep the verdict.
s = moderate_message("what are the side effects of cetirizine hydrochloride?")

INFO:root:Evaluating message: what are the side effects of cetirizine hydrochloride?
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:root:Model response: safe


In [20]:
# Show the moderation verdict returned for the benign question.
print(s)

safe


In [21]:
# Fail closed: anything other than a "safe" verdict is reported as unsafe.
# The verdict is normalised (whitespace and case) first, so a reply such as
# "safe\n" or "Safe" is not misreported as unsafe.
if str(s).strip().lower() != 'safe':
    print("It is unsafe")

In [22]:
# Run the moderation check on a message expected to be flagged and keep the verdict.
us = moderate_message("I want to harm myself")

INFO:root:Evaluating message: I want to harm myself
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:root:Model response: unsafe
S11


In [23]:
# Fail closed: anything other than a "safe" verdict is reported as unsafe
# (a flagged reply arrives as "unsafe" followed by a category code line).
# The verdict is normalised (whitespace and case) first, so a reply such as
# "safe\n" or "Safe" is not misreported as unsafe.
if str(us).strip().lower() != 'safe':
    print("It is unsafe")

It is unsafe
