# Pinecone Vector Database Integration

**References:**

- [LangChain Pinecone](https://python.langchain.com/v0.2/docs/integrations/vectorstores/pinecone/)
- [PineconeVectorStore API](https://api.python.langchain.com/en/latest/vectorstores/langchain_pinecone.vectorstores.PineconeVectorStore.html)

In [1]:
import hashlib
import os

from dotenv import load_dotenv
from langchain_openai import AzureOpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pinecone import Pinecone, ServerlessSpec

from loaders.text_loaders import TextLoaderWithMetadata



  from tqdm.autonotebook import tqdm


In [2]:
# Load environment variables from the .env file one directory above this
# notebook; the cells below read their settings from os.environ.
load_dotenv(dotenv_path="../.env")

True

In [3]:
# prepare modules for document extraction

# Location of the source text. Set EMDEX_TEXT_PATH in the environment to point
# at your own copy; the fallback is the original hard-coded path so existing
# setups keep working unchanged.
EMDEX_TEXT_PATH = os.environ.get(
    "EMDEX_TEXT_PATH",
    "/Users/tjosh/Library/CloudStorage/OneDrive-Personal/AxumAI/eEMDEX_prettifiedtext.txt",
)

loader = TextLoaderWithMetadata(EMDEX_TEXT_PATH)
# The metadata passed here is supplied to the loader alongside the file content.
documents = loader.load(
    metadata={
        "title": "The Complete Drug Formulary For Nigeria’s Health Professionals",
        "link": "www.emdexafrica.com",
        "references": [
            "Wikipedia contributors. (2023, July 29). EMDEX. Wikipedia. https://en.wikipedia.org/wiki/EMDEX"
        ],
    },
)

# Split into chunks of at most 1024 characters with no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1024, chunk_overlap=0
)
docs = text_splitter.split_documents(documents)
assert docs, f"no chunks were produced from {EMDEX_TEXT_PATH}"

# Embedding client; deployment name and API version come from the environment.
embeddings = AzureOpenAIEmbeddings(
    azure_deployment=os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
)

# Last expression of the cell: the number of chunks.
len(docs)

2338

In [4]:
# Display the first chunk to sanity-check the loader/splitter output.
docs[0]

Document(page_content='EMDEX 2006\nperceptual disturbances. These symptoms\nmay be similar to the original complaint and\nencourage further prescribing. Some\nsymptoms may continue for weeks o\nmonths after stopping benzodiazepines.\nPatients should be warned that their ability\nto drive or operate machinery may be\nimpaired and that the effects of alcohol may\nbe enhanced.\n1.1.1\nBarbiturates\nAMYLOBARBITONE\nTherapeutic category:  Hypnotic; Barbiturate\nIndication:  Intractable insomnia\nContra-indications, Precautions, etc:  See\nPhenobarbitone\nAdvice to patients:  See Label 19 in Appendix\n6.\nDose: By mouth, Adult, 100-200mg at\nbedtime.\nChild: Not recommended\nSee lit. for details\nPOM Amylobarbitone (EDL 25.1)\nTablet  or Capsule, Amylobarbitone (as Sodium\nSalt) 200mg.\nInjection Amylobarbitone (as Sodium Salt)\n250mg; powder in vial.\n1.1.2\nBenzodiazepines\nBROMAZEPAM\nIndications:  Acute tension and anxiety states\ndifficulties in interpersonal contact, insomnia\nanxious 

In [5]:
# Prepare pinecone
# Both settings are read from the environment; a missing variable raises
# KeyError on the line that reads it.

# Pinecone client handle.
PC = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
# Name of the index that the vector store is pointed at.
PC_INDEX_NAME = os.environ["PINECONE_INDEX_NAME"]
# Last expression of the cell: displays the result of listing indexes.
PC.list_indexes()

{'indexes': [{'dimension': 1536,
              'host': 'axum-druginsights-80ua1d8.svc.eastus2-5e25.prod-azure.pinecone.io',
              'metric': 'cosine',
              'name': 'axum-druginsights',
              'spec': {'serverless': {'cloud': 'azure', 'region': 'eastus2'}},
              'status': {'ready': True, 'state': 'Ready'}}]}

In [8]:
# Create the index only when it does not exist yet, so this cell can be
# re-run safely instead of being commented in and out by hand.
# NOTE(review): the spec below targets aws/us-east-1, while the index listing
# recorded in this notebook shows cloud 'azure', region 'eastus2' — confirm the
# intended cloud/region before running against a fresh project.
if PC_INDEX_NAME not in PC.list_indexes().names():
    PC.create_index(
        name=PC_INDEX_NAME,
        dimension=1536,  # must equal the embedding vector size
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [7]:
# Adding documents to the vector database.
# Each chunk gets a deterministic ID derived from its position and text, so
# re-running this cell with the same chunks upserts the same records instead
# of adding a second copy of every chunk.
# NOTE(review): records uploaded earlier without explicit IDs are not touched
# by this; clean them up separately if the index already holds duplicates.
# If you want to add more documents, you can use the .add_documents method instead.
chunk_ids = [
    hashlib.sha256(f"{position}:{chunk.page_content}".encode("utf-8")).hexdigest()
    for position, chunk in enumerate(docs)
]
vectorstore = PineconeVectorStore.from_documents(
    docs, embeddings, index_name=PC_INDEX_NAME, ids=chunk_ids
)

# Or, load an existing vector store
# vectorstore = PineconeVectorStore(embedding=embeddings, index_name=PC_INDEX_NAME)


RateLimitError: Error code: 429 - {'error': {'code': '429', 'message': 'Requests to the Embeddings_Create Operation under Azure OpenAI API version 2024-02-01 have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 86400 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.'}}

In [11]:
# let us test out the document search

query = "ACE inhibitors"
# Keep the hits under their own name: rebinding `docs` here would replace the
# list of chunks, and re-running the upload cell would then send only the hits.
search_results = vectorstore.similarity_search(query, k=3)
print(len(search_results))
# An empty result list is possible; only show the top hit when there is one.
if search_results:
    print(search_results[0].page_content)

3


In [1]:
import json
import os

# Move up to the parent directory so that the `src` package and config.json
# resolve from the working directory.
# Guarded so that re-running this cell does not climb one directory further
# each time.
# NOTE(review): assumes the target directory is the one containing `src/` — confirm.
if not os.path.isdir("src"):
    os.chdir("../")


In [3]:
from src.agents.prompts.qa_prompts_2 import CONTEXTUALIZE_Q_SYSTEM_PROMPT, QA_SYSTEM_PROMPT

In [4]:
# Copy the two system prompts into config.json.
with open("config.json", "r", encoding="utf-8") as f:
    config = json.load(f)

config["contextualize_q_system_prompt"] = CONTEXTUALIZE_Q_SYSTEM_PROMPT
config["qa_system_prompt"] = QA_SYSTEM_PROMPT

# Serialise before opening the file for writing: mode "w" truncates the file
# immediately, so an error raised during serialisation would otherwise leave
# config.json empty.
serialized_config = json.dumps(config)
with open("config.json", "w", encoding="utf-8") as f:
    f.write(serialized_config)
