# Pinecone Vector Database Integration

**References:**

    - [LangChain Pinecone](https://python.langchain.com/v0.2/docs/integrations/vectorstores/pinecone/)
    - [PineconeVectorStore API](https://api.python.langchain.com/en/latest/vectorstores/langchain_pinecone.vectorstores.PineconeVectorStore.html)

In [2]:
import os
from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec
from langchain_community.document_loaders import TextLoader
from langchain_openai import AzureOpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_pinecone import PineconeVectorStore



  from tqdm.autonotebook import tqdm


In [3]:
# Load environment variables from the .env file one directory up, so the
# os.environ lookups in the cells below can find their settings.
# NOTE(review): presumably this file defines the AZURE_OPENAI_* and PINECONE_* keys — confirm.
load_dotenv("../.env")

True

In [7]:
# Prepare the inputs for indexing: load the source text file, split it into
# chunks, and configure the embedding model.
loader = TextLoader("../data/druginsights_prettify_test_2.txt")
documents = loader.load()

# Split the loaded documents into chunks of size 1024 with no overlap between
# neighbouring chunks (size is presumably measured in characters — confirm
# against the splitter's length function).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Azure OpenAI embedding client. The deployment name and API version are read
# from the environment; a missing variable raises KeyError.
embeddings = AzureOpenAIEmbeddings(
    azure_deployment=os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
)

# Show the resulting chunks as the cell output.
docs

 Document(page_content="drugs.' dosages_and_how_it_should_be_taken='Treatment should be initiated with very low doses. In some children, the diuretic dose may need to be reduced or the diuretic discontinued at least 24 hours beforehand. If high-dose diuretic therapy cannot be stopped, close observation is recommended after administration of the first dose of ACE inhibitor, for at least 2 hours or until the blood pressure has stabilised.' what_happens_if_i_miss_a_dose='Not specified in the provided text.' what_happens_if_i_overdose='Not specified in the provided text.' what_should_i_avoid_while_taking_the_medication='Potassium-sparing diuretics or potassium-containing salt substitutes should be avoided as they increase the risk of hyperkalaemia. NSAIDs should also be avoided as they increase the risk of renal damage.' side_effects='Side effects can include hyperkalaemia, dry cough, apnoea, seizures, renal failure, and severe unpredictable hypotension.' drugs_that_cause_interactions='NSA

In [6]:
# Prepare Pinecone: create the client and look up the target index name.

# Both values are read from the environment; a missing variable raises KeyError.
PC = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
PC_INDEX_NAME = os.environ["PINECONE_INDEX_NAME"]
# List the existing indexes as the cell output, to check whether the target
# index is already there.
PC.list_indexes()

{'indexes': [{'dimension': 1536,
              'host': 'axum-druginsights-fmmnhqp.svc.aped-4627-b74a.pinecone.io',
              'metric': 'cosine',
              'name': 'axum-druginsights',
              'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
              'status': {'ready': True, 'state': 'Ready'}}]}

In [8]:
# Run this only once, and only if the index does not exist yet (check the list_indexes() output above).

# PC.create_index(
#             name=PC_INDEX_NAME,
#             dimension=1536,
#             metric="cosine",
#             spec=ServerlessSpec(cloud="aws", region="us-east-1"),
#         )

In [10]:
# Populate the vector database with the split documents. Run this only once
# for the same set of documents.
# NOTE(review): from_documents does not appear to clear the index first; per
# the langchain-pinecone documentation it seems to upsert vectors under newly
# generated IDs when none are supplied, so re-running it on the same documents
# likely duplicates them rather than overwriting — confirm.
# NOTE: `docs` is rebound to search results by the query cell below; re-run the
# splitting cell before uncommenting this line.
# If you want to add more documents to an existing store, use the
# .add_documents method instead.
# vectorstore = PineconeVectorStore.from_documents(docs, embeddings, index_name=PC_INDEX_NAME)

# Or, load an existing vector store
vectorstore = PineconeVectorStore(embedding=embeddings, index_name=PC_INDEX_NAME)


In [11]:
# Smoke-test the vector store with a similarity search for the top 3 matches.
# NOTE: this rebinds `docs`, which previously held the split chunks from the
# loading cell; re-run that cell before indexing documents again.

query = "ACE inhibitors"
docs = vectorstore.similarity_search(query, k=3)
print(len(docs))
# Guard against an empty result (e.g. an empty index) so the cell reports it
# instead of failing with IndexError on docs[0].
if docs:
    print(docs[0].page_content)
else:
    print("No matching documents found.")

3
