In [4]:
import warnings
warnings.filterwarnings('ignore')
from langchain_core.messages import SystemMessage, HumanMessage
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

load_dotenv()


True

In [5]:
import getpass
import os
import time

from pinecone import Pinecone, ServerlessSpec

if not os.getenv("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass.getpass("Enter your Pinecone API key: ")

pinecone_api_key = os.environ.get("PINECONE_API_KEY")

pc = Pinecone(api_key=pinecone_api_key)

In [6]:
import time

index_name = "langchain-rag"  # change if desired

existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=3072,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

index = pc.Index(index_name)

In [7]:
from langchain_openai import OpenAIEmbeddings
import getpass
import os

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")


embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [8]:
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [9]:
from uuid import uuid4

from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]
uuids = [str(uuid4()) for _ in range(len(documents))]

vector_store.add_documents(documents=documents, ids=uuids)

['9bf547eb-aa37-4d4c-ae0e-3e6108792cfb',
 'f1eb7f07-5c04-43ac-bb26-e3e898595a10',
 'da034962-7da7-4a06-8a4a-2985fb25202b',
 '4b77df16-1d67-42ed-846d-838c4e891fb7',
 '61d3f7ab-8da6-4bde-bd81-bf9cf68d9b3d',
 'b506a0be-b78d-4d73-bfde-08448fc14013',
 '67bbf20b-6173-4509-9135-661f310276e5',
 '7d805195-1a56-4f86-a50c-eeb1dbe5556d',
 'a7c9ae3b-4453-41e6-8ad9-46c512293492',
 'f9c4fa67-1021-48a1-ac79-bc6ab63e522e']

In [10]:
results = vector_store.similarity_search(
    "LangChain provides abstractions to make working with LLMs easy",
    k=2,
)
results

[Document(id='da034962-7da7-4a06-8a4a-2985fb25202b', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='7d805195-1a56-4f86-a50c-eeb1dbe5556d', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!')]

In [14]:
results = vector_store.similarity_search_with_score(
    "Will it be hot tomorrow?", k=1
)
for res, score in results:
    print(f"* [SIM={score:.2f}] {res.page_content} [{res.metadata}]")
results

* [SIM=0.55] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]


[(Document(id='f1eb7f07-5c04-43ac-bb26-e3e898595a10', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
  0.553126454)]

In [18]:
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 1, "score_threshold": 0.5},
)
results = retriever.invoke("Stealing from the bank is a crime")

# print(results)
for doc in results:
    print(f"{doc.metadata}: {doc.page_content}")

{'source': 'news'}: Robbers broke into the city bank and stole $1 million in cash.


In [20]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

llm = ChatOpenAI(model="gpt-4o-mini")
system_prompt = (
    "Use the given context to answer the question. "
    "If you don't know the answer, say you don't know. "
    "Use three sentence maximum and keep the answer concise. "
    "Context: {context}"
)
prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        ("human", "{input}"),
    ])

question_answer_chain = create_stuff_documents_chain(llm, prompt)
chain = create_retrieval_chain(retriever, question_answer_chain)

chain.invoke({"input": "What is the down points in stock market"})


{'input': 'What is the down points in stock market',
 'context': [Document(id='a7c9ae3b-4453-41e6-8ad9-46c512293492', metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.')],
 'answer': 'The stock market is down 500 points today. This decline is attributed to fears of a recession.'}

In [22]:
# from langchain_community.llms import OpenAI
from langchain.chains import RetrievalQA

retrievalQA = RetrievalQA.from_llm(llm=llm, retriever=retriever)

retrievalQA.invoke("What is the down points in stock market")

{'query': 'What is the down points in stock market',
 'result': 'The stock market is down 500 points today due to fears of a recession.'}