In [None]:
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.chat_models import ChatOpenAI
from langchain_core.documents import Document
from langchain_community.document_loaders import PyPDFLoader



In [57]:
import os

# os.environ only accepts str values, so assigning os.getenv("HF_TOKEN") directly
# raises TypeError when the variable is not set. Re-export it only when present.
hf_token = os.getenv("HF_TOKEN")
if hf_token is not None:
    os.environ['HF_TOKEN'] = hf_token

# Same behaviour as before: fall back to an empty string when no key is configured.
os.environ['OPENAI_API_KEY'] = os.getenv("OPENAI_API_KEY", "")


In [58]:
# Chat model that generates the final answer in the RAG chain.
# temperature=0 asks for the least-random sampling.
llm = ChatOpenAI(
    temperature=0,
)


In [59]:
# Hugging Face sentence-embedding model used to turn text into vectors.
embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

In [60]:

# Small demo corpus: ten short texts, each tagged with the kind of source it
# came from ("tweet", "news" or "website").
documents = [
    Document(
        page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
        metadata={"source": "tweet"},
    ),
    Document(
        page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
        metadata={"source": "news"},
    ),
    Document(
        page_content="Building an exciting new project with LangChain - come check it out!",
        metadata={"source": "tweet"},
    ),
    Document(
        page_content="Robbers broke into the city bank and stole $1 million in cash.",
        metadata={"source": "news"},
    ),
    Document(
        page_content="Wow! That was an amazing movie. I can't wait to see it again.",
        metadata={"source": "tweet"},
    ),
    Document(
        page_content="Is the new iPhone worth the price? Read this review to find out.",
        metadata={"source": "website"},
    ),
    Document(
        page_content="The top 10 soccer players in the world right now.",
        metadata={"source": "website"},
    ),
    Document(
        page_content="LangGraph is the best framework for building stateful, agentic applications!",
        metadata={"source": "tweet"},
    ),
    Document(
        page_content="The stock market is down 500 points today due to fears of a recession.",
        metadata={"source": "news"},
    ),
    Document(
        page_content="I have a bad feeling I am going to get deleted :(",
        metadata={"source": "tweet"},
    ),
]

# Keep the individual document_N names available, bound to the same objects
# that are stored in `documents`.
(
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
) = documents


In [61]:
# Size the index from the embedding model itself instead of hard-coding 384,
# so changing the embedding model cannot cause a dimension mismatch.
embedding_dim = len(embeddings.embed_query("hello world"))

# Flat (exhaustive) inner-product index.
# NOTE(review): inner product only ranks like cosine similarity when the
# embeddings are unit-normalised - confirm this holds for the chosen model.
index = faiss.IndexFlatIP(embedding_dim)

# The following cells call `vector_store`, which was never defined in this
# notebook (it only existed as leftover kernel state). Create it here so the
# notebook survives Restart Kernel -> Run All. It starts empty: a fresh
# in-memory docstore and no index-to-document-id mappings.
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)


In [62]:

# Add the sample documents to the vector store; the value displayed by this
# cell is the list of ids assigned to them.
vector_store.add_documents(
    documents=documents,
)


['176a1a7d-64ed-47da-8af3-77a6727c7656',
 '5176d35c-dd4c-4ba6-845b-922647ccddc0',
 '666c5d63-047b-45d5-94ce-259cb071e08b',
 '346426d5-384c-4566-b86c-0affcb012ae0',
 '021fbb5f-fe8b-4218-ac6d-069d818ceece',
 'bef45146-f375-431d-8f37-fc7e20f7edc6',
 '39773ea9-6272-482a-99ea-081cfe47a87e',
 '960fcceb-9e18-4d45-ad73-0aa621262d78',
 'ac15a732-6f08-4f76-b423-23342d6e4e11',
 'c038cbc1-e287-46b0-bc2b-4fb8000aedef']

In [63]:
# Retriever view of the vector store: "mmr" search type, 2 documents per query.
retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 2})

In [64]:
# Try the retriever with a question that none of the sample documents answer.
retriever.invoke(
    "what is llama model?",
)

[Document(id='666c5d63-047b-45d5-94ce-259cb071e08b', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='5176d35c-dd4c-4ba6-845b-922647ccddc0', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.')]

In [65]:
from langchain import hub

# Load the prompt published under the name "rlm/rag-prompt" via the hub client.
prompt = hub.pull(
    "rlm/rag-prompt",
)

In [66]:
def format_docs(docs):
    """Merge the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)
    

In [67]:
# RAG pipeline. The input question feeds two prompt fields: "context" (the
# retriever's hits joined into text by format_docs) and "question" (the input
# passed through unchanged). The prompt output goes to the LLM, and
# StrOutputParser parses the model output as the final step.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [68]:
# Ask a question that one of the sample documents answers directly.
rag_chain.invoke(
    "what is the best framework for building stateful, agentic applications!",
)

'LangGraph is the best framework for building stateful, agentic applications.'