In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Build a FAISS index from one PDF: load -> split -> embed -> persist.

# Load documents (example with PDFs)
loader = PyPDFLoader("Roadmap-Assessment-7.pdf")
docs = loader.load()

# Split documents into chunks (chunk_size=200, with chunk_overlap=100 shared
# between neighbouring chunks so context is not cut at a chunk boundary).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=100)
chunks = text_splitter.split_documents(docs)

# Embed and store chunks.
# The API key is deliberately NOT written in the notebook: when
# `openai_api_key` is omitted, LangChain reads it from the OPENAI_API_KEY
# environment variable. Export that variable before running this cell.
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_documents(chunks, embeddings)

# Save vector store for retrieval
vector_store.save_local("faiss_index")

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI

# Rebuild the index with larger chunks.
# NOTE: this cell reuses `docs`, `OpenAIEmbeddings` and `FAISS` from the
# previous cell, so that cell must have been run first.

# Split documents into optimized chunks (chunk_size=400, chunk_overlap=100)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=100)
chunks = text_splitter.split_documents(docs)

# Create embeddings and store chunks efficiently.
# The API key is deliberately NOT written in the notebook: when
# `openai_api_key` is omitted, LangChain reads it from the OPENAI_API_KEY
# environment variable.
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_documents(chunks, embeddings)

# Save the indexed embeddings. This writes to the same "faiss_index" path as
# the previous cell, so the earlier index on disk is replaced.
vector_store.save_local("faiss_index")

def retrieve_top_results(query, top_k=5):
    """Return the text of the chunks that best match *query*.

    Runs a similarity search against the module-level ``vector_store`` and
    keeps only each hit's ``page_content``.

    Args:
        query: Search text passed to ``vector_store.similarity_search``.
        top_k: Number of results requested from the search (``k``).

    Returns:
        A list with the ``page_content`` of every returned result, in the
        order the vector store returned them.
    """
    matches = vector_store.similarity_search(query, k=top_k)

    texts = []
    for match in matches:
        texts.append(match.page_content)
    return texts

# Retrieve context for a sample question and ask the LLM to answer it.
query = "Explain RAG optimization techniques"
retrieved_chunks = retrieve_top_results(query)
print(retrieved_chunks)

# The API key is deliberately NOT written in the notebook: when
# `openai_api_key` is omitted, LangChain reads it from the OPENAI_API_KEY
# environment variable.
llm = OpenAI()

# Join the chunks as plain text rather than interpolating the list itself,
# which would put Python's repr (brackets, quotes, escaped newlines) into the
# prompt.
context = "\n\n".join(retrieved_chunks)

# The question must be part of the prompt; without it the model only sees the
# retrieved text and has nothing specific to answer.
prompt = (
    f"Using this information:\n{context}\n\n"
    f"Provide an answer to the question: {query}"
)

response = llm.predict(prompt)
print(response)
