# Simple RAG method

In [None]:
# Data ingestion: read speech.txt into LangChain documents

from langchain_community.document_loaders import TextLoader

# Build the loader for the local text file, then load it.
loader = TextLoader(file_path="speech.txt")
text_documents = loader.load()

# Last expression: show what was loaded.
text_documents

In [None]:
# Configuring environment variable

import os
import warnings

from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Do not write an empty placeholder into os.environ when the key is missing:
# an empty value can hide the misconfiguration until an API call fails.
# Warn here instead, so the cells that do not use OpenAI can still run.
if not os.getenv('OPENAI_API_KEY'):
    warnings.warn(
        "OPENAI_API_KEY is not set; add it to your .env file or environment "
        "before running the OpenAI embedding cells.",
        stacklevel=2,
    )

In [None]:
# Loader method 1: web-based loader
# Load the content of an HTML page, keeping only selected parts of it.

import bs4
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(
    web_path="https://lilianweng.github.io/posts/2023-06-23-agent/",
    # Restrict BeautifulSoup parsing to elements with these CSS classes.
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-title", "post-content", "post-header"),
        ),
    },
)

web_document = loader.load()
web_document

In [None]:
# Loader method 2: PDF reader

from langchain_community.document_loaders import PyPDFLoader

# Build the loader for the local PDF, then load it into documents.
loader = PyPDFLoader(file_path='std7-bb-english.pdf')
pdf_document = loader.load()

# Last expression: show the loaded documents.
pdf_document

In [None]:
# Document chunking: split the loaded PDF documents into overlapping chunks

from langchain.text_splitter import RecursiveCharacterTextSplitter

# chunk_size / chunk_overlap control the chunk length and the overlap
# shared by neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

documents = text_splitter.split_documents(documents=pdf_document)
documents

In [None]:
# Vector DB 1: Chroma vector store built from OpenAI embeddings

from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

# Only the first ten chunks are embedded and stored.
db = Chroma.from_documents(
    documents=documents[:10],
    embedding=OpenAIEmbeddings(),
)
db

In [None]:
# Chroma similarity search

# Placeholder text: replace with a real question before running.
query = "---insert query here---"

# Search the Chroma store and show the text of the first hit.
result = db.similarity_search(query=query)
result[0].page_content


In [None]:
# Vector DB 2: FAISS index built from OpenAI embeddings

from langchain_community.vectorstores import FAISS

# Every chunk in `documents` is embedded and indexed here.
db1 = FAISS.from_documents(
    documents=documents,
    embedding=OpenAIEmbeddings(),
)
db1

In [None]:
# FAISS similarity search

# Placeholder text: replace with a real question before running.
query = "---insert query here---"

# Search the FAISS index and show the text of the first hit.
result1 = db1.similarity_search(query=query)
result1[0].page_content

# Retriever and Chain with LangChain

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Build the loader and load in one expression; `data` holds the loaded documents.
data = PyPDFLoader(file_path="std7-bb-english.pdf").load()

data

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Same splitter settings as the chunking cell earlier in the notebook.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Preview the first five chunks produced from `data`.
text_splitter.split_documents(documents=data)[:5]

In [None]:
# Keep the first five chunks as a small working sample for the cells below.
sample_splitted_data = text_splitter.split_documents(documents=data)[:5]
sample_splitted_data

In [None]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# Index the sample chunks with Ollama embeddings.
# NOTE: this rebinds `db`, which earlier in the notebook referred to the Chroma store.
db = FAISS.from_documents(
    documents=sample_splitted_data,
    embedding=OllamaEmbeddings(),
)

db

In [None]:
# Similarity search against the FAISS index built from the sample chunks.
# An empty query string gives the embedding model nothing to embed, so the
# search needs a real question.
query = "Can you share the list of topics in index page"

# Show the text of the first hit.
result = db.similarity_search(query)
result[0].page_content


In [None]:
from langchain_community.llms import Ollama
# Create the Ollama LLM wrapper for the "llama2" model
llm = Ollama(model="llama2")
llm

In [None]:
# Chat prompt template with two placeholders:
#   {context} - filled with the content the answer must be based on
#   {input}   - the user's question
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(template="""
    Answer the following question based only on the provided content.
    Think step by step before providing a detailed summary.
    I will tip you $100 if user finds the answer helpful.
    <content>
    {context}
    </content>
    Question: {input}
    """)


In [None]:
# Document chain: combine the LLM with the prompt template defined above
from langchain.chains.combine_documents import create_stuff_documents_chain

document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [None]:
# Expose the current `db` vector store through the retriever interface.
retriever = db.as_retriever()
retriever

In [None]:
from langchain.chains import create_retrieval_chain

# Combine the retriever with the document chain into one retrieval chain.
retriever_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)


In [None]:
# Invoke the retrieval chain and display the full response it returns.
retriever_chain.invoke({"input": "Can you share the list of topics in index page"})

In [None]:
# Ask the question, then show only the answer text when the response has one.
response = retriever_chain.invoke(
    {"input": "Can you share the list of topics in index page"}
)

# Fall back to the whole response if there is no 'answer' key.
response.get('answer', response)