# Step-by-step implementation of Naive RAG
The steps below implement Naive RAG using LangChain:
- Import necessary libraries
- OpenAI environment variable setup
- Data indexing process
- Retrieval process
- Augment process
- Generation process

## Import necessary libraries

In [0]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from utils import get_apikey 

## OpenAI environment variable setup


In [0]:
# Fetch the key first and validate it *before* touching os.environ:
# os.environ only accepts str values, so assigning a missing (None) key would
# raise an unrelated TypeError before the check below could ever run.
OPENAI_API_KEY = get_apikey()  # Add your OpenAI API Key
if not OPENAI_API_KEY:
    # Covers both an empty string and None.
    raise ValueError("Please set the OPENAI_API_KEY environment variable")
# Export the validated key so clients created without an explicit key
# argument can pick it up from the environment.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

## Data indexing process

In [0]:
# Locations used by the indexing step: the source PDF and the directory in
# which Chroma persists the vector store.
DOC_PATH = "Transformer.pdf"
CHROMA_PATH = "/usr/local/notebooks"

# Load the PDF, then cut it into chunks of at most 500 characters that
# overlap by 50 characters.
loader = PyPDFLoader(DOC_PATH)
pages = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=500,
)
chunks = text_splitter.split_documents(pages)

# Embed every chunk with OpenAI embeddings and store the vectors in a Chroma
# collection persisted under CHROMA_PATH.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
db_chroma = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=CHROMA_PATH,
)

## Retrieval process

In [0]:
# Ask the vector store for the five best matches to the question; each hit
# comes back as a (document, score) pair.
query = 'What is Transformer?'
docs_chroma = db_chroma.similarity_search_with_score(query, k=5)

# Keep only the text of each hit (the score is discarded) and merge the
# pieces into a single context string separated by blank lines.
page_texts = [document.page_content for document, _ in docs_chroma]
context_text = "\n\n".join(page_texts)

## Augment process

In [0]:
# Prompt skeleton for the augmentation step. The {context} and {question}
# placeholders are filled in below with the retrieved text and the user query;
# the remaining lines instruct the model to answer from that context only.
PROMPT_TEMPLATE = """
Answer the question based only on the following context:
{context}
Answer the question based on the above context: {question}.
Provide a detailed answer.
Don’t justify your answers.
Don’t give information not mentioned in the CONTEXT INFORMATION.
Do not say "according to the context" or "mentioned in the context" or similar.
"""

# Build a chat prompt template from the skeleton, then substitute the
# retrieved context and the query to obtain the final prompt.
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
# NOTE(review): format() presumably returns one flattened string rather than a
# list of chat messages — confirm against the LangChain docs; format_messages()
# may be preferable if the chat message structure should be preserved.
prompt = prompt_template.format(context=context_text, question=query)

## Generation process

In [0]:
# Create the chat model; no arguments are passed, so the library defaults apply.
model = ChatOpenAI()
# Send the augmented prompt to the model and keep the reply.
response_text = model.invoke(prompt)
# NOTE(review): despite its name, response_text appears to be a message object
# rather than a plain string — the recorded output shows content,
# response_metadata and usage_metadata fields. Printing response_text.content
# would presumably show only the answer text — confirm.
print(response_text)

content='Transformer is a general sequence processing tool that is used beyond language in various data modalities such as vision, audio, action, music, image generation, and neural signal processing. It is an architecture that involves attention mechanisms and is widely used for information processing in different fields.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 54, 'prompt_tokens': 179, 'total_tokens': 233}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-63c77321-c1c9-4eaf-b487-c954867f2f55-0' usage_metadata={'input_tokens': 179, 'output_tokens': 54, 'total_tokens': 233}
