**Retrieval-Augmented Generation (RAG)** is a method that integrates information retrieval to give generative language models additional information. 

**A typical RAG pipeline comprises two main components:**

1. a **Retriever Module** that first selects relevant documents or pieces of information from a large corpus based on the input query,
2. an **Answer Generation Module** that uses the retrieved information to produce more accurate and contextually relevant responses.

#### Steps to implement a RAG pipeline (Part 1/2):
1. Indexing Documents

2. Creating Embeddings

3. Creating a vector store and storing the embeddings

In [None]:
pip install -r requirements.txt

In [1]:

from dotenv import load_dotenv
from colorama import Fore
import warnings
# Silence every warning for the rest of the session to keep cell output compact.
warnings.simplefilter("ignore")

# Load settings from a .env file (the OpenAI key is later read from the
# OPENAI_API_KEY environment variable). As the last expression of the cell,
# the call's return value is displayed as the cell output.
load_dotenv()

True

1. Load and split documents

In [3]:
from langchain_community.document_loaders import TextLoader 
from langchain.text_splitter import CharacterTextSplitter 

def load_documents(
    path: str = "./docs/user-manual.txt",
    chunk_size: int = 100,
    chunk_overlap: int = 0,
    separator: str = "\n\n",
):
    """Load a text file and split it into chunks.

    This function only loads and splits; embedding the chunks and storing
    them in a vector store happens in a later step.

    Args:
        path: Path of the text file to load.
        chunk_size: Target chunk size passed to ``CharacterTextSplitter``.
            Text is only split on ``separator``, so a piece between two
            separators that is longer than ``chunk_size`` is kept whole and
            the splitter merely reports it.
        chunk_overlap: Overlap between consecutive chunks.
        separator: String on which the text is split.

    Returns:
        The chunked documents produced by
        ``CharacterTextSplitter.split_documents``.
    """
    loader = TextLoader(path)
    raw_documents = loader.load()
    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separator=separator,
    )
    return text_splitter.split_documents(raw_documents)


documents = load_documents()
print(f"Loaded {len(documents)} documents")

Created a chunk of size 198, which is longer than the specified 100
Created a chunk of size 390, which is longer than the specified 100
Created a chunk of size 232, which is longer than the specified 100
Created a chunk of size 252, which is longer than the specified 100
Created a chunk of size 221, which is longer than the specified 100
Created a chunk of size 280, which is longer than the specified 100
Created a chunk of size 133, which is longer than the specified 100


Loaded 8 documents


2. Create vector store and store embeddings

In [4]:
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from openai import OpenAI
import os

# OpenAI client used for the direct embedding calls below; the key comes from
# the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


def get_embedding(text_to_embed, model="text-embedding-ada-002"):
    """Embed a single text with the OpenAI embeddings endpoint.

    The vector is printed (so the notebook shows what an embedding looks
    like) and also returned, so callers can use it instead of only seeing it.

    Args:
        text_to_embed: The text to embed.
        model: Name of the embedding model to use.

    Returns:
        The embedding of ``text_to_embed`` as returned by the API.
    """
    response = client.embeddings.create(
        model=model,
        input=[text_to_embed],
    )
    # One input was sent, so the single result is at index 0.
    embedding = response.data[0].embedding
    print(embedding)
    return embedding

def load_embeddings(user_query, documents, *, show_embeddings=True):
    """Create a vector store from a set of documents and return a retriever.

    Args:
        user_query: Query whose embedding is printed for illustration. It is
            not stored in the vector store.
        documents: Chunked documents to embed and store in Chroma.
        show_embeddings: When true (the default), print the embedding of the
            query and of every chunk via ``get_embedding``. This issues one
            extra embeddings request per text on top of the ones made while
            building the store, so pass ``False`` to skip it.

    Returns:
        A retriever over the newly created Chroma vector store.
    """
    # Local import: the OpenAIEmbeddings class in langchain_community is
    # deprecated in favour of the one shipped in langchain_openai.
    from langchain_openai import OpenAIEmbeddings

    embeddings = OpenAIEmbeddings()
    db = Chroma.from_documents(documents, embeddings)

    if show_embeddings:
        get_embedding(user_query)
        # Plain loop: the calls are made for their printed output only.
        for doc in documents:
            get_embedding(doc.page_content)

    return db.as_retriever()


retriever = load_embeddings("I have an error code E2", documents)

  warn_deprecated(


[-0.015627305954694748, -0.0038546891883015633, -0.015446563251316547, -0.024010995402932167, -0.028390534222126007, 0.014181363396346569, -0.023232409730553627, -0.010239779017865658, -0.003701752983033657, -0.00817513931542635, 0.007980492897331715, 0.0031612622551620007, 0.0014546324964612722, -0.005293680354952812, -0.0187972579151392, 0.00027133154799230397, 0.006451129913330078, 0.01373645756393671, 0.012652000412344933, -0.005749013740569353, -0.004056286998093128, 0.0016136515187099576, -0.013812926597893238, -0.006944697350263596, 0.006013176403939724, -0.014487235806882381, 0.003216875484213233, -0.017420832067728043, -0.016405891627073288, -0.014807011932134628, 0.012777130119502544, -0.026916783303022385, -0.022968247532844543, -0.043656352907419205, -0.028181985020637512, -0.004876581486314535, -0.019979039207100868, 0.001525886938907206, 0.022008920088410378, -0.01136594545096159, 0.04852250963449478, 0.013131664134562016, -0.0107681043446064, -0.010705539025366306, -0.02

#### Steps to implement a RAG pipeline (Part 2/2):
1. Define a prompt

2. Create and run the retrieval chain


In [5]:
from langchain_openai import ChatOpenAI
from langchain.prompts.chat import (
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.prompts import ChatPromptTemplate, PromptTemplate

# Prompt for the run without retrieval: the question is its only variable.
prompt: str = "You are a customer support specialist who answers questions {question} and assist users with general inquiries"
prompt_template = PromptTemplate.from_template(prompt)

# System prompt for the RAG run. It is built from adjacent string literals so
# that no stray "/" characters or source indentation end up in the text sent
# to the model.
template: str = (
    "You are a customer support specialist.\n"
    "question: {question}. You assist users with general inquiries based on {context}\n"
    "and technical issues."
)
system_message_prompt = SystemMessagePromptTemplate.from_template(template)

# The human message carries only the question; its input variable is taken
# from the template text itself.
human_message_prompt = HumanMessagePromptTemplate.from_template("{question}")

# Full chat prompt: expects both "question" and "context" when invoked.
chat_prompt_template = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)

# Chat model shared by both chains, created with its default settings.
model = ChatOpenAI()

2. Create and run the chain

without RAG

In [6]:
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

def generate_response(retriever, query):
    """Answer ``query`` with the plain prompt, i.e. without retrieved context.

    ``retriever`` is part of the signature but is not used by this variant.
    Returns the model's reply as a string.
    """
    # The raw query is forwarded unchanged as the prompt's {question} value.
    question_input = {"question": RunnablePassthrough()}
    to_text = StrOutputParser()
    pipeline = question_input | prompt_template | model | to_text
    return pipeline.invoke(query)


response = generate_response(retriever, "I have an error code E2")
print(f"{Fore.GREEN}{response}{Fore.RESET}")

TypeError: unsupported operand type(s) for |: 'dict' and 'str'

with RAG

In [None]:
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

def generate_response(retriever, query):
    """Answer ``query`` using context fetched by ``retriever`` (RAG).

    Args:
        retriever: Retriever that returns the documents relevant to the query.
        query: The user's question.

    Returns:
        The model's reply as a string.
    """

    def _join_page_content(docs):
        # Hand the model the chunk text only, not the repr of the Document
        # objects (which would also include their metadata).
        return "\n\n".join(doc.page_content for doc in docs)

    chain = (
        {
            # The query goes to the retriever; its documents become {context}.
            "context": retriever | _join_page_content,
            # The same query is passed through unchanged as {question}.
            "question": RunnablePassthrough(),
        }
        | chat_prompt_template
        | model
        | StrOutputParser()
    )
    return chain.invoke(query)


response = generate_response(retriever, "I have an error code E2")
print(f"{Fore.GREEN}{response}{Fore.RESET}")