# Retrieval Augmented Generation

Table of Contents
1. Idea
2. Naive Implementation
3. Graph Implementation

In [1]:
import pandas as pd
from language_models.proxy_client import BTPProxyClient
from language_models.agents.react import ReActAgent
from language_models.agents.chain import AgentChain
from language_models.tools.tool import Tool
from language_models.models.llm import OpenAILanguageModel
from language_models.models.embedding import SentenceTransformerEmbeddingModel
from language_models.retrievers import BasicRetriever, ContextualCompressionRetriever
from language_models.retrievers.utils import split_documents
from language_models.vector_stores import FAISSVectorStore, DistanceMetric
from language_models.settings import settings
from langchain_core.documents import Document
from pydantic import BaseModel, Field
from numpy import dot
from numpy.linalg import norm
from pathlib import Path
from langchain_core.documents import Document
from utils import load_docs_from_json, save_docs_to_json
from pprint import pprint

01/06/24 19:12:01 INFO Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
01/06/24 19:12:07 INFO Use pytorch device_name: mps
01/06/24 19:12:08 INFO Loading faiss.
01/06/24 19:12:08 INFO Successfully loaded faiss.


In [2]:
proxy_client = BTPProxyClient(
    client_id=settings.CLIENT_ID,
    client_secret=settings.CLIENT_SECRET,
    auth_url=settings.AUTH_URL,
    api_base=settings.API_BASE,
)

In [3]:
# Load every raw job posting under ./data/jobs into one Document per file.
path = Path("./data/jobs")
# iterdir() yields entries in arbitrary, OS-dependent order. Sorting makes the
# document order (and therefore the `documents[:30]` subset taken later in this
# notebook) reproducible across machines and runs.
filenames = sorted(file.name for file in path.iterdir() if file.is_file())

documents = []
for filename in filenames:
    file_path = path / filename
    # errors="replace" keeps loading even when a posting is not valid UTF-8.
    content = file_path.read_text(encoding="utf-8", errors="replace")
    # Keep the source as a plain string: the metadata is later written to JSON
    # and copied into a DataFrame, where a Path object is awkward to handle.
    documents.append(Document(page_content=content, metadata={"source": str(file_path)}))

In [4]:
system_prompt = """Take the following job and extract data about the job.

Respond with the following extracted data:
- job_title: The job title."""

# GPT-4 client used for job-title extraction; the short max_tokens budget is
# enough because only a job title is requested.
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model='gpt-4',
    max_tokens=128,
    # Previously passed as `float=0.0`, which is not a sampling parameter.
    # The second LLM configured in this notebook uses `temperature=0.0`.
    temperature=0.0,
)

class Job(BaseModel):
    job_title: str = Field(description="The job title.")

job_data_agent = ReActAgent.create(
    llm=llm,
    system_prompt=system_prompt,
    task_prompt="Job description:\n{job}",
    task_prompt_variables=["job"],
    tools=None,
    output_format=Job,
    iterations=5,
)

In [5]:
def extract_job_titles(documents: list[Document]) -> list[Document]:
    """Annotate each document with the job title extracted by ``job_data_agent``.

    The documents are modified in place: the extracted title is stored under
    ``metadata["job_title"]``. The agent is invoked once per document.

    Args:
        documents: Job postings whose ``page_content`` holds the posting text.

    Returns:
        The same list that was passed in, with the updated metadata.
    """
    for document in documents:
        response = job_data_agent.invoke({"job": document.page_content})
        document.metadata["job_title"] = response.final_answer["job_title"]
        # Reset the agent before the next posting (presumably clears its
        # accumulated conversation state -- confirm against ReActAgent.reset).
        job_data_agent.reset()
    return documents

In [6]:
# Reuse previously extracted job titles when the JSON cache can be loaded.
# Otherwise run the extraction (one agent call per posting) on the first 30
# postings and cache the result for the next run.
try:
    documents = load_docs_from_json('./data/jobs.json')
except Exception:
    # `except Exception` rather than a bare `except:` so that interrupting the
    # kernel (KeyboardInterrupt) or SystemExit is not mistaken for a cache miss
    # and does not start the extraction.
    documents = extract_job_titles(documents[:30])
    save_docs_to_json(documents, './data/jobs.json')

In [7]:
documents = split_documents(documents, separators=["\n\n", "\n", " ", ""], chunk_size=1000, chunk_overlap=100)

## Idea

Integrating RAG into AI systems can significantly enhance LLM responses. RAG allows the LLM to reference a knowledge base outside of its training data sources before generating a response. This is particularly useful when dealing with documents or content specific to your business that LLMs may not be familiar with. By using external information to ground the LLM, it can effectively answer questions related to those topics. However, careful implementation is essential.

![rag](../img/rag.png)

In [8]:
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model="gpt-4",
    max_tokens=256,
    temperature=0.0,
)

In [9]:
system_prompt = "You are an expert in job postings. Respond with the most accurate information about the job."

class Output(BaseModel):
    content: str = Field(description="The final answer.")

agent = ReActAgent.create(
    llm=llm,
    system_prompt=system_prompt,
    task_prompt="{question}",
    task_prompt_variables=["question"],
    output_format=Output,
    iterations=5,
)

In this example, we ask the LLM about the salary range for one of our job positions, an airport engineer. Since the LLM doesn't have specific information about our company, it uses its internal knowledge to generate an answer based on information it has encountered on the internet.

In [10]:
response = agent.invoke({"question": "What is the salary range of an airport engineer."})

01/06/24 19:12:10 INFO Prompt:
What is the salary range of an airport engineer.
01/06/24 19:12:15 INFO Raw response:
{
  "thought": "The salary range of an airport engineer can vary greatly depending on the location, level of experience, and the specific company. On average, in the United States, an airport engineer can expect to earn between $70,000 and $100,000 per year. However, this is a rough estimate and the actual salary can be lower or higher.",
  "tool": "Final Answer",
  "tool_input": {"content": "The salary range of an airport engineer can vary greatly depending on the location, level of experience, and the specific company. On average, in the United States, an airport engineer can expect to earn between $70,000 and $100,000 per year. However, this is a rough estimate and the actual salary can be lower or higher."}
}
01/06/24 19:12:15 INFO Thought:
The salary range of an airport engineer can vary greatly depending on the location, level of experience, and the specific compan

In [11]:
pprint(response.final_answer["content"])

('The salary range of an airport engineer can vary greatly depending on the '
 'location, level of experience, and the specific company. On average, in the '
 'United States, an airport engineer can expect to earn between $70,000 and '
 '$100,000 per year. However, this is a rough estimate and the actual salary '
 'can be lower or higher.')


Here, we provide the necessary information about the job from our document, which grounds the LLM so it can give the user an accurate response.

In [12]:
question = """What is the salary range of an airport engineer.

Use this context to answer the question:
AIRPORT ENGINEER
Class Code:       7256
Open Date:  07-06-18
(Exam Open to All, including Current City Employees)

ANNUAL SALARY

$105,005 to $153,509 and $111,854 to $163,532."""

response = agent.invoke({"question": question})

01/06/24 19:12:15 INFO Prompt:
What is the salary range of an airport engineer.

Use this context to answer the question:
AIRPORT ENGINEER
Class Code:       7256
Open Date:  07-06-18
(Exam Open to All, including Current City Employees)

ANNUAL SALARY

$105,005 to $153,509 and $111,854 to $163,532.
01/06/24 19:12:18 INFO Raw response:
{
  "thought": "The salary range for the airport engineer position is provided in the job posting.",
  "tool": "Final Answer",
  "tool_input": {
    "content": "The annual salary for an Airport Engineer as per the provided job posting ranges from $105,005 to $163,532."
  }
}
01/06/24 19:12:18 INFO Thought:
The salary range for the airport engineer position is provided in the job posting.
01/06/24 19:12:18 INFO Final answer:
{'content': 'The annual salary for an Airport Engineer as per the provided job posting ranges from $105,005 to $163,532.'}


In [13]:
pprint(response.final_answer["content"])

('The annual salary for an Airport Engineer as per the provided job posting '
 'ranges from $105,005 to $163,532.')


To effectively compare a question with our documents, we need embeddings that capture the semantic meaning of the text, starting with the questions posed to the LLM. Simply put, this involves converting text into numerical representations, aka vectors, using an embedding model.

![embedding](../img/embedding.png)

In [14]:
embedding_model = SentenceTransformerEmbeddingModel(model="all-MiniLM-L6-v2")

In [15]:
query = "What is the salary range of an airport engineer."
embedding1 = embedding_model.embed_query(query)

In [16]:
print(embedding1)

[0.061040811240673065, 0.006013249978423119, 0.012034785002470016, 0.09349054843187332, -0.03897635266184807, -0.041639544069767, 0.052157122641801834, 0.04001327604055405, -0.040530331432819366, -0.0388326458632946, -0.057182084769010544, -0.07547193765640259, -0.04045605659484863, 0.005403607618063688, -0.053804319351911545, 0.0300196073949337, -0.019651353359222412, -0.0824994146823883, 0.030851002782583237, -0.10073095560073853, 0.07586315274238586, 0.032726261764764786, 0.07222713530063629, -0.0612051896750927, 0.1382749229669571, -0.014632114209234715, 0.10441509634256363, 0.034444037824869156, 0.07069538533687592, 0.02793906442821026, 0.0020828156266361475, 0.039086028933525085, -0.02007865719497204, 0.019540050998330116, 0.043947625905275345, 0.01898784562945366, -0.025768116116523743, -0.002511907834559679, 0.05089978873729706, 0.005661378148943186, -0.03936994448304176, 0.018286826089024544, 0.010302698239684105, -0.017223015427589417, -0.061629876494407654, -0.02500708401203

Next, we also convert our documents into vectors so we can compare them to the user's question. By embedding both the question and the documents, we project the user's question into the vector space of the documents to identify the closest neighbors, such as the five most similar documents.

![vector-space](../img/vector-space.png)

In [17]:
document = """AIRPORT ENGINEER
Class Code:       7256
Open Date:  07-06-18
(Exam Open to All, including Current City Employees)

ANNUAL SALARY

$105,005 to $153,509 and $111,854 to $163,532."""

embedding2 = embedding_model.embed_query(document)

To compute the similarities between vectors, we can use mathematical formulas such as cosine similarity, euclidean distance, or inner product. The output will indicate how similar the vectors are.

Differences between value ranges:

- Euclidean distance: Measures the straight-line distance between two points, ranging from 0 to ∞. Smaller values indicate greater similarity (0 means the vectors are identical).
- Cosine similarity: Measures the cosine of the angle between two vectors, ranging from -1 to 1. Values closer to 1 indicate greater similarity.
- Inner product: Measures the dot product of two vectors and is unbounded. Higher values typically indicate greater similarity; for vectors normalized to unit length it equals the cosine similarity.

In [18]:
cosine_similarity = dot(embedding1, embedding2) / (norm(embedding1) * norm(embedding2))
print(f"Cosine similarity: {cosine_similarity}")

Cosine similarity: 0.7345214541574135


## Naive Implementation

Naive RAG is suitable when the query targets specific objects, like earthquakes or particular car models, whose documents are stored in a vector space. However, it cannot ensure that the retrieved documents contain the correct context to answer the query. For instance, projecting the query embedding into the vector space returns its nearest neighbors, which does not guarantee that only relevant documents are retrieved.

In [19]:
# Load the saved FAISS index when possible. Otherwise embed the document
# chunks, build the index and save it for the next run.
try:
    vector_store = FAISSVectorStore.load_local("./data", "job_embeddings")
except Exception:
    # `except Exception` rather than a bare `except:` so that interrupting the
    # kernel (KeyboardInterrupt) or SystemExit is not mistaken for a missing
    # index and does not start a full re-embedding.
    vector_store = FAISSVectorStore.from_documents(
        documents=documents,
        embedding_model=embedding_model,
        distance_metric=DistanceMetric.COSINE_SIMILARITY,
    )
    vector_store.save_local("./data", "job_embeddings")

In [20]:
tool_name = "Search"
tool_description = "Use this tool to search job postings."

class Search(BaseModel):
    user_text: str = Field(description="The user question/prompt/text.")
    fetch_k: int = Field(5, description="The number of documents to return.")

In [21]:
basic_retriever = BasicRetriever(
    vector_store=vector_store,
    score_threshold=0.0
)

basic_retriever_tool = Tool(
    func=basic_retriever.get_relevant_documents,
    name=tool_name,
    description=tool_description,
    args_schema=Search,
)

In [22]:
system_prompt = """You are an expert in job postings. Respond with the most accurate information about the job.

Use the search tool to answer the user's question."""

class Output(BaseModel):
    content: str = Field(description="The final answer.")

basic_retriever_agent = ReActAgent.create(
    llm=llm,
    system_prompt=system_prompt,
    task_prompt="{question}",
    task_prompt_variables=["question"],
    output_format=Output,
    tools=[basic_retriever_tool],
    iterations=5,
)

In [23]:
response = basic_retriever_agent.invoke({"question": "Give me the job description of an airport engineer."})

01/06/24 19:12:22 INFO Prompt:
Give me the job description of an airport engineer.
01/06/24 19:12:24 INFO Raw response:
{
  "thought": "I need to find the job description of an airport engineer.",
  "tool": "Search",
  "tool_input": {"user_text": "airport engineer job description", "fetch_k": 5}
}
01/06/24 19:12:24 INFO Thought:
I need to find the job description of an airport engineer.
01/06/24 19:12:24 INFO Tool:
Search
01/06/24 19:12:24 INFO Tool input:
{'user_text': 'airport engineer job description', 'fetch_k': 5}
01/06/24 19:12:24 INFO Tool response:
The examination will consist entirely of an evaluation of experience and personal qualifications by interview.  In the interview, the following competencies may be evaluated: Judgment and Decision Making, Initiative, Conscientiousness, Innovation, Emotional Maturity, Credibility, Leadership, and Job Knowledge, including knowledge of: Los Angeles World Airport organizational structure; local, state, and federal laws and regulations go

In [24]:
pprint(response.final_answer["content"])

('An Airport Engineer performs professional engineering work in the planning, '
 'design, construction, maintenance, and operation of landside facilities, '
 'structures, pavement and support systems at an airport. They also apply '
 'sound supervisory principles and techniques in building and maintaining an '
 'effective work force, and fulfill equal employment opportunity '
 'responsibilities. They need to have knowledge of engineering principles and '
 'practices related to civil, structural, electrical, mechanical, and/or '
 'communications; technical requirements and regulations related to the '
 'Federal Aviation Administration (FAA) and Transportation Security '
 'Administration (TSA) standards; layout, functions and components of airfield '
 'operational areas (AOA); layout, functions, and components of airport '
 'terminal facility operations.')


In [25]:
contextual_compression_retriever = ContextualCompressionRetriever(
    llm=llm,
    vector_store=vector_store,
    score_threshold=0.0,
)

contextual_compression_retriever_tool = Tool(
    func=contextual_compression_retriever.get_relevant_documents,
    name=tool_name,
    description=tool_description,
    args_schema=Search,
)

In [26]:
system_prompt = """You are an expert in job postings. Respond with the most accurate information about the job.

Use the search tool to answer the user's question."""

class Output(BaseModel):
    content: str = Field(description="The final answer.")

contextual_compression_retriever_agent = ReActAgent.create(
    llm=llm,
    system_prompt=system_prompt,
    task_prompt="{question}",
    task_prompt_variables=["question"],
    output_format=Output,
    tools=[contextual_compression_retriever_tool],
    iterations=5,
)

In [27]:
response = contextual_compression_retriever_agent.invoke({"question": "Give me the job description of an airport engineer."})

01/06/24 19:12:30 INFO Prompt:
Give me the job description of an airport engineer.
01/06/24 19:12:32 INFO Raw response:
{
  "thought": "I need to find the job description of an airport engineer.",
  "tool": "Search",
  "tool_input": {"user_text": "airport engineer job description", "fetch_k": 5}
}
01/06/24 19:12:32 INFO Thought:
I need to find the job description of an airport engineer.
01/06/24 19:12:32 INFO Tool:
Search
01/06/24 19:12:32 INFO Tool input:
{'user_text': 'airport engineer job description', 'fetch_k': 5}
01/06/24 19:12:39 INFO Tool response:
AIRPORT ENGINEER
Class Code:       7256
Open Date:  07-06-18
(Exam Open to All, including Current City Employees)

ANNUAL SALARY 
 
$105,005 to $153,509 and $111,854 to $163,532     

NOTES:

1. For information regarding reciprocity between the City of Los Angeles departments and LADWP, go to http://per.lacity.org/Reciprocity_CityDepts_and_DWP.pdf.
2. The current salary range is subject to change. You may confirm the starting salary 

In [28]:
pprint(response.final_answer["content"])

('An Airport Engineer performs professional engineering work in the planning, '
 'design, construction, maintenance, and operation of landside facilities, '
 'structures, pavement and support systems at an airport. They also apply '
 'sound supervisory principles and techniques in building and maintaining an '
 'effective work force, and fulfill equal employment opportunity '
 'responsibilities. The job requires knowledge of engineering principles and '
 'practices related to civil, structural, electrical, mechanical, and/or '
 'communications; technical requirements and regulations related to the '
 'Federal Aviation Administration (FAA) and Transportation Security '
 'Administration (TSA) standards; layout, functions and components of airfield '
 'operational areas (AOA); layout, functions, and components of airport '
 'terminal facility operations.')


## Graph Implementation

Knowledge graph or ontology-based RAG offers a practical approach but requires intricate implementation and management. Ideally, users create a digital twin of their business, linking documents to specific objects. This lets the LLM first locate the relevant object and then search only its associated documents for passages similar to the user query. For instance, a sophisticated RAG solution can exclusively search for repair manuals tailored to the owner's car model, grounding the LLM effectively. With a knowledge graph or ontology, users also control data access, enhancing security and query precision. In addition, one can use an episodic memory to retain recent learnings, enabling AI agents to offer and store feedback. This feedback loop can improve task performance.

In [29]:
def create_dataset(documents: list[Document]) -> pd.DataFrame:
    """Turn documents into a table with one row per document.

    Each row holds the document's job title, its text, the embedding of that
    text (computed with the notebook-level ``embedding_model``) and its source.
    Missing or empty ``job_title`` / ``source`` metadata becomes ``""``.
    """

    def to_row(doc):
        # Embed first, then read the metadata, one document at a time.
        vector = embedding_model.embed_query(doc.page_content)
        meta = doc.metadata
        return {
            "job_title": meta.get("job_title") or "",
            "text": doc.page_content,
            "embedding": vector,
            "source": meta.get("source") or "",
        }

    rows = [to_row(doc) for doc in documents]
    return pd.DataFrame(rows)

In [30]:
df = create_dataset(documents)
data = {"jobs": df}

In [31]:
def get_available_job_titles() -> list[str]:
    """Return the distinct job titles found in the notebook-level jobs table."""
    jobs = data["jobs"]
    distinct_titles = jobs.job_title.unique()
    return distinct_titles.tolist()

get_jobs_tool = Tool(
    func=get_available_job_titles,
    name="Get Available Job Titles",
    description="Use this tool to get the job titles.",
    args_schema=None,
)

In [32]:
class Job(BaseModel):
    job_title: str = Field(description="The job title.")

job_agent = ReActAgent.create(
    llm=llm,
    system_prompt="",
    task_prompt="{question} \n\nRespond with the job title.",
    task_prompt_variables=["question"],
    output_format=Job,
    tools=[get_jobs_tool],
    iterations=10,
)

In [33]:
class Search(BaseModel):
    user_text: str = Field(description="The user question/prompt/text.")
    fetch_k: int = Field(5, description="The number of documents to return.")
    job_title: str = Field(description="The job title to filter for. Must be all caps.")

def search(user_text: str, fetch_k: int, job_title: str) -> str:
    """Return the text chunks of one job that are most similar to ``user_text``.

    Reads the notebook-level ``data["jobs"]`` table and ``embedding_model``.

    Args:
        user_text: The user question/prompt/text to embed and compare.
        fetch_k: Maximum number of chunks to return; negative values are
            treated as 0.
        job_title: Job title to filter for. Matching ignores case and
            surrounding whitespace, because the stored titles are not all
            upper case (e.g. 'Gallery Attendant').

    Returns:
        ``"Context:\\n\\n"`` followed by the matching chunks, most similar
        first, separated by blank lines. The context is empty when no stored
        job has the requested title.
    """

    def calculate_cosine_similarity(user_text_embedding, embedding):
        denominator = norm(user_text_embedding) * norm(embedding)
        if denominator == 0:
            # A zero vector has no direction; score it as unrelated instead of
            # producing NaN from 0 / 0.
            return 0.0
        return dot(user_text_embedding, embedding) / denominator

    user_text_embedding = embedding_model.embed_query(user_text)
    jobs = data["jobs"]

    # Normalize both sides of the comparison. Upper-casing only the requested
    # title would never match titles that are stored in mixed case.
    wanted_title = job_title.strip().upper()
    stored_titles = jobs["job_title"].str.strip().str.upper()
    matches = jobs.loc[stored_titles == wanted_title].copy()

    matches["cosine_similarity"] = matches["embedding"].apply(
        lambda embedding: calculate_cosine_similarity(user_text_embedding, embedding)
    )
    matches = matches.sort_values(by="cosine_similarity", ascending=False)
    # Clamp so a negative fetch_k cannot slice from the end of the frame.
    matches = matches.iloc[:max(fetch_k, 0)]
    documents = "\n\n".join(matches["text"].tolist())
    return f"Context:\n\n{documents}"


search_tool = Tool(
    func=search,
    name=tool_name,
    description=tool_description,
    args_schema=Search,
)

In [34]:
system_prompt = """You are an expert in job postings. Respond with the most accurate information about the job.

Use the Search tool to find the job description."""

class Output(BaseModel):
    content: str = Field(description="The final answer.")

agent = ReActAgent.create(
    llm=llm,
    system_prompt=system_prompt,
    task_prompt="{job_title}",
    task_prompt_variables=["job_title"],
    output_format=Output,
    tools=[search_tool],
    iterations=10,
)

In [35]:
chain = AgentChain(
    chain=[job_agent, agent],
    chain_variables=["question"],
)

In [36]:
response = chain.invoke({"question": "Give me the job description of an airport engineer."})

01/06/24 19:13:22 INFO Prompt:
Give me the job description of an airport engineer. 

Respond with the job title.
01/06/24 19:13:24 INFO Raw response:
{
  "thought": "I need to find the job title for an airport engineer",
  "tool": "Get Available Job Titles",
  "tool_input": {}
}
01/06/24 19:13:24 INFO Thought:
I need to find the job title for an airport engineer
01/06/24 19:13:24 INFO Tool:
Get Available Job Titles
01/06/24 19:13:24 INFO Tool input:
{}
01/06/24 19:13:24 INFO Tool response:
['SENIOR HOUSING INSPECTOR', 'LEGISLATIVE ASSISTANT', 'DISTRICT SUPERVISOR ANIMAL SERVICES', 'LEGISLATIVE REPRESENTATIVE', 'Gallery Attendant', 'TRUCK AND EQUIPMENT DISPATCHER', 'COMMUNICATIONS CABLE SUPERVISOR', 'SHIP CARPENTER', 'PRINCIPAL SECURITY OFFICER', 'PRINCIPAL CIVIL ENGINEERING DRAFTING TECHNICIAN', 'AIRPORT ENGINEER', 'BENEFITS SPECIALIST', 'SENIOR ELECTRIC SERVICE REPRESENTATIVE', 'SENIOR BUILDING INSPECTOR', 'TREE SURGEON ASSISTANT', 'Claims Agent', 'Senior City Planner', 'Treasury Acco

In [37]:
pprint(response.final_answer["content"])

('An Airport Engineer performs professional engineering work in the planning, '
 'design, construction, maintenance, and operation of landside facilities, '
 'structures, pavement and support systems at an airport. They also apply '
 'sound supervisory principles and techniques in building and maintaining an '
 'effective work force, and fulfill equal employment opportunity '
 'responsibilities. The annual salary ranges from $105,005 to $153,509 and '
 '$111,854 to $163,532. The requirements for this position include two to five '
 'years of full-time paid professional experience in the areas of civil, '
 'structural, mechanical, electrical, or communication engineering in the '
 'design, construction, management or engineering of airport/aviation projects '
 'or programs, and a valid license as a Professional Engineer with the '
 'California State Board of Registration for Professional Engineers.')
