# 2. RAG Question Answering for Car Intelligent Assistant

With the vector dataset generated in step 1, this notebook shows how to ask questions and get answers augmented with context retrieved from the dataset. This technique is called "Retrieval Augmented Generation" (RAG).

Authors:
- Luis Bernardo Hernandez Salinas
- Juan R. Terven

In [None]:
import os
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
from langchain_mistralai.chat_models import ChatMistralAI
from langchain_mistralai.embeddings import MistralAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from getpass import getpass
from langchain_core.prompts import ChatPromptTemplate

## Set API key, embedding model, and vector database path

In [None]:
# List of models tested:
# gpt-3.5-turbo
# gpt-4-turbo
# gpt-4o
# gpt-4o-mini
# claude-3-haiku-20240307
# claude-3-sonnet-20240229
# claude-3-opus-20240229
# claude-3-5-sonnet-20240620
# open-mistral-7b
# open-mixtral-8x7b
# open-mixtral-8x22b
# mistral-large-2407
# open-mistral-nemo-2407
# llama-7b-chat
# llama-13b-chat
# llama-70b-chat
# llama3-8b
# llama3-70b
# llama3.1-8b
# llama3.1-70b
# llama3.1-405b
# Qwen2-72B
# gemma-7b
# gemma-2b

# Model to use
llm_name = "gpt-4o"

# Embedding size requested from the embedding model (1536 is the alternative
# value that was tried).
embedding_dimensions = 3072

# Environment variable that holds the API key for each supported model family.
# A family matches when its key is a substring of llm_name; entries are checked
# in insertion order, so the first match wins.
api_key_env_vars = {
    "gpt": "OPENAI_API_KEY",
    "claude": "ANTHROPIC_API_KEY",
    "mistral": "MISTRAL_API_KEY",
    "mixtral": "MISTRAL_API_KEY",
    "llama": "LLAMA_API_KEY",
    "gemma": "LLAMA_API_KEY",
}

api_key_env_var = next(
    (env_var for family, env_var in api_key_env_vars.items() if family in llm_name),
    None,
)
if api_key_env_var is None:
    # Stop here instead of continuing with `client` undefined, which would
    # only surface later as a confusing NameError.
    raise ValueError(
        f"Unsupported model {llm_name!r}: expected a name containing one of "
        f"{', '.join(api_key_env_vars)}"
    )

# NOTE: despite its name, `client` holds the API key string, not a client object.
try:
    client = os.environ[api_key_env_var]
except KeyError:
    raise KeyError(
        f"Environment variable {api_key_env_var} must be set to use model {llm_name!r}"
    ) from None

print(f"Using model {llm_name}")

# Embedding model, sized by embedding_dimensions
embedding_model = OpenAIEmbeddings(
    model="text-embedding-3-large",
    dimensions=embedding_dimensions,
)

# Vector dataset: the directory name encodes the vehicle and the embedding size
vehicle_name = "kia_sorento"
vectordb_directory = "vector_database_{}_{}".format(vehicle_name, embedding_dimensions)
print("Using vector database " + vectordb_directory)

## Load Vector database

In [None]:
# The database must already exist on disk (it is built in step 1). Check up
# front, because opening a missing directory is expected to give an empty
# store rather than an error, and the RAG chain would then retrieve nothing.
if not os.path.isdir(vectordb_directory):
    raise FileNotFoundError(
        f"Vector database directory {vectordb_directory!r} not found; "
        "generate it with step 1 first"
    )

# Create chroma db from existing vectordb_directory
vectordb = Chroma(
    embedding_function=embedding_model,
    persist_directory=vectordb_directory,
)

# count() reports the number of records stored in the collection,
# not the number of collections.
document_count = vectordb._collection.count()
print(f"Loaded {document_count} documents from vector database")


## Model instruction for Retrieval Augmented Generation

In [None]:
# Instruction prompt for the RAG chain. {context} and {question} are
# placeholders that are filled in when the prompt is formatted.
# The backslash after the opening quotes keeps the string from starting
# with an empty line.
template = """\
You are an intelligent driver assistant called IDAS, your task is to answer the questions that are asked of you. 
If the question is about the vehicle, use the provided context obtained from the car manual. 
If you don’t know the answer even with the context provided say "I am sorry, I did not find the answer in the car manual"
Don’t try to make up an answer.
Respond in a concrete way, provide the information extracted and summarized from the context,
do not say that the information appear in the manual for the user to search unless that is the user desire.
Keep the answer as concise as possible. 
Context: {context}
Question: {question}
Helpful Answer:
"""

### Set LLM and RAG object

In [None]:
# Create the prompt template object from the instruction text; its
# {context} and {question} placeholders are filled in by the QA chain.
qa_chain_prompt = PromptTemplate.from_template(template)

# Instantiate the chat model for the provider implied by llm_name.
# `client` holds the provider API key; temperature=0 is passed to every
# provider (presumably to keep answers as repeatable as possible).
if "gpt" in llm_name:
    llm = ChatOpenAI(model_name=llm_name, api_key=client, temperature=0)
elif "claude" in llm_name:
    llm = ChatAnthropic(model_name=llm_name, api_key=client, temperature=0)
elif "mistral" in llm_name or "mixtral" in llm_name:
    llm = ChatMistralAI(model=llm_name, api_key=client, temperature=0)
elif "llama" in llm_name or "gemma" in llm_name:
    # Llama and Gemma models are reached through the OpenAI chat client
    # pointed at a different base URL.
    llm = ChatOpenAI(model_name=llm_name, api_key=client, temperature=0,
                     base_url="https://api.llama-api.com")
else:
    # Without this branch an unsupported name would leave `llm` undefined
    # and fail later with a NameError when the chain is built.
    raise ValueError(
        f"Unsupported model {llm_name!r}: expected a name containing one of "
        "gpt, claude, mistral, mixtral, llama, gemma"
    )

# Retrieval-augmented QA chain: the retriever comes from the vector database,
# and the custom prompt replaces the chain's default one. Source documents are
# returned alongside the answer.
retriever = vectordb.as_retriever()
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type_kwargs={"prompt": qa_chain_prompt},
    return_source_documents=True,
)

## Let's try RAG with a query

In [None]:
query = "How many airbags does this car have?"

# Running the RAG chain performs these steps:
#   1. The query is placed into the prompt
#   2. The query embedding is computed
#   3. The most relevant documents are retrieved by embedding similarity
#   4. The prompt is augmented with the retrieved documents
#   5. The prompt is sent to the LLM
#   6. The model response is returned
chain_inputs = {"query": query}
model_response = qa_chain.invoke(chain_inputs)

# The generated answer is stored under the "result" key
answer = model_response["result"]
print(answer)
