# 2. IDAS Conversational RAG

With the vector database generated in step 1, this notebook shows how to ask questions and get answers augmented with context retrieved from that database. This technique is called "Retrieval Augmented Generation" (RAG).

Authors:
- Luis Bernardo Hernandez Salinas
- Juan R. Terven

In [None]:
import os
from langchain_anthropic import ChatAnthropic
from langchain_mistralai.chat_models import ChatMistralAI
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.vectorstores import Chroma
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

import logging

# Raise the root logger threshold to ERROR so warning-level records are hidden.
root_logger = logging.getLogger()
root_logger.setLevel(logging.ERROR)

## Set API key, embedding model and vector database path

In [None]:
# List of models tested:
# gpt-3.5-turbo
# gpt-4-turbo
# gpt-4o
# gpt-4o-mini
# claude-3-haiku-20240307
# claude-3-sonnet-20240229
# claude-3-opus-20240229
# claude-3-5-sonnet-20240620
# open-mistral-7b
# open-mixtral-8x7b
# open-mixtral-8x22b
# mistral-large-2407
# open-mistral-nemo-2407
# llama-7b-chat
# llama-13b-chat
# llama-70b-chat
# llama3-8b
# llama3-70b
# llama3.1-8b
# llama3.1-70b
# llama3.1-405b
# Qwen2-72B
# gemma-7b
# gemma-2b

# Model to use; its name must contain one of the provider markers checked below
llm_name = "gpt-4o"

# Dimensions requested from the embedding model; also part of the vector
# database directory name below.
# NOTE(review): presumably must match the value used when the database was
# built in step 1 -- confirm.
embedding_dimensions = 3072 #1536  # 3072

# API key: pick the environment variable of the provider that serves llm_name.
# Despite its name, `client` holds the API key string, not a client object.
# NOTE(review): "Qwen2-72B" from the list above matches none of these markers.
if "gpt" in llm_name:
    api_key_env_var = 'OPENAI_API_KEY'
elif "claude" in llm_name:
    api_key_env_var = 'ANTHROPIC_API_KEY'
elif "mistral" in llm_name or "mixtral" in llm_name:
    api_key_env_var = 'MISTRAL_API_KEY'
elif "llama" in llm_name or "gemma" in llm_name:
    api_key_env_var = 'LLAMA_API_KEY'
else:
    # Stop here instead of printing and continuing: otherwise `client` stays
    # undefined and a later cell fails with an unrelated NameError.
    raise ValueError(
        f"INVALID MODEL! {llm_name!r} does not match any supported provider "
        "(gpt, claude, mistral, mixtral, llama, gemma)"
    )

try:
    client = os.environ[api_key_env_var]
except KeyError:
    # Same exception type as a bare lookup, but with an actionable message.
    raise KeyError(
        f"Set the {api_key_env_var} environment variable to use model {llm_name!r}"
    ) from None

print(f"Using model {llm_name}")

# The embedding model is built unconditionally, whichever chat model is selected.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-large", dimensions=embedding_dimensions)

# Vector dataset: the directory name encodes the embedding dimensions
vectordb_directory = f'vector_database_chspark_{embedding_dimensions}'
print(f"Using vector database {vectordb_directory}")

## Load Vector database

In [None]:
# Open the Chroma store persisted in vectordb_directory.
# Fail early when the directory is missing: Chroma does not raise in that case
# but starts a new, empty store there, so every retrieval would come back empty.
if not os.path.isdir(vectordb_directory):
    raise FileNotFoundError(
        f"Vector database directory {vectordb_directory!r} not found; "
        "generate it with step 1 first"
    )

vectordb = Chroma(
    embedding_function=embedding_model,
    persist_directory=vectordb_directory
)

# Default retriever over the store; used by the RAG chain built later
retriever = vectordb.as_retriever()

# count() is the number of embeddings stored in the collection,
# not a number of collections.
print(f"Loaded {vectordb._collection.count()} embeddings from vector database")


## Set LLM

In [None]:
# Instantiate the chat model for llm_name. `client` holds the API key string
# chosen in the configuration cell; every provider is created with temperature=0.
if "gpt" in llm_name:
    llm = ChatOpenAI(model_name=llm_name, api_key=client, temperature=0)
elif "claude" in llm_name:
    llm = ChatAnthropic(model_name=llm_name, api_key=client, temperature=0)
elif "mistral" in llm_name or "mixtral" in llm_name:
    llm = ChatMistralAI(model=llm_name, api_key=client, temperature=0)
elif "llama" in llm_name or "gemma" in llm_name:
    # These models are reached through the OpenAI client class pointed at
    # a different base URL.
    llm = ChatOpenAI(model_name=llm_name, api_key=client, temperature=0,
                     base_url="https://api.llama-api.com")
else:
    # Without this branch `llm` would be undefined and the hasattr() check
    # below would fail with a NameError that hides the real cause.
    raise ValueError(
        f"Unsupported model {llm_name!r}: expected a name containing one of "
        "gpt, claude, mistral, mixtral, llama, gemma"
    )

# Not every chat model class exposes `model_name`; print it only when present.
if hasattr(llm, "model_name"):
    print(f"Using Model: {llm.model_name}")

## Model instruction for Retrieval Augmented Generation

In [None]:
# Step 1 of the chain: rewrite the latest user question into a standalone
# question using the chat history, then retrieve with that rewritten question.
contextualize_q_system_prompt = " ".join(
    [
        "Given a chat history and the latest user question",
        "which might reference context in the chat history,",
        "formulate a standalone question which can be understood",
        "without the chat history. Do NOT answer the question,",
        "just reformulate it if needed and otherwise return it as is.",
    ]
)

# Message layout: instructions, then the prior turns, then the new question
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)


# Answer question
# System instructions for the answering step. Adjacent string literals are
# concatenated with nothing in between, so every sentence carries its own
# terminating period and trailing space; without them the sentences fuse
# together ("...asked of youIf the question...").
# "{context}" is the template variable that receives the retrieved documents.
system_prompt = (
    "You are an intelligent driver assistant called IDAS, your task is to answer the questions that are asked of you. "
    "If the question is about the vehicle, use the provided context obtained from the car manual. "
    "If you don’t know the answer even with the context provided say 'I don't know the answer'. "
    "Don’t try to make up an answer. "
    "Respond in a concrete way, provide the information extracted and summarized from the context. "
    "Do not say that the information appear in the manual for the user to search unless that is the user desire. "
    "Keep the answer as concise as possible."
    "\n\n"
    "{context}"
)

### Conversational RAG object

In [None]:
# Prompt for the answering step: system instructions, prior turns, new question
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

# Chain that answers from the documents handed to it
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# Full RAG chain: history-aware retrieval followed by the answering chain
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)


# In-memory chat histories, keyed by session id
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating an empty one on first use."""
    try:
        return store[session_id]
    except KeyError:
        # First time this session id is seen: register a fresh history.
        history = store[session_id] = ChatMessageHistory()
        return history


# Wrap the RAG chain with per-session message history. The three keys name
# where the user input, the prior messages and the answer live in the chain's
# input/output dictionaries.
conversational_rag_chain = RunnableWithMessageHistory(
    runnable=rag_chain,
    get_session_history=get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

## Let's try RAG with a query

In [None]:
question = "What is the optimal air pressure for the tires?, tell me in units"

# The session id selects the chat history; on first use a key "abc123"
# is created in `store`.
first_query_config = {"configurable": {"session_id": "abc123"}}
ai_msg_1 = conversational_rag_chain.invoke({"input": question}, config=first_query_config)

print(ai_msg_1["answer"])

In [None]:
second_question = "is it the same for front and rear tires?"

# Same session id as the previous query, so the history already stored under
# "abc123" in `store` is reused and the follow-up can refer back to it.
followup_config = {"configurable": {"session_id": "abc123"}}
ai_msg_2 = conversational_rag_chain.invoke({"input": second_question}, config=followup_config)

print(ai_msg_2["answer"])