<a href="https://colab.research.google.com/github/Satyadeep-Dey/AI-experiments/blob/main/10_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RAG with fallback to LLM
### First use RAG to answer questions from a knowledge base stored in Chroma Vector DB
For this we use ConversationalRetrievalChain
### Then we'll see how to get the answer from the LLM directly when RAG cannot answer the question
Direct retriever is used in this scenario

In [None]:
!pip install -q langchain langchain-openai langchain-chroma langchain-core openai tiktoken chromadb transformers sentence-transformers langchain-community


In [None]:
# imports
import os
import time
from google.colab import drive
from google.colab import userdata
import requests
from IPython.display import Markdown, display, update_display
from openai import OpenAI
from huggingface_hub import login
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.schema import Document
from langchain.schema import HumanMessage, AIMessage
# needed for falling back to LLM when KB does not have required info
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain

## Utility Function : Read from a file

In [None]:
def read_text_from_file(folder_path, file_name, encoding="utf-8"):
    """Read a text file stored under the mounted Google Drive ``MyDrive`` folder.

    Mounts Drive at ``/content/drive`` (forcing a remount) on every call,
    waits until ``MyDrive`` is visible, then reads
    ``MyDrive/<folder_path>/<file_name>``.

    Args:
        folder_path: Folder path relative to ``MyDrive``.
        file_name: Name of the file inside ``folder_path``.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the runtime's locale settings.

    Returns:
        The file contents as a string, or the literal string
        ``"File not found!"`` when the file does not exist. No exception is
        raised for a missing file, so callers must compare against that
        sentinel before using the result.
    """
    # Always mount Drive explicitly when using Google Drive
    drive.mount('/content/drive', force_remount=True)
    print("Drive mounted.")

    # Wait until MyDrive is available
    mydrive_path = '/content/drive/MyDrive'
    while not os.path.exists(mydrive_path):
        print("Waiting for Drive to be ready...")
        time.sleep(1)

    file_path = os.path.join(mydrive_path, folder_path, file_name)

    # Open first and handle the failure, instead of checking existence and then
    # opening: the file could disappear between the two steps.
    try:
        with open(file_path, 'r', encoding=encoding) as file:
            return file.read()
    except FileNotFoundError:
        return "File not found!"


In [None]:
# Constants: OpenAI model identifiers and the folder name passed to Chroma
# as its persist directory.

GPT_4o_mini = "gpt-4o-mini"
GPT_4o = "gpt-4o"
db_name = "vector_db"


In [None]:
# Sign in to HuggingFace Hub

# The token comes from Colab's secret store; it is also saved as a git credential.
hf_token = userdata.get('HF_TOKEN')
login(token=hf_token, add_to_git_credential=True)

In [None]:
# Sign in to OpenAI using Secrets in Colab

# Read the key from Colab's secret store rather than hard-coding it in the notebook.
openai_api_key = userdata.get('OPENAI_API_KEY')
# Direct OpenAI SDK client. The LangChain objects created in the cells visible
# here are given `openai_api_key` themselves and do not go through this client.
openai = OpenAI(api_key=openai_api_key)


In [None]:
# Let's read the text first

original_content = read_text_from_file(
    folder_path="Files/Knowledge-Base",
    file_name="Anonymized by OpenAI_TOTC.txt",  # alternative file: "Anonymized by OpenAI_TOTC_V4.txt"
)

# read_text_from_file reports a missing file by returning this literal string
# instead of raising. Stop here, otherwise the error text itself would be
# chunked and embedded as if it were the knowledge base.
if original_content == "File not found!":
    raise FileNotFoundError(
        "Knowledge-base file was not found in Google Drive; "
        "check folder_path and file_name."
    )

print(f"The number of characters are : {len(original_content)}")
# str.split() with no arguments splits on runs of whitespace, so the length
# of the resulting list is the word count.
number_of_words = len(original_content.split())
print(f"Number of words is : {number_of_words}")
# print()
# print(original_content)



In [None]:
# Wrap text as a Document
# The splitter below works on Document objects, so the raw string is wrapped first.
doc = Document(page_content=original_content)

# Split into chunks
# Target size is 1000 characters per chunk, with 200 characters shared between
# neighbouring chunks so text cut at a boundary still appears whole in one of them.
# NOTE(review): CharacterTextSplitter is believed to split only on its separator
# (a blank line by default), so a long paragraph may exceed chunk_size - confirm.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents([doc])
print(f"Total number of chunks: {len(chunks)}")


In [None]:
# Put the chunks of data into a Vector Store that associates a Vector Embedding with each chunk
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
# Note : we need to use 'openai_api_key=openai_api_key' because we're using LangChain and not Open AI directly !

# Chroma.from_documents adds to whatever is already persisted in `db_name`,
# so re-running this cell in the same runtime would store every chunk a second
# time and skew retrieval. Drop the previous collection before rebuilding it.
if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

# Create vectorstore
vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)
# Note : The vector DB will be stored in the local folder of this Notebook and will be lost when we disconnect from runtime
print(f"Vectorstore created with {vectorstore._collection.count()} documents")

In [None]:
# create a new Chat with OpenAI (model: GPT_4o_mini, i.e. "gpt-4o-mini")
llm = ChatOpenAI(openai_api_key=openai_api_key,temperature=0.7, model_name=GPT_4o_mini)

# set up the conversation memory for the chat
# memory_key='chat_history' is the key under which the stored messages are
# later read back from the chain result.
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# the retriever is an abstraction over the VectorStore that will be used during RAG
retriever = vectorstore.as_retriever()

# putting it together: set up the conversation chain with the gpt-4o-mini LLM, the vector store and memory
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [None]:
# Let's try a simple question

query = "Who is the author of A Chronicle of Two Cities"
# The chain returns a dict; "answer" holds the reply text. `result` is kept
# because the next cell reads its "chat_history" entry.
result = conversation_chain.invoke({"question": query})
print(result["answer"])

In [None]:
# Show the conversation held so far as a readable transcript
chat_history_data = result

# Prefix each stored message with its speaker; any other message type is skipped
for message in chat_history_data['chat_history']:
    if isinstance(message, HumanMessage):
        line = f"Human: {message.content}"
    elif isinstance(message, AIMessage):
        line = f"AI: {message.content}"
    else:
        continue
    print(line)

In [None]:
# Start from an empty history, since the next question is unrelated to the previous one
memory.clear()

query = "Can you describe Elise Manet in a few sentences"
chat_history_data = conversation_chain.invoke({"question":query})
print(chat_history_data["answer"]) # print only the answer, not the entire chat

query = "Who is Elise Manet married to ?" # follow-up question; memory now holds the previous question and answer
chat_history_data = conversation_chain.invoke({"question":query})
print(chat_history_data["answer"])

# Uncomment the print statement below to see the complete chat history.
# The previous question and answer are part of chat_history.
#print(chat_history_data)

In [None]:
# memory is not cleared here, so the history from the previous cells is still sent along
query = "Who is Philippe Duval married to ?"
chat_history_data = conversation_chain.invoke({"question":query})
print(chat_history_data["answer"])

query = "What is his original name ?" # relies on the chat history to resolve who "his" refers to
chat_history_data = conversation_chain.invoke({"question":query})
print(chat_history_data["answer"])


In [None]:
# Question about the story, answered through the retrieval chain
query = "Who are the owners of the wine shop in this story ?"
chat_history_data = conversation_chain.invoke({"question":query})
print(chat_history_data["answer"])

In [None]:
# The answer to this question is the context for the follow-up questions in the next cell
query = "Who is on trial for treason against England ?"
chat_history_data = conversation_chain.invoke({"question":query})
print(chat_history_data["answer"])

In [None]:
# Follow-up questions: "he" and "this trial" are not defined in the questions
# themselves and have to be resolved from the chat history kept in memory.
query = "Why is he on trial ?" # chat retains context and so knows what we mean by "he"
chat_history_data = conversation_chain.invoke({"question":query})
print(chat_history_data["answer"])

query = "Who are the witnesses in this trial ?"
chat_history_data = conversation_chain.invoke({"question":query})
print(chat_history_data["answer"])

In [None]:
# A general-knowledge question that is not covered by the knowledge base
query = "What is the capital of India ?"
chat_history_data = conversation_chain.invoke({"question":query})
print(chat_history_data["answer"])
# The chain cannot answer this because it only looks at the knowledge base in the vector DB

# Create a decision chain to route between RAG and direct LLM

*    Uses the retriever directly, so there is NO conversation history
*    This code is courtesy of Claude 3.7 Sonnet. It works without any changes!
*    I first tried OpenAI ChatGPT, but its code didn't work.

In [None]:
# Assuming you have llm, retriever, and memory already set up

# Create a decision chain to route between RAG and direct LLM
# The prompt asks the model to label a question "DOMAIN" (about the book in the
# knowledge base) or "GENERAL" (common knowledge). {question} is the only
# placeholder and is filled in on each call.
decision_template = """Determine if the following question requires domain-specific knowledge or is general knowledge.
If the question is about a book titled 'A Chronicle of Two Cities', it's characters , events , places or specialized information
likely in your knowledge base, respond with "DOMAIN".
If the question is about general facts like capitals, history, science, or common knowledge, respond with "GENERAL".

Question: {question}

Decision (DOMAIN/GENERAL):"""

decision_prompt = PromptTemplate(template=decision_template, input_variables=["question"])
# Reuses the same `llm` object as the conversation chain.
# NOTE(review): that llm was created with temperature=0.7, so the label for a
# borderline question may differ between runs - confirm whether that is acceptable.
decision_chain = LLMChain(llm=llm, prompt=decision_prompt)

# Create a simple function to handle the routing logic
def _ask_llm_directly(prompt):
    """Send ``prompt`` to ``llm`` as a single human message and return the reply text."""
    return llm.invoke([HumanMessage(content=prompt)]).content


def smart_qa(question):
    """Answer ``question`` via RAG or the plain LLM, depending on a routing decision.

    ``decision_chain`` first labels the question. If the label contains
    "DOMAIN" (case-insensitive), the retriever is queried and the documents it
    returns are passed to a "stuff" question-answering chain. Otherwise, or
    when the retriever returns nothing, the question goes straight to ``llm``.

    No conversation memory is involved: every call is independent, so
    follow-up questions that rely on earlier answers have no context.

    Args:
        question: The user's question as plain text.

    Returns:
        The answer text.
    """
    # First determine if we should use RAG or direct LLM.
    # LLMChain puts the model's reply under the "text" key of its result.
    decision_result = decision_chain.invoke({"question": question})["text"]
    print(f"Decision: {decision_result}")

    if "DOMAIN" not in decision_result.upper():
        # Use direct LLM for general knowledge questions
        return _ask_llm_directly(f"Answer this general knowledge question: {question}")

    # Use RAG for domain-specific questions
    docs = retriever.invoke(question)
    if not docs:
        # Fall back to general if no docs found.
        # NOTE(review): a similarity retriever over a non-empty store presumably
        # always returns its top matches, so this branch may rarely run - confirm.
        return _ask_llm_directly(f"Answer this question: {question}")

    # The "stuff" chain puts all retrieved documents into a single prompt
    # and returns its answer under the "output_text" key.
    qa_chain = load_qa_chain(llm=llm, chain_type="stuff")
    return qa_chain.invoke({"input_documents": docs, "question": question})["output_text"]


In [None]:
# Example usage: the comment after each question is the route it is expected to take
example_questions = [
    "What is the capital of Australia?",  # GENERAL
    # The next question creates a problem for the LLM: it classifies it as GENERAL,
    # probably because the name of the book is similar to the original.
    "Who is the author of the book A Chronicle of Two Cities",  # DOMAIN
    "Who are the owners of the wine shop in this story ?",  # DOMAIN
    "Can you describe Elise Manet in a few sentences",  # DOMAIN
    "Who is the Jackal and who is the Lion ?",  # DOMAIN
    "Who is Philippe Duval married to ?",  # DOMAIN
    "What is his original name ?",  # DOMAIN
    "Who are the witnesses in this trial ?",  # DOMAIN
    "What is the capital of UK ?",  # GENERAL
]

# Print each answer followed by a blank line
for example_question in example_questions:
    print(smart_qa(example_question))
    print()

