# Notebook Setup
<a target="_blank" href="https://colab.research.google.com/github/PacktPublishing/Generative-AI-Integration-Patterns-1E/blob/main/Chapter08/Integration_pattern_Real_Time_retrieval_augmented_generation.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [None]:
#Install dependencies

!pip install --upgrade google-cloud-aiplatform
!pip install --upgrade langchain_community langchain_google_vertexai langchain_chroma unstructured[pdf]

In [None]:
# Authenticate the current user through the google.colab auth helper.
# The google.colab package is imported here, so this cell is expected to run inside Colab.
from google.colab import auth as google_auth
google_auth.authenticate_user()

In [None]:
import base64
import json

#VertexAI
import vertexai
from vertexai.generative_models import GenerativeModel, Part, FinishReason
import vertexai.preview.generative_models as generative_models
from google.cloud import aiplatform

# Langchain
from langchain_community.document_loaders import TextLoader, UnstructuredPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from unstructured.cleaners.core import clean_extra_whitespace
from langchain_google_vertexai import VertexAIEmbeddings


#Markdown
from IPython.display import display, Markdown, Latex


In [None]:
# Notebook-wide settings. The trailing "#@param" markers are Colab form annotations.
# PROJECT / LOCATION are passed to aiplatform.init, MODEL to GenerativeModel,
# EMBEDDINGS_MODEL to VertexAIEmbeddings, and MAX_RESULTS is the k used for similarity search.
PROJECT = "testproject-410220"#@param {type:"string"}
LOCATION = "us-central1"#@param {type:"string"}
MODEL = "gemini-1.5-flash-001"#@param {type:"string"}
EMBEDDINGS_MODEL = "text-embedding-004"#@param {type:"string"}
MAX_RESULTS = 4#@param {type:"number"}

# Vector database initialization and ingestion


In [None]:
#@title Dataset Download

# Download the earnings-call transcript PDF used as the knowledge source.
# NOTE: the URL names the Q1 2024 call (30-April-2024), while the local file name says "2023";
# the loader cell reads the file under this exact name.
!wget https://d1io3yog0oux5.cloudfront.net/_c38ec26158c6d5493f3fce02d606a6a1/cocacolacompany/db/764/8109/file/CORRECTED+TRANSCRIPT_+The+Coca-Cola+Co.%28KO-US%29%2C+Q1+2024+Earnings+Call%2C+30-April-2024+8_30+AM+ET.pdf -O coca_cola_earnings_call_2023.pdf

In [None]:
# Read the downloaded PDF (applying clean_extra_whitespace as a post-processor)
# and cut it into overlapping chunks that will later be embedded.
loader = UnstructuredPDFLoader(
    "coca_cola_earnings_call_2023.pdf",
    post_processors=[clean_extra_whitespace],
)
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2048,
    chunk_overlap=200,
)
pages = loader.load_and_split(text_splitter=text_splitter)

In [None]:
# Sanity check: show the second chunk (index 1) to confirm the PDF was parsed and split
print(pages[1])

In [None]:
# Initialise the Vertex AI SDK with the configured project and location, then create
# the embeddings client that is handed to the vector store in the next step.
aiplatform.init(project=PROJECT, location=LOCATION)
embeddings_function = VertexAIEmbeddings(model=EMBEDDINGS_MODEL)

In [None]:
# Create a local instance of ChromaDB
from langchain_chroma import Chroma

# Generate embeddings for every chunk with embeddings_function and load them into ChromaDB.
# NOTE(review): no persist directory or collection name is passed here, so the store is
# presumably in-memory only and re-running this cell may add the same chunks again;
# confirm against the langchain_chroma documentation.
db = Chroma.from_documents(pages, embeddings_function)

In [None]:
# Smoke-test the vector store with a sample query

query = "Who is the call for?"
docs = db.similarity_search(query,k=MAX_RESULTS)

# Print the text of the first returned chunk
print(docs[0].page_content)

In [None]:
#@title RAG Logic

# Prompt for retrieval-augmented answering: {context} is filled with the chunks
# retrieved from the vector store and {query} with the user's question.
prompt_template = """
You are a helpful assistant for an online financial services company that allows users to check their balances, invest in certificates of deposit (CDs), and perform other financial transactions.

Your task is to answer questions from your customers, in order to do so follow these rules:

1. Carefully analyze the question you received.
2. Carefully analyze the context provided.
3. Answer the question using ONLY the information provided in the context, NEVER make up information
4. Always think step by step.

<context>
{context}
</context>
User question: {query}
Answer:
"""

# Decoding parameters forwarded to generate_content
generation_config = dict(
    max_output_tokens=8192,
    temperature=0,
    top_p=0.95,
)

# Apply the same BLOCK_ONLY_HIGH threshold to every listed harm category
safety_settings = {
    category: generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH
    for category in (
        generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT,
    )
}

def get_context(query, db, number_of_results):
  """Retrieve the chunks most similar to ``query`` and format them as prompt context.

  Args:
    query: Question text used for the similarity search.
    db: Vector store exposing ``similarity_search(query, k=...)``. Each hit must
      provide ``page_content`` and a ``metadata`` mapping.
    number_of_results: Number of chunks to request from ``db``.

  Returns:
    One string with a ``---``-delimited section per retrieved chunk, in the
    order returned by the store. Empty string when nothing is returned.
  """
  docs = db.similarity_search(query, k=number_of_results)
  sections = []
  for doc in docs:
    # Not every document carries a "source" entry; use a placeholder instead of raising KeyError
    source = (doc.metadata or {}).get("source", "unknown")
    sections.append(
        f"""\n---This information is contained in a document called {source} \n\n {doc.page_content}\n\n---"""
    )
  # Join once instead of growing the string inside the loop
  return "".join(sections)


def generate(prompt):
  """Send ``prompt`` to the model named by ``MODEL`` and return the non-streamed response.

  The notebook-level ``generation_config`` and ``safety_settings`` are applied
  to the request.
  """
  request_options = {
      "generation_config": generation_config,
      "safety_settings": safety_settings,
      "stream": False,
  }
  return GenerativeModel(MODEL).generate_content([prompt], **request_options)



# Entry Point

In [None]:
# Simulate the message a user would type into a chat interface

question = "What is this call about?"

# Prompt Preprocessing

In [None]:
# Query the vector database with the question received, then populate the
# prompt template with both the question and the retrieved context
context = get_context(question, db, MAX_RESULTS)
prompt = prompt_template.format(query=question, context=context)

In [None]:
# Inspect the fully rendered prompt before it is sent to the model
print(prompt)

# Inference

In [None]:
# Submit the full prompt (instructions, retrieved context and question) to the LLM
result = generate(prompt)

# Result Postprocessing

In [None]:
# Format the answer as Markdown. An ATX heading needs a space after the "#" run;
# without it CommonMark/GFM renderers show "###Question:" as literal text.
formatted_result = f"### Question:\n{question}\n\n### Answer:\n{result.text}\n\n<details><summary>Context</summary>{context}</details>"

# Result Presentation

In [None]:
# Render the formatted answer as Markdown in the cell output
display(Markdown(formatted_result))

# Demo

In [None]:
#In this case we will use a Gradio interface to interact with the system

#Install Gradio

!pip install --upgrade gradio

In [None]:
import gradio as gr

def answer_question(query, db, number_of_results):
  """Answer ``query`` end to end: retrieve context, fill the prompt template, call the model.

  Returns the ``text`` attribute of the model response.
  """
  retrieved = get_context(query, db, number_of_results)
  full_prompt = prompt_template.format(query=query, context=retrieved)
  return generate(full_prompt).text

def chat(message, history):
    """Submit handler for the chat UI.

    Answers ``message`` using the notebook-level ``db`` and ``MAX_RESULTS``,
    appends the (message, answer) pair to ``history`` and returns an empty
    string together with the updated history.
    """
    turn = (message, answer_question(message, db, MAX_RESULTS))
    history.append(turn)
    return "", history


# Build the chat UI: a title, the conversation view and a textbox for the user's question
with gr.Blocks() as demo:
  gr.Markdown("Fintech Assistant")
  chatbot = gr.Chatbot(show_label=False)
  message = gr.Textbox(placeholder="Enter your question")
  # On submit, chat() receives the textbox value and the chat history; its two return
  # values are written back to the textbox and the chatbot, in that order. Because
  # chat() returns "" first, the textbox ends up empty after each submission.
  message.submit(chat, [message, chatbot],[message, chatbot]  )

# NOTE(review): debug=True presumably keeps the cell running and surfaces errors in the
# cell output; confirm against the Gradio launch() documentation.
demo.launch(debug=True)