# RAG application built on the IAG Product Disclosure Statement (PDS)

In [None]:
# Mount Google Drive at /content/drive; the project folder used by the
# following cells lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os

# Folder on the mounted Google Drive that holds this project's files.
persistent_folder = '/content/drive/My Drive/Colab Notebooks/AI_Chatbot_RAG/'

# Create the folder if it doesn't exist
os.makedirs(persistent_folder, exist_ok=True)

# Work from that folder so the relative paths used later
# (e.g. requirements.txt, iag_pds.pdf) resolve inside it.
# Reuse `persistent_folder` rather than repeating the literal path, so the
# directory that is created and the directory that is entered cannot diverge.
os.chdir(persistent_folder)

In [None]:
!pip install -r requirements.txt


Collecting langchain_community (from -r requirements.txt (line 2))
  Downloading langchain_community-0.3.13-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-google-genai (from -r requirements.txt (line 3))
  Downloading langchain_google_genai-2.0.7-py3-none-any.whl.metadata (3.6 kB)
Collecting python-dotenv (from -r requirements.txt (line 4))
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting streamlit (from -r requirements.txt (line 5))
  Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting langchain_experimental (from -r requirements.txt (line 6))
  Downloading langchain_experimental-0.3.4-py3-none-any.whl.metadata (1.7 kB)
Collecting langchain_chroma (from -r requirements.txt (line 8))
  Downloading langchain_chroma-0.1.4-py3-none-any.whl.metadata (1.6 kB)
Collecting langchainhub (from -r requirements.txt (line 9))
  Downloading langchainhub-0.1.21-py3-none-any.whl.metadata (659 bytes)
Collecting pypdf (from -r requirement

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Load the PDF from the current working directory.
loader = PyPDFLoader("iag_pds.pdf")
# load() returns a list of Documents, not a single one: the recorded run
# produced 74 of them, and the chunks derived from them carry a 'page' entry
# in their metadata.
data = loader.load()
# Evaluate `data` in a cell of its own to inspect the loaded documents.

In [None]:
# Number of Document objects returned by the loader.
len(data)

74

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into smaller chunks; chunk_size=1000 is the
# splitter's size limit for each chunk.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
docs = text_splitter.split_documents(data)

# Report how many chunks the split produced.
chunk_total = len(docs)
print("Total number of documents: ", chunk_total)

Total number of documents:  270


In [None]:
# Spot-check a single chunk: displays its metadata and page_content.
docs[7]

Document(metadata={'source': 'iag_pds.pdf', 'page': 3}, page_content='will not receive any compensation, and you have no right to take action against IAG if \nany of your Notes are Written-Off \nReceipt of Ordinary Shares on \nConversion\nYou will receive approximately NZ$1.01 worth of Ordinary Shares for each of your \nNotes that is Converted, unless a cap referred to as the “Maximum Conversion \nNumber” applies – see Section 6.7 of this PDS (Conversion formulae). It is likely the \nMaximum Conversion Number will apply following a Non-Viability Trigger Event, \nin which case you may receive significantly less than NZ$1.01 worth of Ordinary \nShares for each of your Notes that is Converted')

In [None]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

from dotenv import load_dotenv
# Load environment variables (including the API key) from the .env file.
load_dotenv()

# Get an API key:
# Head to https://ai.google.dev/gemini-api/docs/api-key to generate a Google AI API key,
# then paste it into the .env file.
# NOTE(review): presumably stored under GOOGLE_API_KEY - confirm against the
# langchain-google-genai documentation.

# Embedding models: https://python.langchain.com/v0.1/docs/integrations/text_embedding/

# Create the embedding client and embed a short test string as a smoke test.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vector = embeddings.embed_query("hello, world!")
# Display only the first five components of the embedding vector.
vector[:5]

[0.05168594419956207,
 -0.030764883384108543,
 -0.03062233328819275,
 -0.02802734263241291,
 0.01813093200325966]

In [None]:
# Embed every chunk and index it in a Chroma vector store. Reuse the
# `embeddings` client created in the previous cell instead of constructing a
# second, identical GoogleGenerativeAIEmbeddings instance, so the model name
# is configured in exactly one place.
vectorstore = Chroma.from_documents(documents=docs, embedding=embeddings)

In [None]:
# Similarity-search retriever over the vector store; k=10 asks for ten chunks per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 10})

# Try the retriever on a sample question before wiring it into a chain.
retrieved_docs = retriever.invoke("What is a Non-Viability Trigger Event?")


In [None]:
# Number of chunks the retriever returned for the sample question.
len(retrieved_docs)

10

In [None]:
# Print the text of one retrieved chunk (index 5) to eyeball its relevance.
print(retrieved_docs[5].page_content)

• IAG would be required on the next payment due on the Notes to pay an additional amount in 
respect of a withholding or deduction on account of taxes on a payment on the Notes;
• IAG or the consolidated tax group of which it is a member would be exposed to more than a de 
minimis amount of other taxes, assessments or other governmental charges in connection with 
the Notes; or
• IAG determines that any interest payable on the Notes is not or may not be allowed as a 
deduction for the purposes of Australian income tax,
provided that on the Issue Date, IAG did not expect that matters giving rise to the Tax Event  
would occur
Tier 1 Capital Tier 1 capital (as defined by APRA from time to time)
Tier 2 Capital Tier 2 capital (as defined by APRA from time to time)
Trigger Event Date the date on which APRA notifies IAG that a Non-Viability Trigger Event has occurred
Trust Deed the trust deed dated 4 May 2016 between IAG and the Supervisor, as amended from time to time.


In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model used to generate the answers; the model name,
# temperature and max_tokens are fixed here.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro",temperature=0.3, max_tokens=500)

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System message for the chat prompt: four instruction sentences joined by
# single spaces, then a blank line, then the "{context}" placeholder.
_prompt_sentences = [
    "You are an assistant for question-answering tasks.",
    "Use the following pieces of retrieved context to answer the question.",
    "If you don't know the answer, say that you don't know.",
    "Use three sentences maximum and keep the answer concise.",
]
system_prompt = " ".join(_prompt_sentences) + "\n\n" + "{context}"

# Chat prompt: the system instructions first, followed by the human turn,
# whose text is supplied through the "{input}" placeholder.
chat_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(chat_messages)

In [None]:
# Combine-documents chain built from the LLM and the chat prompt.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
# Retrieval chain pairing the retriever with the question-answering chain;
# it is invoked with an "input" key and its result is read via "answer".
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [None]:
# Run the sample question through the whole pipeline and print the reply,
# which the chain's result exposes under the "answer" key.
response = rag_chain.invoke({"input": "What is a Non-Viability Trigger Event?"})
print(response["answer"])

A Non-Viability Trigger Event occurs if APRA determines that IAG would become non-viable without converting or writing off capital instruments (like Notes) or receiving a public sector capital injection.  This event is at APRA's discretion and may include, but isn't limited to, severe impairment of IAG's financial position and solvency.  Several factors, including macroeconomic conditions and operational issues, can impact IAG's viability and trigger such an event.



In [None]:
# Install Mesop and any other required packages
!pip install mesop

Collecting mesop
  Downloading mesop-0.13.0-py3-none-any.whl.metadata (1.0 kB)
Collecting deepdiff==6.* (from mesop)
  Downloading deepdiff-6.7.1-py3-none-any.whl.metadata (6.1 kB)
Collecting ordered-set<4.2.0,>=4.0.2 (from deepdiff==6.*->mesop)
  Downloading ordered_set-4.1.0-py3-none-any.whl.metadata (5.3 kB)
Downloading mesop-0.13.0-py3-none-any.whl (7.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading deepdiff-6.7.1-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.6/76.6 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ordered_set-4.1.0-py3-none-any.whl (7.6 kB)
Installing collected packages: ordered-set, deepdiff, mesop
Successfully installed deepdiff-6.7.1 mesop-0.13.0 ordered-set-4.1.0


In [None]:
import mesop as me
import mesop.labs as mel

# Start the Mesop server for this notebook session (the recorded run reports
# it on http://localhost:32123). Run this once per session.
me.colab_run()


[32mRunning server on: http://localhost:32123[0m


In [None]:
def transform(prompt: str, history: list[mel.ChatMessage]) -> str:
    """Produce the chat reply for one incoming message using the RAG chain.

    Args:
        prompt: The incoming message; forwarded to the chain as "input".
        history: Chat messages passed in alongside the prompt; not used here.

    Returns:
        The "answer" entry of the chain's result.
    """
    result = rag_chain.invoke({"input": prompt})
    return result["answer"]


@me.page(path="/chat")
def chat():
    """Mesop page registered at /chat; renders a chat UI driven by `transform`."""
    mel.chat(transform)


# Display the /chat page inside the Colab output area.
me.colab_show(path="/chat")


<IPython.core.display.Javascript object>

In [None]:
!pip install pyngrok


 * Debug mode: off


 * Running on all addresses (::)
 * Running on http://[::1]:32123
 * Running on http://[::1]:32123
INFO:werkzeug:[33mPress CTRL+C to quit[0m


Collecting pyngrok
  Downloading pyngrok-7.2.2-py3-none-any.whl.metadata (8.4 kB)
Downloading pyngrok-7.2.2-py3-none-any.whl (22 kB)


INFO:werkzeug:::ffff:127.0.0.1 - - [26/Dec/2024 07:20:38] "GET /chat?authuser=0 HTTP/1.1" 200 -
INFO:werkzeug:::ffff:127.0.0.1 - - [26/Dec/2024 07:20:38] "GET /styles.css?authuser=0 HTTP/1.1" 200 -
INFO:werkzeug:::ffff:127.0.0.1 - - [26/Dec/2024 07:20:38] "GET /zone.js/bundles/zone.umd.js?authuser=0 HTTP/1.1" 200 -
INFO:werkzeug:::ffff:127.0.0.1 - - [26/Dec/2024 07:20:39] "GET /editor_bundle/bundle.js?authuser=0 HTTP/1.1" 200 -
INFO:werkzeug:::ffff:127.0.0.1 - - [26/Dec/2024 07:20:41] "POST /__ui__?authuser=0 HTTP/1.1" 200 -


Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.2


In [None]:
import mesop as me
import mesop.labs as mel

# The Mesop server is already running: it was started by the earlier
# `me.colab_run()` cell (recorded on http://localhost:32123). Calling
# `me.colab_run()` a second time fails with "Address already in use", and
# `me.run` does not exist on the mesop module (AttributeError), so neither
# call is made here. The page is simply shown from the running server.
# NOTE(review): if the app has to listen on a different port (e.g. for a
# tunnel), set it where the server is first started - confirm the port
# option of `me.colab_run` in the Mesop documentation.


def transform(prompt: str, history: list[mel.ChatMessage]) -> str:
    """Produce the chat reply for one incoming message using the RAG chain.

    Args:
        prompt: The incoming message; forwarded to the chain as "input".
        history: Chat messages passed in alongside the prompt; not used here.

    Returns:
        The "answer" entry of the chain's result.
    """
    response = rag_chain.invoke({"input": prompt})
    return response["answer"]


@me.page(path="/chat")
def chat():
    """Mesop page registered at /chat; renders a chat UI driven by `transform`."""
    mel.chat(transform)


# Display the /chat page inside the Colab output area.
me.colab_show(path="/chat")



[32mRunning server on: http://localhost:32123[0m
 * Serving Flask app 'mesop.server.server'
 * Debug mode: off


Address already in use
Port 32123 is in use by another program. Either identify and stop that program, or start the server with a different port.


AttributeError: module 'mesop' has no attribute 'run'