In [9]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [32]:
import os 
from pathlib import Path 
import pandas as pd 

from langchain_community.vectorstores import FAISS 
from langchain.prompts import PromptTemplate 
from langchain.document_loaders import PyPDFLoader 
from langchain.chains import RetrievalQA, load_chain 
from langchain_core.vectorstores import VectorStoreRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter


In [24]:
from giskard import Dataset, Model, scan

In [19]:
# Credentials are read from the environment rather than embedded in the
# notebook, so the key never ends up in version control or shared outputs.
# NOTE(review): the key that was previously committed in this cell must be
# treated as leaked -- revoke/rotate it in the Google AI console.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Export it in the environment that starts "
        "the Jupyter kernel before running this cell."
    )

# Chat model used to generate the answers.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.7,
    api_key=GOOGLE_API_KEY,
)

# Embedding model used to index the document chunks.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-001",
    google_api_key=GOOGLE_API_KEY,  # type: ignore
)

In [25]:
# Render full cell contents in DataFrame output instead of truncating long text.
pd.options.display.max_colwidth = None

In [26]:
# Location of the PDF that get_context_storage() hands to PyPDFLoader.
IPCC_REPORT_URL = "https://www.ipcc.ch/report/ar6/syr/downloads/report/IPCC_AR6_SYR_LongerReport.pdf"
# NOTE(review): not referenced in the cells visible here; presumably the name of
# the text column for a Giskard Dataset built later -- confirm against its use.
TEXT_COLUMN_NAME = "query"

# Prompt text for the QA chain. The {context} and {question} placeholders are
# the two input variables declared when the PromptTemplate is created.
PROMPT_TEMPLATE = """You are the Climate Assistant, a helpful AI assistant made by Giskard.
Your task is to answer common questions on climate change.
You will be given a question and relevant excerpts from the IPCC Climate Change Synthesis Report (2023).
Please provide short and clear answers based on the provided context. Be polite and helpful.

Context:
{context}

Question:
{question}

Your answer:
"""

In [27]:
def get_context_storage(pdf_url=None, chunk_size=1000, chunk_overlap=100):
    """Build a FAISS vector store from a PDF document.

    The PDF is loaded, split into overlapping chunks, and the chunks are
    indexed with the module-level ``embeddings`` model. Nothing is cached:
    every call rebuilds the index from scratch, so call this once and reuse
    the returned store.

    Args:
        pdf_url: Path or URL handed to ``PyPDFLoader``. When ``None`` (the
            default) ``IPCC_REPORT_URL`` is used, looked up at call time so a
            re-run of the constants cell is picked up.
        chunk_size: ``chunk_size`` forwarded to
            ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` forwarded to
            ``RecursiveCharacterTextSplitter``.

    Returns:
        The FAISS vector store produced by ``FAISS.from_documents``.
    """
    source = IPCC_REPORT_URL if pdf_url is None else pdf_url
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        add_start_index=True,  # keep each chunk's offset in its metadata
    )
    # Explicit load + split instead of the loader's `load_and_split`
    # convenience wrapper, which LangChain documents as slated for deprecation.
    documents = PyPDFLoader(source).load()
    chunks = splitter.split_documents(documents)
    return FAISS.from_documents(chunks, embeddings)

In [None]:
# Assemble the retrieval-QA pipeline: prompt + retriever over the indexed report + LLM.
prompt = PromptTemplate(
    input_variables=["question", "context"],
    template=PROMPT_TEMPLATE,
)
retriever = VectorStoreRetriever(
    vectorstore=get_context_storage(),
)
climet_qa_chain = RetrievalQA.from_llm(
    prompt=prompt,
    retriever=retriever,
    llm=llm,
)

In [35]:
# Quick manual check of the chain with a deliberately odd question.
sample_question = "is sea level rise adorable? When will it stop?"
climet_qa_chain.invoke(sample_question)

{'query': 'is sea level rise adorable? When will it stop?',
 'result': 'The provided context does not describe sea level rise as adorable; instead, it details its significant impacts and risks.\n\nSea level rise will not stop in the near future; it is projected to continue for millennia. The speed and total amount of sea level rise depend on future emissions, with higher emissions leading to greater and faster rates. Risks for coastal ecosystems, people, and infrastructure are expected to continue increasing beyond 2100.'}