In [1]:
# import os, getpass
import chromadb
from ibm_watsonx_ai import Credentials, APIClient
from langchain_chroma import Chroma
from langchain_core.documents import Document
from uuid import uuid4
from langchain_text_splitters import MarkdownTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_ibm import WatsonxEmbeddings
from ibm_watsonx_ai.foundation_models.utils.enums import ModelTypes
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
from ibm_watsonx_ai.foundation_models.utils.enums import DecodingMethods
from langchain_ibm import ChatWatsonx, WatsonxLLM
# from langchain.chains import RetrievalQAjin
from ibm_watsonx_ai.foundation_models.schema import TextChatParameters
from langchain_experimental.text_splitter import SemanticChunker
from langchain_huggingface import HuggingFaceEmbeddings
from transformers import AutoTokenizer
from docling.document_converter import DocumentConverter
from utils import *
from langchain_core.messages import (
    HumanMessage,
    SystemMessage,
    AIMessage
)
PAGE_BREAK = "<!-- page break -->"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
api_key="VPryyL4t508YQ_pJQNCU0m23Yctctahrqo2pjT5BMQkV"
url="https://us-south.ml.cloud.ibm.com"
project_id = "6552c3bb-a35a-4bf3-8430-c42828d59a06"

## Load documents

In [3]:
# file_path = "data/climate_edu/389801eng.pdf"
# converter = DocumentConverter()
# result = converter.convert(file_path)
# doc = result.document
# doc_name = doc.name

In [4]:
# doc_md = doc.export_to_markdown(page_break_placeholder=PAGE_BREAK, image_placeholder="",)
# with open("data/climate_edu/389801eng.md", "w") as f:
#     f.write(doc_md)

# pages = doc_md.split(PAGE_BREAK)
# pages = [markdown_cleanup(page) for page in pages]

## Chunk

In [5]:
# tokenizer = AutoTokenizer.from_pretrained(
#     "ibm-granite/granite-embedding-278m-multilingual"
# )
# max_length = tokenizer.model_max_length
# chunk_overlap_length = int(max_length / 10)
# chunk_size_length = max_length - chunk_overlap_length
# markdown_splitter = MarkdownTextSplitter.from_huggingface_tokenizer(
#     tokenizer=tokenizer,
#     chunk_size=chunk_size_length,
#     chunk_overlap=chunk_overlap_length
# )

In [6]:
# chunked_docs = []
# for i, page in enumerate(pages):
#     chunks = markdown_splitter.split_text(page)
#     for j, chunk in enumerate(chunks):
#         document = Document(
#             page_content=chunk,
#             embeddings=tokenizer.encode(chunk, add_special_tokens=False),
#             metadata={
#                 "source": doc_name,
#                 "page": i + 1,
#                 "chunk": j + 1
#             }
#         )
#         chunked_docs.append(document)

## Ingest

In [7]:
embedder = WatsonxEmbeddings(
    url=url,
    apikey=api_key,
    project_id=project_id,
    model_id="ibm/granite-embedding-278m-multilingual"
)

In [8]:
vector_store = Chroma(
    collection_name="climate_edu",
    persist_directory="data/chroma_db",
    embedding_function=embedder
)

In [9]:
# vector_store.add_documents(chunked_docs)

## Retrieval

In [10]:
user_query = "What could be done by a single person to help mitigate climate change?"

In [51]:
retriever = vector_store.max_marginal_relevance_search(
    user_query,
    k=10,
    fetch_k=20
)

In [14]:
retriever_docs = [doc.page_content for doc in retriever]
answer = ""
for i, doc in enumerate(retriever_docs):
    answer += f"Document {i + 1}:\n{doc}\n"

## Augmentation

In [78]:
parameters = {"max_new_tokens": 1024, "decoding_method": "sample", "temperature": 0.2}
ibm_chat = ChatWatsonx(
    url=url,
    apikey=api_key,
    project_id=project_id,
    model_id="meta-llama/llama-3-3-70b-instruct",
    params=parameters,
)

In [79]:
SYSTEM_PROMPT = f"""
You are a teacher who provide information on climate change, its impacts, and actions individuals can take to mitigate it. Use the provided context to answer each user question as accurately and briefly as possible. If the context does not contain the answer, respond with: "Sorry, I cannot assist you."
Answer the queries with the same user's input language no matter the context's language.
Always answer in markdown format, using bullet points for lists and bold for important facts. If the context is not sufficient to answer the question, politely inform the user that you cannot assist them.
Keep the the answers like you are teaching and add a little interaction to the answer, like asking a question or giving a suggestion to the user.
Example of a good answer:
Here are some actions a single person can take to help mitigate climate change:
    1. Reduce energy consumption by using energy-efficient appliances and turning off lights when not in use.
    2. Use public transportation, carpool, bike, or walk instead of driving alone.
    3. Reduce, reuse, and recycle to minimize waste.
    4. Support renewable energy sources like solar or wind power.
I hope these suggestions help you contribute to a more sustainable future! What actions do you think you can take to help mitigate climate change?
Context is provided below. Use it to answer the user's question. If the context does not contain the answer, respond with: "Sorry, I cannot assist you."
Context:
{answer}
"""


In [80]:
system_message = SystemMessage(
    content=SYSTEM_PROMPT
)
human_message = HumanMessage(
    content=user_query
)

In [81]:
human_message

HumanMessage(content='What could be done by a single person to help mitigate climate change?', additional_kwargs={}, response_metadata={})

In [82]:
response = ibm_chat.invoke(
    [system_message, human_message]
)


In [83]:
print(response.content)

To help mitigate climate change, here are some actions a single person can take:
* Reduce energy consumption by using energy-efficient appliances and turning off lights when not in use.
* Use public transportation, carpool, bike, or walk instead of driving alone.
* Reduce, reuse, and recycle to minimize waste.
* Support renewable energy sources like solar or wind power.
**Every small action counts**, and collective efforts can lead to significant positive change. What actions do you think you can take to help mitigate climate change? Can you think of any other ways to reduce your carbon footprint?


In [84]:
prompt_template = f"""<|begin_of_text|><|header_start|>system<|header_end|>
You are a teacher who provide information on climate change, its impacts, and actions individuals can take to mitigate it. Use the provided context to answer each user question as accurately and briefly as possible. If the context does not contain the answer, respond with: "Sorry, I cannot assist you."
Answer the queries with the same user's input language no matter the context's language.
Always answer in markdown format, using bullet points for lists and bold for important facts. If the context is not sufficient to answer the question, politely inform the user that you cannot assist them.
Keep the the answers like you are teaching and add a little interaction to the answer, like asking a question or giving a suggestion to the user.
Example of a good answer:
Here are some actions a single person can take to help mitigate climate change:
    1. Reduce energy consumption by using energy-efficient appliances and turning off lights when not in use.
    2. Use public transportation, carpool, bike, or walk instead of driving alone.
    3. Reduce, reuse, and recycle to minimize waste.
    4. Support renewable energy sources like solar or wind power.
I hope these suggestions help you contribute to a more sustainable future! What actions do you think you can take to help mitigate climate change?
Context is provided below. Use it to answer the user's question. If the context does not contain the answer, respond with: "Sorry, I cannot assist you."
Context:
{answer}<|eot|>
<|header_start|>user<|header_end|>

{user_query}<|eot|>
<|header_start|>assistant<|header_end|>
"""

In [85]:
print(prompt_template)

<|begin_of_text|><|header_start|>system<|header_end|>
You are a teacher who provide information on climate change, its impacts, and actions individuals can take to mitigate it. Use the provided context to answer each user question as accurately and briefly as possible. If the context does not contain the answer, respond with: "Sorry, I cannot assist you."
Answer the queries with the same user's input language no matter the context's language.
Always answer in markdown format, using bullet points for lists and bold for important facts. If the context is not sufficient to answer the question, politely inform the user that you cannot assist them.
Keep the the answers like you are teaching and add a little interaction to the answer, like asking a question or giving a suggestion to the user.
Example of a good answer:
Here are some actions a single person can take to help mitigate climate change:
    1. Reduce energy consumption by using energy-efficient appliances and turning off lights whe

In [86]:
model_id = "meta-llama/llama-3-3-70b-instruct"
parameters = {"max_new_tokens": 1024, "decoding_method": "greedy"}
watsonx_llm = WatsonxLLM(
    model_id=model_id,
    url=url,
    apikey=api_key,
    project_id=project_id,
    params=parameters
)

In [87]:

out = watsonx_llm.invoke(prompt_template)

In [88]:
print(out)

To help mitigate climate change, here are some actions a single person can take:
* Reduce energy consumption by using energy-efficient appliances and turning off lights when not in use.
* Use public transportation, carpool, bike, or walk instead of driving alone.
* Reduce, reuse, and recycle to minimize waste.
* Support renewable energy sources like solar or wind power.
I hope these suggestions help you contribute to a more sustainable future! What actions do you think you can take to help mitigate climate change? 
**Remember, every small action counts, and collective efforts can lead to significant positive change.** Can you think of any other ways to reduce your carbon footprint?


In [89]:
# Save the output to a markdown file
with open("data/climate_edu/output.md", "w") as f:
    f.write(out)

## Agent

In [104]:
from pydantic import BaseModel, Field
from langgraph.prebuilt import create_react_agent

class GetWeather(BaseModel):
    """Get the current weather in a given location"""

    location: str = Field(..., description="The city and state, e.g. San Francisco, CA")

agent = create_react_agent(
    model=ibm_chat,
    tools=[GetWeather],
    )
# llm_with_tools = ibm_chat.bind_tools(tools=[GetWeather])

In [107]:
response = agent.invoke({"messages": [{"role": "user", "content": "what is the weather in sf"}]})


In [None]:
response

{'messages': [HumanMessage(content='what is the weather in sf', additional_kwargs={}, response_metadata={}, id='8bcf74e7-c7ab-44e4-a764-fc0259c943d9'),
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'chatcmpl-tool-76ce29bbea5a4791b5b552cd14e5fcaa', 'type': 'function', 'function': {'name': 'GetWeather', 'arguments': '{"location": "sf"}'}}]}, response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 243, 'total_tokens': 266}, 'model_name': 'meta-llama/llama-3-3-70b-instruct', 'system_fingerprint': '', 'finish_reason': 'tool_calls'}, id='chatcmpl-499441a22b39c4986cbcc979d9f2482b', tool_calls=[{'name': 'GetWeather', 'args': {'location': 'sf'}, 'id': 'chatcmpl-tool-76ce29bbea5a4791b5b552cd14e5fcaa', 'type': 'tool_call'}], usage_metadata={'input_tokens': 243, 'output_tokens': 23, 'total_tokens': 266}),
  ToolMessage(content="location='sf'", name='GetWeather', id='3ace5a9d-1888-415c-98cb-310add354070', tool_call_id='chatcmpl-tool-76ce29bbea5a4791b5b552cd14

In [96]:
msg.dict()

/tmp/ipykernel_2553/3029640136.py:1: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  msg.dict()


{'content': '',
 'additional_kwargs': {'tool_calls': [{'id': 'chatcmpl-tool-d48da974ea154d7785db8b9429931767',
    'type': 'function',
    'function': {'name': 'GetWeather',
     'arguments': '{"location": "New York, NY"}'}}]},
 'response_metadata': {'token_usage': {'completion_tokens': 26,
   'prompt_tokens': 274,
   'total_tokens': 300},
  'model_name': 'meta-llama/llama-3-3-70b-instruct',
  'system_fingerprint': '',
  'finish_reason': 'tool_calls'},
 'type': 'ai',
 'name': None,
 'id': 'chatcmpl-a25743bd652941e24fb31f83932238f3',
 'example': False,
 'tool_calls': [{'name': 'GetWeather',
   'args': {'location': 'New York, NY'},
   'id': 'chatcmpl-tool-d48da974ea154d7785db8b9429931767',
   'type': 'tool_call'}],
 'invalid_tool_calls': [],
 'usage_metadata': {'input_tokens': 274,
  'output_tokens': 26,
  'total_tokens': 300}}

In [93]:
msg.tool_calls

[{'name': 'GetWeather',
  'args': {'location': 'New York, NY'},
  'id': 'chatcmpl-tool-d48da974ea154d7785db8b9429931767',
  'type': 'tool_call'}]