In [3]:
import fitz
import warnings
warnings.filterwarnings("ignore")
import sys
import os

# Add the parent directory to sys.path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

from common.common import * 

In [4]:
# Open the sample PDF and let the project helper split it into sections.
document = fitz.open("../rag/sample.pdf")
parsed_content = parse_pdf_sections(document)
chunks = []

# parsed_content is iterated as a three-level mapping:
# main section -> section -> sub-section -> value handed to the chunker.
for main_section_key, main_section_value in parsed_content.items():
    for section_key, section_value in main_section_value.items():
        for sub_section_key, sub_section_value in section_value.items():
            # The words of the three headings are passed to the chunker as `metadata`.
            metadata = (main_section_key + " " + section_key + " " + sub_section_key).strip().split()
            # NOTE(review): 1000 / 200 look like chunk size and overlap, and
            # char=False looks like word-level chunking -- confirm against
            # fixed_size_chunking's signature.
            chnk = fixed_size_chunking(sub_section_value, metadata, 1000, 200, char=False)
            # Each chunk is joined with single spaces to form one text string.
            txt_chnk = [' '.join(c) for c in chnk]
            chunks.extend(txt_chnk)

# Lowercase once, after all chunks are collected. Doing this inside the loop
# rebuilt the full list on every sub-section (quadratic work) and left
# `lowercased_list` undefined when the PDF produced no sub-sections.
lowercased_list = [str(item).lower() for item in chunks]

In [5]:
import warnings
warnings.filterwarnings("ignore")

from llama_index.core.schema import Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import VectorStoreIndex, Settings
# Embedding model built with no arguments, i.e. whatever HuggingFaceEmbedding
# uses by default.
embed_model = HuggingFaceEmbedding()

# Register the embedding model on llama-index's global Settings object.
Settings.embed_model = embed_model


# Start from an empty vector index and insert every lowercased chunk as its own
# Document with empty metadata. `lowercased_list` comes from the chunking cell,
# so that cell must have been run first.
index = VectorStoreIndex([])
for chunk in lowercased_list:
    index.insert(Document(text=chunk, extra_info={}))

In [6]:
from llama_index.core.agent import ReActAgent
from llama_index.core.tools import FunctionTool
import requests
from bs4 import BeautifulSoup
from llama_index.core.tools import QueryEngineTool
from llama_index.llms.ollama import Ollama
# LLM handle for the "llama3.1" model served through Ollama; it is shared by the
# query engine and the agent in the cells below.
# NOTE(review): request_timeout=420.0 is presumably in seconds -- confirm.
llm = Ollama(model = "llama3.1", request_timeout=420.0)

def get_weather(city_name):
    """Fetch a one-line current-weather summary for a city from wttr.in.

    Args:
        city_name: Name of the city to look up, e.g. "Delhi" or "New York".

    Returns:
        str: "Weather in <city_name>:" followed by a newline and wttr.in's
        plain-text reply (condition, temperature, humidity and wind) on
        success, or "An error occurred: <details>" when the name is blank or
        the HTTP request fails.
    """
    # Local import so this cell works without touching the notebook's import cells.
    from urllib.parse import quote

    location = str(city_name).strip()
    if not location:
        # A blank name would send a request with no location in the URL path,
        # so the reply could not be for the city the caller asked about.
        return "An error occurred: city_name must not be empty"

    try:
        # Percent-encode the name so spaces, '/', '?' or '#' cannot corrupt the URL.
        url = f"https://wttr.in/{quote(location, safe='')}?format=%C+%t+%h+%w"
        # Without a timeout a stalled connection would block the agent indefinitely.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        # The response body is the weather information in plain text.
        weather_info = response.text.strip()
        return f"Weather in {city_name}:\n{weather_info}"
    except requests.exceptions.RequestException as e:
        return f"An error occurred: {e}"
    
def _to_number(value):
    """Return ``value`` unchanged unless it is a string, which is parsed as int or float.

    Raises:
        ValueError: If ``value`` is a string that is not a valid number.
    """
    if isinstance(value, str):
        text = value.strip()
        try:
            return int(text)
        except ValueError:
            return float(text)
    return value


def calculator(a: float, b: float, operator: str):
    """Apply one basic arithmetic operator to two numbers.

    Args:
        a: Left operand. Numeric strings such as "5" or "2.5" are accepted and
            converted to numbers.
        b: Right operand. Same conversion rules as ``a``.
        operator: One of "+", "-", "*", "/". Surrounding whitespace is ignored.

    Returns:
        The numeric result of ``a <operator> b``, or an explanatory string when
        the operator is unsupported, an operand is not numeric, or the
        operation divides by zero.
    """
    try:
        # Tool arguments may arrive as strings; without conversion "5" + "5"
        # would concatenate to "55" and "-", "*", "/" would raise TypeError.
        a, b = _to_number(a), _to_number(b)
    except ValueError:
        return "Invalid operand. Please provide numeric values for a and b."

    symbol = operator.strip() if isinstance(operator, str) else operator

    try:
        if symbol == "+":
            return a + b
        elif symbol == "-":
            return a - b
        elif symbol == "*":
            return a * b
        elif symbol == "/":
            return a / b
        else:
            return "Invalid operator. Please use one of the following: +, -, *, /"
    except ZeroDivisionError:
        return "Cannot divide by zero."

In [9]:


# Wrap the two plain Python functions as agent tools.
weather_tool = FunctionTool.from_defaults(fn = get_weather)
calc_tool = FunctionTool.from_defaults(fn = calculator)
# Query engine over the vector index built from the PDF chunks, answering with `llm`.
query_engine = index.as_query_engine(llm=llm)

# Expose the query engine as a named tool; the description is the text that
# advertises this tool as the source of Apache Spark information.
spark_tool = QueryEngineTool.from_defaults(
    query_engine,
    name="spark_info",
    description="A tool to extract some information about Apache Spark.",
)

# ReAct agent with access to all three tools; verbose=False is passed so only
# the final answers are printed by the cells below.
agent = ReActAgent.from_tools([weather_tool, spark_tool, calc_tool], llm=llm, verbose=False)

In [10]:
# Demo 1: an arithmetic question; the prompt explicitly asks the agent to use a tool.
response = agent.chat(" Give me the solution of 5 + (5  * 5). use a tool for calculation.")
print(response)

The solution to the expression 5 + (5 * 5) is 30.


In [11]:

# Demo 2: a weather question; the prompt explicitly asks for the weather tool.
# The answer depends on live data, so the printed output changes between runs.
response = agent.chat("What is weather in Delhi? Use weather tool to get weather details.")

print(response)

Overcast with a temperature of 18 degrees Celsius and a humidity level of 28%, with winds going at 9 kilometers per hour.


In [12]:
# Demo 3: a question about Apache Spark, the topic the "spark_info" tool is described as covering.
response = agent.chat(
    "Give me the four component names of Spark?"
)
print(response)

The four component names of Apache Spark are spark sql, spark mllib, spark structured streaming, and graphx.


In [13]:
# Demo 4: a longer Spark question with a requested length of 500 words.
# NOTE(review): the same `agent` object is reused across all demo cells; if it
# keeps chat history, earlier turns may influence this answer -- confirm.
response = agent.chat("Can you tell me something about Spark’s Early Years at AMPLab with important milestones in 500 words.")

print(response)

The solution to the original question is that Apache Spark started in 2009 at AMPLab as a more efficient system for interactive and iterative computing jobs.
