In [1]:
%%capture
# installing dependencies
%pip install -U bitsandbytes
!pip install llama-index
!pip install llama-index-llms-huggingface
!pip install llama-index-embeddings-huggingface
!pip install llama-index-embeddings-huggingface-api

!pip install serpapi

# Search API and Preposessing

In [2]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
serp_api_key = user_secrets.get_secret("serp")
hf_token = user_secrets.get_secret("HUGGINGFACE_TOKEN")

In [3]:
import serpapi

In [4]:
def search_response(query):
    return serpapi.search(q=query, engine="google", location="Austin, Texas", hl="en", gl="us", api_key = serp_api_key)

In [None]:
def extract_info(json_data):

    results = []

    # for "knowledge_graph" and extracting specified fields
    if json_data.get("knowledge_graph"):
        results.append("Biography: ")
        knowledge_graph = json_data["knowledge_graph"]
        fields_to_extract = [
            "title", "type", "description", "born", "children",
            "net_worth", "spouse", "education", "nationality", "height"
        ]
        for field in fields_to_extract:
            if field in knowledge_graph:
                results.append(f"{field.capitalize()}: {knowledge_graph[field]}")

    # for "answer_box" and handling "calculator_result" or "snippet"
    if json_data.get("answer_box"):
        answer_box = json_data["answer_box"]
        if answer_box.get("type") == "calculator_result" and "result" in answer_box:
            return f"calculated value={answer_box['result']}"
        elif "snippet" in answer_box:
            results.append(f'Answer to Query: {answer_box["snippet"]}')  # No tag for answer_box snippet

    # for "related_questions" and tagging "question" and "snippet"
    if json_data.get("related_questions"):
        results.append("Related Questions: ")
        for question in json_data["related_questions"]:
            if "question" in question:
                results.append(f"Question: {question['question']}")
            if "snippet" in question:
                results.append(f"Answer: {question['snippet']}")

    # for "organic_results" and getting "snippet"
    if json_data.get("organic_results"):
        results.append("Additional Information: ")
        for result in json_data["organic_results"]:
            if "snippet" in result:
                snippet = result["snippet"]
                # Exclude incomplete sentences (those ending with "...")
                if not snippet.endswith("..."):
                    results.append(snippet)

    return results

### Dealing with wordy mathematical expressions.

#### Initially implemented this with llama3, but the google search API also provides a calculator function and its pretty robust, better performance then llama3 8b. Example given below.

In [28]:
extract_info(search_response("5 to the power 2 multiplied by 6 plus 3 multiplied by 2"))

'calculated value=156'

In [29]:
extract_info(search_response("divide pi by 4 plus 6"))

'calculated value=0.3141592653589793'

### Processing raw search data

#### Filtered out the relavant components from the json response which were
- The "snippet" in "answer_box" (given priority as it gives the most relavant answer to query)
- The multiple "question" and "snippet" sections in "related_questions" (as it often gave new relavant info on the query)
- The multiple "snippet" sections in "organic_results" (the descriptions of websites on the search page, often providing relavant info)

#### This will be given to the LLM (LLaMA 3) so that it can provide a context-augmented response to the query.

In [15]:
extract_info(search_response("how many girlfriends has elon musk had?")) # 3rd Answer gives the required response

['Related Questions: ',
 "Question: Who are Elon Musk's ex-girlfriends?",
 'Answer: Justine Musk has previously referred to herself as Elon\'s "Starter Wife." Elon Musk began dating Talulah Riley shortly after his first divorce. Elon began dating actress Amber Heard in late 2016. Elon Musk (L) and Canadian musician Grimes (Claire Boucher) dated for three years before calling it quits.',
 'Question: How many times has Elon Musk been married and how many children does he have?',
 'Answer: In 2020 they welcomed their son Xac Musk although the name had to be changed due to legal reasons in California. And he is now known as XAZ Musk.',
 'Question: Does Elon Musk have 13 children?',
 "Answer: The tech entrepreneur has 12 children with three different women, including one child who died in 2002. Elon Musk, the world's richest man, is the father of 12 children with three different partners.",
 'Additional Information: ',
 'The X (formerly known as Twitter) owner has had a slew of high-profile

In [16]:
extract_info(search_response("what is the earning of srk"))

['Answer to Query: Shah Rukh Khan, earns an estimated annual income of around 38 million USD or around 280 crore INR. He makes this money through various sources including acting in films, owning a sports team and production company, endorsements, investments, and other business ventures.',
 'Related Questions: ',
 'Question: How much does Shah Rukh Khan earn?',
 "Answer: Acting and Film Production Shah Rukh Khan's primary source of income remains his successful career in Bollywood. In 2024, he topped the Forbes list of highest-paid actors in India, earning between Rs 150 to 250 crore per film.",
 'Question: Who is richer, Srk or Virat Kohli?',
 'Answer: Kohli leads with $227.9 million, followed by Ranveer Singh at $203.1 million, Shah Rukh Khan at $120.7 million, Akshay Kumar at $111.7 million, and Alia Bhatt at $101.1 million.',
 'Question: Who is richer, Amitabh or Shah Rukh?',
 "Answer: From Shah Rukh Khan's net worth of Rs 7,300 Crore to Amitabh Bachchan's Rs 1,600 Crore net worth

# ReAct Agent (w/ LLaMA 3 8b instruct (4-bitq))

In [6]:
from transformers import AutoTokenizer
import torch
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import BitsAndBytesConfig

import json
from typing import Sequence, List

from llama_index.core.llms import ChatMessage
from llama_index.core.tools import BaseTool, FunctionTool
from llama_index.core.agent import ReActAgent

import nest_asyncio

from IPython.display import Javascript

In [7]:
tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    token=hf_token,
)

stopping_ids = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

In [8]:
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

llm = HuggingFaceLLM(
    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
    model_kwargs={
        "token": hf_token,
        "quantization_config": quantization_config
    },
    generate_kwargs={
        "do_sample": True,
        "temperature": 0.9999,
        "top_p": 0.9999,
    },
    tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
    tokenizer_kwargs={"token": hf_token},
    stopping_ids=stopping_ids,
)

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

In [9]:
nest_asyncio.apply()

In [14]:
def math_solver(math_query: str) -> float:
    """Compute the given mathematical query with all the variables known and return the result"""
    return (extract_info(search_response(math_query)))

def google_search(search_query: str) -> float:
    """Search the web for any unknown fact in user query and return relavant information about the query"""
    return (extract_info(search_response(search_query)))

mathsolver_tool = FunctionTool.from_defaults(fn=math_solver)
googlesearch_tool = FunctionTool.from_defaults(fn=google_search)

tools = [mathsolver_tool, googlesearch_tool]

In [47]:
agent = ReActAgent.from_tools(
    tools,
    llm=llm,
    verbose=True,
)

# Testing

In [22]:
response = agent.chat("What is five plus nine times the net worth of shah rukh khan?")
print(str(response))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


> Running step 27740ae3-4cf2-4d43-b5e7-daaf12a0e544. Step input: What is five plus nine times the net worth of shah rukh khan?


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[1;3;38;5;200mThought: The current language of the user is not specified, I need to use the tool to help me answer the question.
Action: google_search
Action Input: {'search_query': "Shah Rukh Khan's net worth"}
[0m[1;3;34mObservation: ["Answer to Query: According to Forbes richest list, Shahrukh's net worth in dollars is $770 Million and according to the world of Statistics Shahrukh Khan's net worth is more than Hollywood actor Robert De Niro ($500m), George Clooney ($500M), Jackie Chan ($520M), and Tom Cruise ($620M).", 'Related Questions: ', "Question: Who is India's no. 1 richest actor?", "Answer: Shah Rukh Khan is currently recognized as India's richest actor, boasting a net worth of approximately ₹7,300 crore (around $880 million) according to the 2024 Hurun India Rich List.", 'Question: Who is richer, Srk or Tom Cruise?', "Answer: The world's richest comedian In comparison, Dwayne Johnson has a net worth of $890 million, Shah Rukh Khan $870 million, Tom Cruise $800 million, a

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[1;3;34mObservation: calculated value=6930000005
[0m> Running step 6951bc46-345c-4b99-bd71-99159637fa06. Step input: None
[1;3;38;5;200mThought: I can answer without using any more tools. I'll use the user's language to answer
Answer: The result of 5 plus 9 times the net worth of Shah Rukh Khan is approximately 6930000005.
[0mThe result of 5 plus 9 times the net worth of Shah Rukh Khan is approximately 6930000005.


In [23]:
response = agent.chat("What is five plus nine times the number of wives elon musk had?")
print(str(response))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


> Running step cba820ae-fc65-4de9-ac05-ae840f5b743b. Step input: What is five plus nine times the number of wives elon musk had?
[1;3;38;5;200mThought: The current language of the user is English. I need to use a tool to help me answer the question.
Action: google_search
Action Input: {'search_query': 'How many wives does Elon Musk have?'}
[0m

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[1;3;34mObservation: ['Answer to Query: Elon Musk, the renowned CEO of Tesla and SpaceX, has had a complex personal life that has garnered significant public interest. Musk has been married three times to two different women. His marital history includes two marriages to actress Talulah Riley and one marriage to author Justine Wilson.', 'Related Questions: ', 'Question: How many wives does Elon Musk have currently?', 'Answer: Family and Marriages In 2010, Musk married actress Talulah Riley. Their relationship saw two marriages and two divorces, finally ending in 2016. As of 2024, Musk is not currently married. He has fathered additional children with other partners, bringing his total number of known children to ten.', "Question: Who are Elon Musk's 10 children?", 'Answer: None', "Question: Who was Elon's first wife?", "Answer: Elon Musk's first wife, Justine Wilson, played a significant role in the early stages of the tech mogul's personal life. The couple met while studying at Queen

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[1;3;34mObservation: calculated value=32
[0m> Running step a3e316ac-04ed-4450-8bc2-074a319aae57. Step input: None
[1;3;38;5;200mThought: I can answer the question without using any more tools. I'll use the user's language to answer
Answer: Five plus nine times the number of wives Elon Musk had is 32.
[0mFive plus nine times the number of wives Elon Musk had is 32.


# More Complex Testing

In [26]:
response = agent.chat("What is three raised to the power of two, times the current value of gold divided by the year hiroshima was bombed?")
print(str(response))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


> Running step 4ffdffc8-3c9d-40bc-ab1b-3bf080b2a447. Step input: What is three raised to the power of two, times the current value of gold divided by the year hiroshima was bombed?
[1;3;38;5;200mThought: The current value of gold is unknown, so I'll need to use the google_search tool to find out.
Action: google_search
Action Input: {'search_query': 'current value of gold'}
[0m

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[1;3;34mObservation: ['Related Questions: ', 'Question: What is 1 oz of gold worth right now?', 'Answer: Gold Spot Price', 'Question: How much is 14K gold worth?', "Answer: Today's Gold Prices", 'Question: Is now a good time to sell gold?', 'Answer: The bottom line "Now is a terrific time to take some profits [as] gold [is] at all-time highs," suggests Miller. This would allow you to recover your initial investment while keeping some position in gold.', 'Question: What will gold be worth in 5 years?', 'Answer: Gold price prediction overview:', 'Additional Information: ', 'Gold Spot Price ; Gold Price Per Ounce. $2,677.70 USD, $22.50 USD ; Gold Price Per Gram. $86.09 USD, $0.72 USD ; Gold Price Per Kilo. $86,089.99 USD, $723.39 USD.', 'Live Gold Charts and Gold Spot Price from International Gold Markets, Prices from New York, London, Hong Kong and Sydney provided by Kitco.', '1 Troy Ounce ≈ 1,097 Ounce, Gold Price Per 1 Ounce, 2404.93 USD ; 1 Troy Ounce ≈ 0,031 Kilogram, Gold Price Per

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[1;3;34mObservation: calculated value=12.39038560411311
[0m> Running step 1c4cd0b6-f380-4833-99f8-58a2408713d5. Step input: None
[1;3;38;5;200mThought: The result of the mathematical query is approximately 12.39038560411311.
Thought: I can answer without using any more tools. I'll use the user's language to answer.
Answer: The result of three raised to the power of two, times the current value of gold divided by the year Hiroshima was bombed is approximately 12.39.
[0mThe result of three raised to the power of two, times the current value of gold divided by the year Hiroshima was bombed is approximately 12.39.


In [35]:
extract_info(search_response("srk net worth"))

["Answer to Query: #NewsByNumbers • With a net worth of $770 million, #ShahRukhKhan is the fourth richest actor and the richest Asian actor in the world. Here's a look at the Top 10 richest actors on the planet.",
 'Related Questions: ',
 "Question: Who is world's no. 1 richest actor?",
 'Answer: Tyler Perry stands on the top position in the list of richest actor in the world 2024 with the net worth of $1 Billion, followed by Jerry Seinfeld, Dwayne Johnson and Shah Rukh Khan. Here is the list of Top 10 Richest Actors in the World: S. No.',
 'Question: Who is the richest actor in India?',
 'Answer: None',
 'Question: Who is richer, Srk or Tom Cruise?',
 "Answer: The world's richest comedian In comparison, Dwayne Johnson has a net worth of $890 million, Shah Rukh Khan $870 million, Tom Cruise $800 million, and Brad Pitt 'only' $400 million.",
 'Question: What is the net worth of Tom Cruise?',
 'Answer: Despite his $600 million fortune, Cruise reportedly lives a nomadic lifestyle, moving 

In [27]:
response = agent.chat("What is three raised to the power of two, times the current value of gold divided by the year when fluidai company was founded?")
print(str(response))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


> Running step 6c09e35b-f784-4f1f-ba34-2999c03c67cd. Step input: What is three raised to the power of two, times the current value of gold divided by the year when fluidai company was founded?
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: google_search
Action Input: {'search_query': 'year when Fluid AI company was founded'}
[0m

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[1;3;34mObservation: ['Answer to Query: Mumbai-based Fluid AI was founded by the two brothers in 2008. The startup offers intuitive user experiences and data analytics for businesses including Rolls Royce and Vodafone.', 'Related Questions: ', 'Question: When was FluidAI founded?', 'Answer: Helwa co-founded FluidAI in 2014 while studying nanotechnology and electrical engineering at the University of Waterloo.', 'Question: Who is the founder of fluid AI?', 'Answer: Back in 2012, Our Founders Abhinav Aggarwal and Raghav Aggarwal embarked on this adventure with nothing but just an idea. They assembled a talented team of engineers to bring their vision to life.', 'Question: How many employees does fluid AI have?', 'Answer: Fluid AI employs 11 to 50 employees in India.', 'Question: Which company made AI first?', 'Answer: Some of the first companies that made significant contributions to the field of AI in the world: IBM: IBM was one of the first companies to work on AI research, starting i

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[1;3;34mObservation: ['Related Questions: ', 'Question: What was the value of gold in 2009?', 'Answer: Gold Prices - 100 Year Historical Chart', 'Question: Is gold worth more now than 10 years ago?', 'Answer: The price of gold fluctuates but historically over the long term, it trends higher. At the time of writing, the 10-year increase is 55.67%. This means that if you invested $1,000 in gold 10 years ago, it would be worth $1,550 today.', 'Question: How do I calculate my gold price?', 'Answer: Jewellers employ the following formula to calculate gold rates: Final Jewellery Price = Gold Price per Gram (22 or 18 Carat) × Weight in Grams + Making Charges per Gram + Goods and Services Tax (GST) on (Jewellery Price + Making Charges).', 'Question: What if I bought gold 20 years ago?', 'Answer: With gold trading at record highs above $2,600 per ounce, 20 years later that half ounce would be worth about $1,300. Over those 20 years, your $200 investment would have grown to $1,300, a 550% retur

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[1;3;34mObservation: calculated value=2.7461423593827776
[0m> Running step 9c1216ba-f915-44f1-8e1a-f88815b6cf0e. Step input: None
[1;3;38;5;200mThought: I can answer the question without using any more tools.
Answer: The result of 3*2*(current value of gold) / 2009 is approximately 2.7461423593827776.
[0mThe result of 3*2*(current value of gold) / 2009 is approximately 2.7461423593827776.


In [48]:
response = agent.chat("What is the net worth of kamal raj khan?")
print(str(response))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


> Running step 5f173747-ca19-4560-ad26-746c9dc128e5. Step input: What is the net worth of kamal raj khan?
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: google_search
Action Input: {'search_query': 'kamal raj khan net worth'}
[0m

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[1;3;34mObservation: ['Related Questions: ', 'Question: What is Shahid Khan net worth?', 'Answer: None', 'Question: What is the net worth of Bollywood Amir Khan?', 'Answer: 1862 crores. The actor is the owner of some of the most expensive things. Read the article to know about the list of five expensive things he owns. #aamirkhan #aamirkhanfan #networth #bollywoodactor.', 'Question: Who is the son of Krk?', 'Answer: None', 'Question: What is the full form of KRK?', 'Answer: Kamaal R. Khan', 'Additional Information: ', 'Kamaal R. Khan is an Indian actor, producer and writer in Hindi and Bhojpuri cinema. He has produced or written most of the films he has starred in.', '3M Followers, 40 Following, 517 Posts - KRK (@kamaalrkhan) on Instagram: "#Actor #Critic #TradeAnalyst"', 'Kamaal R Khan - KRK makes approximately $ 2.96K per month. What is net worth of Kamaal R Khan - KRK? $ 328K is approximately net worth of Kamaal R Khan - KRK.', "What is KRK's net worth? There is no accurate informa