# Lesson 3: Agentic Search

In [3]:
%pip install -r requirements.txt

Collecting python-dotenv~=1.0.1 (from -r requirements.txt (line 11))
  Using cached python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting tavily-python==0.3.3 (from -r requirements.txt (line 12))
  Using cached tavily_python-0.3.3-py3-none-any.whl.metadata (4.4 kB)
Collecting duckduckgo_search==5.3.1b1 (from -r requirements.txt (line 13))
  Using cached duckduckgo_search-5.3.1b1-py3-none-any.whl.metadata (18 kB)
Collecting openai==1.30.1 (from -r requirements.txt (line 15))
  Using cached openai-1.30.1-py3-none-any.whl.metadata (21 kB)
Collecting langgraph==0.0.53 (from -r requirements.txt (line 16))
  Using cached langgraph-0.0.53-py3-none-any.whl.metadata (23 kB)
Collecting langchain_core==0.2.0 (from -r requirements.txt (line 17))
  Using cached langchain_core-0.2.0-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain_openai==0.1.7 (from -r requirements.txt (line 18))
  Using cached langchain_openai-0.1.7-py3-none-any.whl.metadata (2.5 kB)
Collecting langchain-communit

In [4]:
# libraries
from dotenv import load_dotenv
import os
from tavily import TavilyClient

# read settings from a local .env file into the process environment
_ = load_dotenv()

# build the Tavily client from the API key held in the environment
tavily_api_key = os.environ.get("TAVILY_API_KEY")
client = TavilyClient(api_key=tavily_api_key)

In [5]:
# send one question to Tavily; include_answer=True requests the "answer" field read below
question = "What is in Nvidia's new Blackwell GPU?"
result = client.search(question, include_answer=True)

# display the answer
result["answer"]


'The new Nvidia Blackwell GPU is part of the RTX 50-series graphics cards and is set to deliver up to 20 petaflops of compute power. It is designed to power the next generation of AI supercomputers and offers significant performance improvements over its predecessor. The architecture of the Blackwell GPU features six cores and enables organizations to build and run real-time generative AI on trillion-parameter large language models at a reduced cost and energy consumption compared to previous models.'

## Regular search

In [38]:
# choose location (try to change to your own city!)

city = "Sydney"

# Search prompt shared by the search cells in this notebook; `city` is interpolated
# into the first line of the text.
# NOTE(review): the quoted "weather.com" line presumably steers the search engine
# toward that site -- confirm against the results you get back.
query = f"""
    what is the current weather in {city}?
    Should I travel there today?
    "weather.com"
"""

> Note: `search` was modified to return a fixed list of weather.com URLs if the DuckDuckGo request raises an exception. High volumes of student traffic sometimes trigger rate-limit exceptions.

In [39]:
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
import re

# DuckDuckGo client instance; search() calls its .text() method
ddg = DDGS()

def search(query, max_results=6):
    """Return result URLs for ``query`` from DuckDuckGo.

    Args:
        query: Search string passed to ``ddg.text``.
        max_results: Maximum number of results requested from DuckDuckGo.

    Returns:
        A list with the ``href`` value of each result. If the DuckDuckGo call
        (or reading its results) raises, a fixed list of two weather.com URLs
        is returned instead so the rest of the notebook can still run.
    """
    # cover case where DDG rate limits due to high deeplearning.ai volume
    fallback_urls = [
        "https://weather.com/weather/today/l/USCA0987:1:US",
        "https://weather.com/weather/hourbyhour/l/54f9d8baac32496f6b5497b4bf7a277c3e2e6cc5625de69680e6169e7e38e9a8",
    ]
    try:
        results = ddg.text(query, max_results=max_results)
        return [hit["href"] for hit in results]
    except Exception as e:
        # report what actually went wrong instead of discarding the exception
        print(f"returning previous results due to exception reaching ddg: {e!r}")
        return fallback_urls


# show each URL returned by the search, one per line
for link in search(query):
    print(link)

https://weather.com/weather/today/l/98ef17e6662508c0af6d8bd04adacecde842fb533434fcd2c046730675fba371
https://weather.com/weather/tenday/l/98ef17e6662508c0af6d8bd04adacecde842fb533434fcd2c046730675fba371
https://www.weather.com.au/nsw/sydney/current
https://www.weather.com.au/nsw
https://weather.com/
https://www.moongiant.com/phase/today/


In [40]:
def scrape_weather_info(url, timeout=10):
    """Fetch ``url`` and return its parsed HTML.

    Args:
        url: Address of the page to download. A falsy value skips the request.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        A ``BeautifulSoup`` object for the page on success. On failure a plain
        message string is returned instead: "Weather information could not be
        found." when ``url`` is falsy, or "Failed to retrieve the webpage."
        when the request raises or the status code is not 200.
    """
    if not url:
        return "Weather information could not be found."

    # fetch data
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        # without a timeout the request can wait indefinitely on a stalled server
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # connection errors and timeouts get the same message as a bad status
        return "Failed to retrieve the webpage."
    if response.status_code != 200:
        return "Failed to retrieve the webpage."

    # parse result
    return BeautifulSoup(response.text, 'html.parser')


> Note: This produces a long output. After looking at it briefly, you may want to right-click and clear the cell output so you don't have to scroll past it.

In [41]:
# use DuckDuckGo to find websites and take the first result
# (None when nothing came back; scrape_weather_info answers that with a message string)
urls = search(query)
url = urls[0] if urls else None

# scrape first website
soup = scrape_weather_info(url)

print(f"Website: {url}\n\n")
# scrape_weather_info returns a plain message string on failure, and a string has no .body
page = soup if isinstance(soup, str) else soup.body
print(str(page)[:50000]) # limit long outputs

Website: https://www.weather.com.au/nsw/sydney


<body>
<div id="outer">
<div id="inner">
<div id="header">
<div id="logo">
<a href="/"><img alt="Logo" src="https://www.weather.com.au/images/logo.gif"/></a>
</div>
<div id="adSmallBanner">
<script async="" src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js"></script>
<!-- 320x50 -->
<ins class="adsbygoogle" data-ad-client="ca-pub-1715420423616185" data-ad-slot="2488649857" style="display:inline-block;width:320px;height:50px"></ins>
<script>
     (adsbygoogle = window.adsbygoogle || []).push({});
</script>
</div>
</div>
<div id="body">
<div id="breadcrumbs">
<a href="/">Home</a> &gt; <a href="/nsw">New South Wales</a> &gt; Sydney
			</div>
<div id="main">
<div id="nav">
<div id="navInner">
<div id="au">
<a href="/qld" id="qld"></a>
<a href="/nsw" id="nsw"></a>
<a href="/vic" id="vic"></a>
<a href="/tas" id="tas"></a>
<a href="/sa" id="sa"></a>
<a href="/wa" id="wa"></a>
<a href="/nt" id="nt"></a>
</div>
<ul class="navMen

In [42]:
# gather the text of every heading and paragraph element on the page
text_fragments = [
    element.get_text(" ", strip=True)
    for element in soup.find_all(['h1', 'h2', 'h3', 'p'])
]

# join the fragments, then collapse every run of whitespace into a single space
weather_data = re.sub(r'\s+', ' ', "\n".join(text_fragments))

print(f"Website: {url}\n\n")
print(weather_data)

Website: https://www.weather.com.au/nsw/sydney


Sydney Weather


## Agentic Search

In [43]:
# query Tavily, asking for a single result
result = client.search(query, max_results=1)

# take the content field of that one result
first_hit = result["results"][0]
data = first_hit["content"]

print(data)

{'location': {'name': 'Sydney', 'region': 'New South Wales', 'country': 'Australia', 'lat': -33.88, 'lon': 151.22, 'tz_id': 'Australia/Sydney', 'localtime_epoch': 1719582620, 'localtime': '2024-06-28 23:50'}, 'current': {'last_updated_epoch': 1719582300, 'last_updated': '2024-06-28 23:45', 'temp_c': 12.4, 'temp_f': 54.3, 'is_day': 0, 'condition': {'text': 'Clear', 'icon': '//cdn.weatherapi.com/weather/64x64/night/113.png', 'code': 1000}, 'wind_mph': 9.4, 'wind_kph': 15.1, 'wind_degree': 350, 'wind_dir': 'N', 'pressure_mb': 1023.0, 'pressure_in': 30.21, 'precip_mm': 0.0, 'precip_in': 0.0, 'humidity': 71, 'cloud': 0, 'feelslike_c': 11.4, 'feelslike_f': 52.5, 'windchill_c': 10.5, 'windchill_f': 50.9, 'heatindex_c': 11.7, 'heatindex_f': 53.0, 'dewpoint_c': 7.3, 'dewpoint_f': 45.1, 'vis_km': 10.0, 'vis_miles': 6.0, 'uv': 1.0, 'gust_mph': 11.7, 'gust_kph': 18.8}}


In [44]:
import ast
import json
from pygments import highlight, lexers, formatters

# parse the result content into Python objects
try:
    parsed_json = json.loads(data)
except json.JSONDecodeError:
    # The content can be a Python-literal dict (single-quoted keys) rather than JSON.
    # literal_eval reads that form directly; swapping ' for " would corrupt any value
    # containing an apostrophe and cannot handle True/False/None.
    parsed_json = ast.literal_eval(data)

# pretty print JSON with syntax highlighting
formatted_json = json.dumps(parsed_json, indent=4)
colorful_json = highlight(formatted_json,
                          lexers.JsonLexer(),
                          formatters.TerminalFormatter())

print(colorful_json)


{[37m[39;49;00m
[37m    [39;49;00m[94m"location"[39;49;00m:[37m [39;49;00m{[37m[39;49;00m
[37m        [39;49;00m[94m"name"[39;49;00m:[37m [39;49;00m[33m"Sydney"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"region"[39;49;00m:[37m [39;49;00m[33m"New South Wales"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"country"[39;49;00m:[37m [39;49;00m[33m"Australia"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"lat"[39;49;00m:[37m [39;49;00m[34m-33.88[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"lon"[39;49;00m:[37m [39;49;00m[34m151.22[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"tz_id"[39;49;00m:[37m [39;49;00m[33m"Australia/Sydney"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"localtime_epoch"[39;49;00m:[37m [39;49;00m[34m1719582620[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"localtime"[39;49;00m:[37m [39;49;00m[33m"2024-06-28 23:50"[39;49;00m[37m[39;49;00m
[3

<img src="./google_sample.png" width="800" height="600">