# Lesson 3: Agentic Search

In [1]:
# libraries
from dotenv import load_dotenv
import os
from tavily import TavilyClient

# read the local .env file into the process environment
# (the Tavily key is expected to be defined there or already exported)
_ = load_dotenv()

# build the Tavily client from the key found in the environment
client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))

In [2]:
# send the question to Tavily and ask it to include a generated answer
result = client.search(
    "What is in Nvidia's new Blackwell GPU?",
    include_answer=True,
)

# display the generated answer as the cell output
result["answer"]


"The new Nvidia Blackwell GPU, specifically the GB200 superchip, features two Nvidia B200 Tensor Core GPUs and a Grace CPU, containing 208 billion transistors across its two GPU dies, connected by a 10 TB/second chip-to-chip link. This powerful architecture provides 7x the performance and four times the training speed of an H100-powered system, making it a significant advancement in Nvidia's hardware lineup."

## Regular search

In [4]:
# pick the city to ask about (swap in your own city!)
city = "Faisalabad"

# free-text question; the quoted site name is part of the query text
query = f"""
    what is the current weather in {city}?
    Should I travel there today?
    "weather.com"
"""

# send the question to Tavily and ask it to include a generated answer
result = client.search(
    query,
    include_answer=True,
)

# display the generated answer as the cell output
result["answer"]

'The current weather in Faisalabad is partly cloudy with a temperature of 31.8°C (89.3°F). The humidity is at 63% with a slight breeze coming from the east at 12.6 km/h (7.8 mph). It feels like 37.6°C (99.8°F). Visibility is good at 10.0 km (6.0 miles). Considering the weather conditions, it seems suitable for travel today.'

> Note: search was modified to return expected results in the event of an exception. High volumes of student traffic sometimes cause rate limit exceptions.

In [5]:
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
import re

# module-level DuckDuckGo client object; `search` below calls its `.text()` method
ddg = DDGS()

def search(query, max_results=6):
    """Return the result URLs DuckDuckGo gives for ``query``.

    Args:
        query: Free-text search string passed to ``ddg.text``.
        max_results: Upper bound on the number of results requested.

    Returns:
        A list with the ``"href"`` value of every result. If anything goes
        wrong while querying DuckDuckGo or reading the results (for example a
        rate-limit error), a fixed list of two weather.com URLs is returned
        instead so the rest of the notebook can still run.
    """
    try:
        results = ddg.text(query, max_results=max_results)
        return [hit["href"] for hit in results]
    except Exception as e:
        # Best-effort fallback is deliberate, but say *why* it happened so a
        # reader can tell a rate limit apart from a genuine bug.
        print(f"returning previous results due to exception reaching ddg: {e!r}")
        # cover case where DDG rate limits due to high deeplearning.ai volume
        return [
            "https://weather.com/weather/today/l/USCA0987:1:US",
            "https://weather.com/weather/hourbyhour/l/54f9d8baac32496f6b5497b4bf7a277c3e2e6cc5625de69680e6169e7e38e9a8",
        ]


# list every URL the search helper found, one per line
for link in search(query):
    print(link)

https://weather.com/weather/tenday/l/Faisalabad+Punjab+Pakistan?canonicalCityId=90ff1ccc4d865ebc81cba3ee9e893a45da8613d28b1a0a8eeae241129b9c5a68
https://weather.com/en-PK/weather/hourbyhour/l/Islamabad+Islamabad+Capital+Territory?canonicalCityId=0d5ed3bbc3987ada5e00f6f71f4ea0cc37c42e6b77e962c63c64c8565a442703
https://weather.com/en-PK/weather/today/l/Islamabad+Islamabad+Capital+Territory?canonicalCityId=0d5ed3bbc3987ada5e00f6f71f4ea0cc37c42e6b77e962c63c64c8565a442703
https://weather.com/en-PK/weather/tenday/l/Islamabad+Islamabad+Capital+Territory?canonicalCityId=0d5ed3bbc3987ada5e00f6f71f4ea0cc37c42e6b77e962c63c64c8565a442703
https://weather.com/weather/tenday/l/Islamabad+Islamabad+Capital+Territory+Pakistan?canonicalCityId=0d5ed3bbc3987ada5e00f6f71f4ea0cc37c42e6b77e962c63c64c8565a442703
https://weather.com/weather/hourbyhour/l/Multan+Punjab+Pakistan?canonicalCityId=c530536b4b53b1193dfcee9d1f8a32031f7a29bce1e6e1e9e3de61782b82e4c2


In [6]:
def scrape_weather_info(url, timeout=10):
    """Download ``url`` and return its HTML parsed with BeautifulSoup.

    Args:
        url: Address of the page to fetch. A falsy value (``None`` or ``""``)
            returns early without making any network call.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        A ``BeautifulSoup`` object built from the response body on success.
        On failure a plain string is returned instead:
        ``"Weather information could not be found."`` when no URL was given,
        or ``"Failed to retrieve the webpage."`` when the request raised a
        ``requests.RequestException`` or the status code was not 200.
        Callers should check the type before treating the result as a soup.
    """
    if not url:
        return "Weather information could not be found."

    # fetch data, sending a browser-like User-Agent header
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # connection errors and timeouts are reported the same way as a bad
        # HTTP status, matching this function's string-based error style
        return "Failed to retrieve the webpage."
    if response.status_code != 200:
        return "Failed to retrieve the webpage."

    # parse result
    return BeautifulSoup(response.text, 'html.parser')


> Note: This produces a long output. After looking at it briefly, you may want to right-click and clear the cell output so you don't have to scroll past it.

In [7]:
# ask DuckDuckGo for candidate pages and keep only the top hit
candidate_urls = search(query)
url = candidate_urls[0]

# download and parse that first website
soup = scrape_weather_info(url)

print(f"Website: {url}\n\n")
# show just the first 500 characters of the <body> to keep the output short
body_html = str(soup.body)
print(body_html[:500])

Website: https://weather.com/weather/tenday/l/Faisalabad+Punjab+Pakistan?canonicalCityId=90ff1ccc4d865ebc81cba3ee9e893a45da8613d28b1a0a8eeae241129b9c5a68


<body><div class="appWrapper DaybreakLargeScreen LargeScreen lightTheme twcTheme DaybreakLargeScreen--appWrapper--3kKUE gradients--cloudyFoggyNight--hTBfM gradients--cloudyFoggyNight-top--1Xd8n" id="appWrapper"><div class="region-meta"><div class="removeIfEmpty" id="WxuHtmlHead-meta-"></div></div><div class="region-topAds regionTopAds DaybreakLargeScreen--regionTopAds--2kcLJ"><div class="removeIfEmpty" id="WxuAd-topAds-53dce052-5465-4609-a555-c3a20ab64ab0"><div class="adWrapper BaseAd--adWrapper


In [8]:
# collect the visible text of every heading and paragraph on the page
text_fragments = [
    element.get_text(" ", strip=True)
    for element in soup.find_all(['h1', 'h2', 'h3', 'p'])
]

# glue the fragments together, then collapse every run of whitespace
# (including the newlines used as separators) into a single space
weather_data = re.sub(r'\s+', ' ', "\n".join(text_fragments))

print(f"Website: {url}\n\n")
print(weather_data)

Website: https://weather.com/weather/tenday/l/Faisalabad+Punjab+Pakistan?canonicalCityId=90ff1ccc4d865ebc81cba3ee9e893a45da8613d28b1a0a8eeae241129b9c5a68


recents Specialty Forecasts 10 Day Weather - Faisalabad, Punjab, Pakistan Tonight Sat 03 | Night Partly cloudy skies. Low 81F. Winds E at 5 to 10 mph. Sun 04 Sun 04 | Day Thunderstorms likely in the morning. Then a chance of scattered thunderstorms in the afternoon. High 92F. Winds ESE at 5 to 10 mph. Chance of rain 70%. Sun 04 | Night Increasing clouds with showers arriving sometime after midnight. Low 82F. Winds light and variable. Chance of rain 40%. Mon 05 Mon 05 | Day Overcast with a slight chance of showers and thunderstorms, mainly in the morning. High 94F. Winds E at 5 to 10 mph. Chance of rain 30%. Mon 05 | Night Mostly cloudy skies early, then partly cloudy after midnight. Low 83F. Winds light and variable. Tue 06 Tue 06 | Day Partly to mostly cloudy. High near 95F. Winds ESE at 5 to 10 mph. Tue 06 | Night Partly cloudy in

## Agentic Search

In [9]:
# ask Tavily for a single result only
result = client.search(query, max_results=1)

# pull the content field out of that one result
top_hit = result["results"][0]
data = top_hit["content"]

print(data)

{'location': {'name': 'Faisalabad', 'region': 'Punjab', 'country': 'Pakistan', 'lat': 31.42, 'lon': 73.08, 'tz_id': 'Asia/Karachi', 'localtime_epoch': 1722717032, 'localtime': '2024-08-04 1:30'}, 'current': {'last_updated_epoch': 1722717000, 'last_updated': '2024-08-04 01:30', 'temp_c': 31.8, 'temp_f': 89.3, 'is_day': 0, 'condition': {'text': 'Partly Cloudy', 'icon': '//cdn.weatherapi.com/weather/64x64/night/116.png', 'code': 1003}, 'wind_mph': 7.8, 'wind_kph': 12.6, 'wind_degree': 95, 'wind_dir': 'E', 'pressure_mb': 999.0, 'pressure_in': 29.5, 'precip_mm': 0.0, 'precip_in': 0.0, 'humidity': 63, 'cloud': 49, 'feelslike_c': 37.6, 'feelslike_f': 99.8, 'windchill_c': 31.8, 'windchill_f': 89.3, 'heatindex_c': 37.6, 'heatindex_f': 99.8, 'dewpoint_c': 23.9, 'dewpoint_f': 75.0, 'vis_km': 10.0, 'vis_miles': 6.0, 'uv': 1.0, 'gust_mph': 11.0, 'gust_kph': 17.7}}


In [10]:
import ast
import json
from pygments import highlight, lexers, formatters

# Parse the payload. The content printed above is a Python dict repr
# (single-quoted strings), not strict JSON. Blindly swapping ' for " would
# corrupt any value that contains an apostrophe and cannot handle Python
# literals such as True/False/None, so try strict JSON first and otherwise
# evaluate the text as a Python literal (literal_eval never executes code).
try:
    parsed_json = json.loads(data)
except json.JSONDecodeError:
    parsed_json = ast.literal_eval(data)

# pretty print JSON with syntax highlighting
formatted_json = json.dumps(parsed_json, indent=4)
colorful_json = highlight(formatted_json,
                          lexers.JsonLexer(),
                          formatters.TerminalFormatter())

print(colorful_json)


{[37m[39;49;00m
[37m    [39;49;00m[94m"location"[39;49;00m:[37m [39;49;00m{[37m[39;49;00m
[37m        [39;49;00m[94m"name"[39;49;00m:[37m [39;49;00m[33m"Faisalabad"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"region"[39;49;00m:[37m [39;49;00m[33m"Punjab"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"country"[39;49;00m:[37m [39;49;00m[33m"Pakistan"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"lat"[39;49;00m:[37m [39;49;00m[34m31.42[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"lon"[39;49;00m:[37m [39;49;00m[34m73.08[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"tz_id"[39;49;00m:[37m [39;49;00m[33m"Asia/Karachi"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"localtime_epoch"[39;49;00m:[37m [39;49;00m[34m1722717032[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"localtime"[39;49;00m:[37m [39;49;00m[33m"2024-08-04 1:30"[39;49;00m[37m[39;49;00m
[37m    [39;49

<img src="./google_sample.png" width="800" height="600">