In [2]:
# libraries
from dotenv import load_dotenv
import os
from tavily import TavilyClient

# pull settings from the local .env file into the process environment
_ = load_dotenv()

# build the Tavily client from the TAVILY_API_KEY environment variable
client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))

In [3]:
# ask Tavily the question and request a synthesized answer alongside the hits
result = client.search(
    "What is in Nvidia's new Blackwell GPU?",
    include_answer=True,
)

# leave the answer as the last expression so the notebook displays it
result["answer"]

"The new Blackwell GPU from Nvidia, specifically the Blackwell B200, is the company's most powerful single-chip GPU with 208 billion transistors. It is designed for AI applications, boasting the capability to reduce AI inference operating costs and energy consumption by up to 25 times when compared to the H100. The Blackwell GPU architecture is set to power the RTX 50-series graphics cards and is expected to be adopted by major organizations such as Amazon Web Services, Dell Technologies, Google, Meta, Microsoft, OpenAI, Oracle, Tesla, and xAI."

# Regular search

In [6]:
# city the weather question is about; interpolated into the query below
city = "Montreal"

# natural-language question reused by the DuckDuckGo search and the Tavily
# search later in the notebook
query = f"""
    What is the current weather in {city}?
    Should I go outside today?
"""

In [7]:
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
import re

# DuckDuckGo search client; the search() helper below calls its text() method
ddg = DDGS()

def search(query, max_results=6):
    """Return the result URLs DuckDuckGo finds for ``query``.

    Args:
        query: Search text handed to ``ddg.text``.
        max_results: Forwarded to ``ddg.text`` as its ``max_results`` argument.

    Returns:
        A list holding the ``href`` of each result. If the lookup raises for
        any reason, a fixed pair of weather.com URLs is returned instead so
        the rest of the notebook can keep running.
    """
    try:
        results = ddg.text(query, max_results=max_results)
        return [hit["href"] for hit in results]
    except Exception as exc:
        # best-effort fallback, but say what actually went wrong instead of
        # discarding the exception
        print(f"returning previous results due to exception reaching ddg: {exc!r}")
        return [  # cover case where DDG rate limits due to high deeplearning.ai volume
            "https://weather.com/weather/today/l/USCA0987:1:US",
            "https://weather.com/weather/hourbyhour/l/54f9d8baac32496f6b5497b4bf7a277c3e2e6cc5625de69680e6169e7e38e9a8",
        ]


# show each URL returned by the search on its own line
for link in search(query):
    print(link)

https://www.accuweather.com/en/ca/montreal/h3a/weather-today/56186
https://www.accuweather.com/en/ca/montreal/h3a/current-weather/56186
https://www.accuweather.com/en/ca/montreal/h3a/weather-forecast/56186
https://www.theweathernetwork.com/ca/weather/quebec/montreal
https://weather.gc.ca/city/pages/qc-147_metric_e.html
https://weather.gc.ca/forecast/hourly/qc-147_metric_e.html


In [8]:
def scrape_weather_info(url, timeout=10):
    """Scrape content from the given URL.

    Args:
        url: Address of the page to fetch. A falsy value skips the request.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``; without
            one the call can block indefinitely on an unresponsive server.

    Returns:
        The parsed page (``BeautifulSoup`` built with ``'html.parser'``) on
        success, or a plain error-message string when ``url`` is empty, the
        request raises, or the response status is not 200.
    """
    if not url:
        return "Weather information could not be found."

    # fetch data
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # connection errors and timeouts are reported the same way as a bad status
        return "Failed to retrieve the webpage."
    if response.status_code != 200:
        return "Failed to retrieve the webpage."

    # parse result
    return BeautifulSoup(response.text, 'html.parser')


In [9]:
# use DuckDuckGo to find websites and take the first result
url = search(query)[0]

# scrape first website
soup = scrape_weather_info(url)

print(f"Website: {url}\n\n")
if isinstance(soup, str):
    # scrape_weather_info returns a plain error message instead of a parsed
    # page when the fetch fails; show it rather than crashing on soup.body
    print(soup)
else:
    print(str(soup.body)[:50000]) # limit long outputs

Website: https://www.accuweather.com/en/ca/montreal/h3a/weather-today/56186


<body class="daily-detail-daynight ads-not-loaded full-animation rfphrase-disabled">
<div class="template-root" style="display:none">
<div class="basic-header" style="visibility: hidden;">
<a class="pwa-top-banner" href="/pwa">
<svg class="arrow-icon" height="18" viewbox="0 0 18 18" width="18" xmlns="http://www.w3.org/2000/svg"><defs><path d="m8.495.505 5 5v.99l-5 5-.99-.99 3.805-3.806L0 6.7V5.3l11.31-.001-3.805-3.804.99-.99z" id="a"></path></defs><use fill="#fff" fill-rule="nonzero" transform="translate(2 3)" xlink:href="#a"></use></svg>
		Go Back
	</a>
<div class="header-outer">
<div class="header-inner">
<a class="header-logo" href="/">
<svg data-eager="" height="24" viewbox="0 0 189 27" width="168" xmlns="http://www.w3.org/2000/svg"><g fill="none"><path d="M183.058 22.739v-6.864c0-1.4.293-2.342.879-2.83.585-.488 1.561-.716 2.863-.716h1.269v-3.09h-1.074c-.976 0-1.822.13-2.505.422a4.054 4.054 0 0 0-1.66 1.2

In [10]:
# gather the text of every heading and paragraph element on the page
weather_data = [
    element.get_text(" ", strip=True)
    for element in soup.find_all(['h1', 'h2', 'h3', 'p'])
]

# merge the pieces, then collapse each run of whitespace into a single space
weather_data = re.sub(r'\s+', ' ', "\n".join(weather_data))

print(f"Website: {url}\n\n")
print(weather_data)

Website: https://www.accuweather.com/en/ca/montreal/h3a/weather-today/56186


Montreal, Quebec Montreal Quebec Around the Globe Around the Globe Hurricane Tracker Severe Weather Radar & Maps News & Features Astronomy Business Climate Health Recreation Sports Travel Video Winter Center Day Max UV Index 4 Moderate Wind SW 11 km/h Wind Gusts 35 km/h Probability of Precipitation 84% Probability of Thunderstorms 26% Precipitation 7.9 mm Rain 7.9 mm Hours of Precipitation 4 Hours of Rain 4 Cloud Cover 76% Morning Afternoon Night Wind WSW 9 km/h Wind Gusts 33 km/h Probability of Precipitation 40% Probability of Thunderstorms 8% Precipitation 0.5 mm Rain 0.5 mm Hours of Precipitation 1 Hours of Rain 1 Cloud Cover 25% Evening Overnight Sun & Moon Temperature History Further Ahead Further Ahead Hourly Daily Monthly Around the Globe Around the Globe Hurricane Tracker Severe Weather Radar & Maps News Video Winter Center Top Stories Severe Weather Severe storms and downpours to return to south-cent

# Using the agentic search

In [11]:
# query Tavily, asking for a single result
result = client.search(query, max_results=1)

# take the content field of that one hit and show it
top_hit = result["results"][0]
data = top_hit["content"]

print(data)

{'location': {'name': 'Montreal', 'region': 'Quebec', 'country': 'Canada', 'lat': 45.5, 'lon': -73.58, 'tz_id': 'America/Toronto', 'localtime_epoch': 1717764644, 'localtime': '2024-06-07 8:50'}, 'current': {'last_updated_epoch': 1717764300, 'last_updated': '2024-06-07 08:45', 'temp_c': 19.1, 'temp_f': 66.4, 'is_day': 1, 'condition': {'text': 'Partly cloudy', 'icon': '//cdn.weatherapi.com/weather/64x64/day/116.png', 'code': 1003}, 'wind_mph': 8.1, 'wind_kph': 13.0, 'wind_degree': 250, 'wind_dir': 'WSW', 'pressure_mb': 999.0, 'pressure_in': 29.51, 'precip_mm': 0.0, 'precip_in': 0.0, 'humidity': 83, 'cloud': 25, 'feelslike_c': 19.1, 'feelslike_f': 66.4, 'windchill_c': 17.5, 'windchill_f': 63.5, 'heatindex_c': 17.5, 'heatindex_f': 63.5, 'dewpoint_c': 13.8, 'dewpoint_f': 56.9, 'vis_km': 24.0, 'vis_miles': 14.0, 'uv': 5.0, 'gust_mph': 9.8, 'gust_kph': 15.7}}


In [12]:
import ast
import json
from pygments import highlight, lexers, formatters

# parse the payload: accept real JSON as-is, otherwise treat it as a Python
# literal (the content printed above is a single-quoted dict repr). Swapping
# quote characters by hand corrupts any value containing an apostrophe and
# cannot handle True/False/None, so let ast.literal_eval do the parsing.
try:
    parsed_json = json.loads(data)
except json.JSONDecodeError:
    parsed_json = ast.literal_eval(data)

# pretty print JSON with syntax highlighting
formatted_json = json.dumps(parsed_json, indent=4)
colorful_json = highlight(formatted_json,
                          lexers.JsonLexer(),
                          formatters.TerminalFormatter())

print(colorful_json)

{[37m[39;49;00m
[37m    [39;49;00m[94m"location"[39;49;00m:[37m [39;49;00m{[37m[39;49;00m
[37m        [39;49;00m[94m"name"[39;49;00m:[37m [39;49;00m[33m"Montreal"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"region"[39;49;00m:[37m [39;49;00m[33m"Quebec"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"country"[39;49;00m:[37m [39;49;00m[33m"Canada"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"lat"[39;49;00m:[37m [39;49;00m[34m45.5[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"lon"[39;49;00m:[37m [39;49;00m[34m-73.58[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"tz_id"[39;49;00m:[37m [39;49;00m[33m"America/Toronto"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"localtime_epoch"[39;49;00m:[37m [39;49;00m[34m1717764644[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"localtime"[39;49;00m:[37m [39;49;00m[33m"2024-06-07 8:50"[39;49;00m[37m[39;49;00m
[37m    [39;49;

: 