In [1]:
# Libraries
from dotenv import load_dotenv
import os
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
import re
from tavily import TavilyClient

# Load environment variables from the .env file. Binding the return value to
# `_` keeps it from being echoed as the cell's output.
_ = load_dotenv()

In [2]:
# Connect to Tavily using the API key taken from the environment
tavily_api_key = os.environ.get("TAVILY_API_KEY")
client = TavilyClient(api_key=tavily_api_key)

In [3]:
# Ask Tavily the question and request a synthesized answer alongside the hits
question = "What is in Nvidia's new Blackwell GPU?"
result = client.search(question, include_answer=True)

# Display the synthesized answer as the cell output
result["answer"]


"Nvidia's new Blackwell GPU, specifically the Blackwell B200 tensor core chip, is claimed to be the company's most powerful single-chip GPU with 208 billion transistors. It is designed to significantly reduce AI inference operating costs and energy consumption by up to 25 times compared to the H100 GPU. The Blackwell GPUs are expected to deliver a performance improvement of 7 to 30 times over the H100 GPUs, which have been widely used for training AI programs such as ChatGPT. The Blackwell GPUs will be integrated into the GB200 NVL72 server product, combining 72 Blackwell GPUs connected through upgraded NVLink technology to provide a bidirectional throughput of 1.8TB/s."

In [4]:
# Choose a location (try changing it to your own city!)

city = "Rajkot"

# Free-text prompt reused by the search calls in the following cells.
# NOTE(review): the quoted "weather.com" line presumably steers the search
# engines toward that site -- confirm this is intentional.
query = f"""
    what is the current weather in {city}?
    Should I travel there today?
    "weather.com"
"""

In [5]:


# Shared DuckDuckGo search client; search() below reads this module-level name.
ddg = DDGS()

def search(query, max_results=6):
    """Return the result URLs DuckDuckGo finds for ``query``.

    Args:
        query: Free-text search string passed to ``ddg.text``.
        max_results: Maximum number of results to request.

    Returns:
        A list of URL strings (the ``"href"`` field of each result). If the
        lookup fails for any reason, a fixed list of two weather.com URLs is
        returned instead; those URLs are hard-coded and unrelated to ``query``.
    """
    try:
        results = ddg.text(query, max_results=max_results)
        return [hit["href"] for hit in results]
    except Exception as e:
        # Deliberate best-effort: DDG may rate limit (e.g. due to high
        # deeplearning.ai volume), so fall back to canned URLs instead of
        # failing. Report the actual error so the fallback is not silent.
        print(f"returning previous results due to exception reaching ddg: {e!r}")
        return [
            "https://weather.com/weather/today/l/USCA0987:1:US",
            "https://weather.com/weather/hourbyhour/l/54f9d8baac32496f6b5497b4bf7a277c3e2e6cc5625de69680e6169e7e38e9a8",
        ]


# Show every URL returned for the query, one per line
for link in search(query):
    print(link)

https://weather.com/en-IN/weather/today/l/Rajkot Gujarat?canonicalCityId=38c6cd6e54963fcebb990dced9a673312f5726790b70b92901b530b1576999fd
https://weather.com/weather/today/l/Rajkot+Gujarat+India?canonicalCityId=38c6cd6e54963fcebb990dced9a673312f5726790b70b92901b530b1576999fd
https://weather.com/en-IN/weather/hourbyhour/l/Rajkot+Gujarat?canonicalCityId=38c6cd6e54963fcebb990dced9a673312f5726790b70b92901b530b1576999fd


In [6]:
def scrape_weather_info(url, timeout=10):
    """Fetch ``url`` and return its HTML parsed with BeautifulSoup.

    Args:
        url: Address of the page to download. A falsy value (``None``, ``""``)
            short-circuits without any network access.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        A ``BeautifulSoup`` object on success. On failure a plain message
        string is returned instead: "Weather information could not be found."
        for a falsy ``url``, or "Failed to retrieve the webpage." when the
        request errors out or the status code is not 200. Callers must check
        for a string before using the result as a parsed page.
    """
    if not url:
        return "Weather information could not be found."

    # fetch data; a browser-like User-Agent is sent with the request
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts are reported the same way as a bad
        # status code, keeping a single failure convention for callers.
        return "Failed to retrieve the webpage."
    if response.status_code != 200:
        return "Failed to retrieve the webpage."

    # parse result
    return BeautifulSoup(response.text, 'html.parser')


In [7]:
# Use DuckDuckGo to find websites and take the first result.
# Indexing with [0] raises IndexError if search() returns an empty list.
url = search(query)[0]

# Scrape the first website.
# NOTE: scrape_weather_info() returns a plain message string instead of a
# parsed page when the URL is falsy or the download fails.
soup = scrape_weather_info(url)

print(f"Website: {url}\n\n")

Website: https://weather.com/en-IN/weather/today/l/Rajkot Gujarat?canonicalCityId=38c6cd6e54963fcebb990dced9a673312f5726790b70b92901b530b1576999fd




In [8]:
# Pull the text out of every heading and paragraph tag on the page
text_tags = soup.find_all(['h1', 'h2', 'h3', 'p'])
weather_snippets = [tag.get_text(" ", strip=True) for tag in text_tags]

# Join the snippets into one string, then collapse every run of whitespace
# (including the joining newlines) into a single space
weather_data = re.sub(r'\s+', ' ', "\n".join(weather_snippets))

print(f"Website: {url}\n\n")
print(weather_data)

Website: https://weather.com/en-IN/weather/today/l/Rajkot Gujarat?canonicalCityId=38c6cd6e54963fcebb990dced9a673312f5726790b70b92901b530b1576999fd


recents Forecast Science Maps Environment Weather News Activities Health Privacy Special Forecasts Rajkot, Gujarat Today's Forecast for Rajkot, Gujarat Morning Afternoon Evening Overnight Don't Miss Weather Today in Rajkot, Gujarat 07:11 18:02 Hourly Forecast Now 17:30 18:30 19:30 20:30 Daily Forecast Today Wed 04 Thu 05 Fri 06 Sat 07 Radar Top Video Air Quality Index May cause minor breathing discomfort to sensitive people. Health & Activities Seasonal Allergies and Pollen Count Forecast No pollen detected in your area We recognise our responsibility to use data and technology for good. We may use or share your data with our data vendors. Take control of your data. The Weather Channel is the world's most accurate forecaster according to ForecastWatch, Global and Regional Weather Forecast Accuracy Overview , 2017-2022, commissioned by The 

This is good, but can be improved.

In [9]:
# Run Agentic Search: request a single result and keep its content field
result = client.search(query, max_results=1)
top_hit = result["results"][0]
data = top_hit["content"]

In [10]:
import ast
import json
from pygments import highlight, lexers, formatters

# Parse the payload returned by the search. Blindly swapping ' for " corrupts
# any value that contains an apostrophe, so try the safe parsers first.
try:
    # 1) the payload is already strict JSON
    parsed_json = json.loads(data)
except json.JSONDecodeError:
    try:
        # 2) the payload is a Python-style literal (single-quoted keys/values);
        #    literal_eval only evaluates literals, never arbitrary code
        parsed_json = ast.literal_eval(data)
    except (ValueError, SyntaxError, TypeError):
        # 3) last resort: the original quote-swapping heuristic
        parsed_json = json.loads(data.replace("'", '"'))

# pretty print JSON with syntax highlighting
formatted_json = json.dumps(parsed_json, indent=4)
colorful_json = highlight(formatted_json,
                          lexers.JsonLexer(),
                          formatters.TerminalFormatter())

print(colorful_json)


{[37m[39;49;00m
[37m    [39;49;00m[94m"location"[39;49;00m:[37m [39;49;00m{[37m[39;49;00m
[37m        [39;49;00m[94m"name"[39;49;00m:[37m [39;49;00m[33m"Rajkot"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"region"[39;49;00m:[37m [39;49;00m[33m"Gujarat"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"country"[39;49;00m:[37m [39;49;00m[33m"India"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"lat"[39;49;00m:[37m [39;49;00m[34m22.3[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"lon"[39;49;00m:[37m [39;49;00m[34m70.7833[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"tz_id"[39;49;00m:[37m [39;49;00m[33m"Asia/Kolkata"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"localtime_epoch"[39;49;00m:[37m [39;49;00m[34m1733222958[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"localtime"[39;49;00m:[37m [39;49;00m[33m"2024-12-03 16:19"[39;49;00m[37m[39;49;00m
[37m    [39;49;00m