In [1]:
from bs4 import BeautifulSoup
import requests

# Ten-day forecast page (pt-BR locale) that the rest of the notebook scrapes.
url = "https://weather.com/pt-BR/weather/tenday/l/2ba315322f9349de40b5133f33665b38db05f15051a3d00151e2f050d7e07fe2"
# Without an explicit timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)
# Fail here on a 4xx/5xx response instead of silently parsing an error page below.
response.raise_for_status()
# Parse the returned HTML with the standard-library parser backend.
soup = BeautifulSoup(response.text, 'html.parser')

In [None]:
# Sanity check: show the text of the page's <title> element.
soup.find("title").string

In [None]:
# Forecast rows carry ids such as #detailIndex0, #detailIndex1, ...
# Start by selecting a single row through its exact id selector.
rows = soup.select("#detailIndex0")
# Show how many elements matched that selector.
len(rows)

In [None]:
# An exact id selector only matches one row, because every row has its own id
# (#detailIndex0, #detailIndex1, ...). An attribute selector can match them all:
# [id^="detailIndex"] selects every element whose id attribute starts with
# "detailIndex" (the ^= operator matches the beginning of the attribute value).
# This is standard CSS selector syntax, not something specific to BeautifulSoup.
rows = soup.select('[id^="detailIndex"]')

# Show the number of matches and the first 30 characters of the first row's text.
preview = rows[0].get_text()[:30]
print(len(rows), preview)

# Alternative: use find_all with a regular expression on the id.
# import re
# rows_find = soup.find_all(id=re.compile(r'^detailIndex'))
# print(len(rows_find), rows_find[0].text[:30])

In [None]:
# Start with the day label. The full selector copied from the page is:
#   #detailIndex1 > summary > div > div > h2
# so walk down summary -> div -> div -> h2 inside each row and print its text.
for row in rows:
    heading = row.summary.div.div.h2
    day = heading.get_text()
    print(day)


In [48]:
# Let's check now the temperatures...
# #detailIndex1 > summary > div > div > div.DetailsSummary--temperature--1kVVp > span.DetailsSummary--highTempValue--3PjlX
# Ugly selector. What if this 1kVVp suffix changes on each page? Let's check the HTML...


In [None]:
<div class="DetailsSummary--temperature--1kVVp" data-testid="detailsTemperature">
    <span class="DetailsSummary--highTempValue--3PjlX" data-testid="TemperatureValue" dir="ltr">21<span>°</span>
        <span></span>
    </span>
    <span data-testid="lowTempValue">/
        <span class="DetailsSummary--lowTempValue--2tesQ" data-testid="TemperatureValue" dir="ltr">15<span>°</span>
            <span></span>
        </span>
    </span>
</div>

In [None]:
# Let's use these data-testid attributes. We want to find the first span that has data-testid = TemperatureValue
# rows[1].find_all(name='span')
# You can filter on the common arguments (name, href, id...) directly as keyword arguments; for custom HTML5 attributes such as data-testid, pass a dict instead:
# rows[1].find_all('span', {'data-testid': 'TemperatureValue'})
# rows[1].find('span', {'data-testid': 'TemperatureValue'}).text
# rows[1].find('span', {'data-testid': 'TemperatureValue'}).contents


In [None]:
# Now let's apply similar logic for the min temp. It sits inside a span whose data-testid is lowTempValue,
# and the rest is the same as above. So let's first select that lowTempValue span.
# rows[1].find_all('span', {'data-testid': 'lowTempValue'})
# rows[1].find('span', {'data-testid': 'lowTempValue'}).find('span', {'data-testid': 'TemperatureValue'}).text


In [None]:
# Putting it all together: for every forecast row, print the day label along
# with its high and low temperatures.

# Attribute filter shared by the high- and low-temperature lookups.
temperature_attrs = {'data-testid': 'TemperatureValue'}

for row in rows:
    day = row.summary.div.div.h2.text

    # The first TemperatureValue span in the row is the high temperature;
    # contents[0] is its first child (the number, before the nested degree span).
    high_temp = row.find('span', temperature_attrs).contents[0]

    # The low temperature is the TemperatureValue span nested inside the
    # lowTempValue wrapper.
    low_temp_wrapper = row.find('span', {'data-testid': 'lowTempValue'})
    low_temp = low_temp_wrapper.find('span', temperature_attrs).contents[0]

    print(f"{day=} -> {high_temp=} {low_temp=}")