In [12]:
import requests
from bs4  import BeautifulSoup
import pandas as pd

In [13]:
# Load the page
url = 'https://forecast.weather.gov/MapClick.php?x=276&y=148&site=lox&zmx=&zmy=&map_x=276&map_y=148#.YitDzujMLIU'

# A timeout keeps the cell from hanging forever if the server never answers
# (requests waits indefinitely by default).
page = requests.get(url, timeout=30)

# Checking if the request was successful
print(page)

# Stop here on a 4xx/5xx answer instead of silently parsing an error page
page.raise_for_status()


# Preparing the soup :)
soup = BeautifulSoup(page.content, 'html.parser')


<Response [200]>


In [14]:
# Class attribute values used to look up the tags of interest
weekday_class = 'col-sm-2 forecast-label'
weather_description_class = 'col-sm-10 forecast-text'

# For each class, gather the text of every tag found under it.
# The result maps class string -> list of tag texts, in document order.
raw_data = {
    tag_class: [element.text for element in soup.find_all(class_=tag_class)]
    for tag_class in (weekday_class, weather_description_class)
}


In [15]:
# Turn the scraped lists into a data frame, giving the columns meaningful
# names right away instead of the raw class strings used as keys in raw_data
data = pd.DataFrame({
    'weekday': raw_data[weekday_class],
    'description': raw_data[weather_description_class],
})

In [16]:
# Candidate calendar dates, starting from yesterday. Each date appears twice
# to account for the day / night pattern of the forecast rows.
yesterday = pd.to_datetime("today") - pd.Timedelta(1, unit='D')

date = pd.Series(
    pd.date_range(start=yesterday, periods=8)
    .repeat(2)  # repeat each datetime by 2 to account for day and night pattern
    .date
)

# Comply with the website's change of weekday and date: when the first row is
# 'Overnight' the sequence starts one slot in (a single entry for yesterday),
# otherwise it starts two slots in (at today's first entry).
first_label = data['weekday'].iloc[0].lower()
date_offset = 1 if first_label == 'overnight' else 2

# reset_index(drop=True) is required in BOTH cases: assigning a Series to a
# column aligns on index labels, not on position. Without it the sliced
# Series keeps labels starting at `date_offset`, so the first rows of `data`
# receive NaN and every other row is paired with the wrong date.
data['date'] = date[date_offset:].reset_index(drop=True)

In [18]:
# Helper: Fahrenheit -> Celsius conversion
def fahrenheit_to_celsius(fahrenheit):
    """Return `fahrenheit` expressed in degrees Celsius, rounded to 2 decimals.

    The argument is passed through float(), so numeric strings such as
    '72' are accepted as well as numbers.
    """
    degrees_above_freezing = float(fahrenheit) - 32
    return round(degrees_above_freezing * 5 / 9, 2)

# Extracting temperature, which is embedded within the description column.
#
# The number is anchored to the word that introduces it ("high", "low" or
# "temperature") within the same sentence. Taking simply the first number in
# the text would pick up unrelated figures that precede the temperature,
# e.g. the 20 in "A 20 percent chance of showers. ... high near 65.".
# Descriptions without such a phrase yield NaN.
temperature_pattern = r'(?i)\b(?:high|low|temperature)\b[^.\d]*?(-?\d+)'

temp = (data['description']
            .str.extract(temperature_pattern, expand=False)     # first occurring match; expand=False returns a Series
            .map(fahrenheit_to_celsius, na_action='ignore')     # convert from Fahrenheit to Celsius, leaving missing values untouched
        )

# add temp to data

data["temp_celsius"] = temp


In [19]:
# temperature state (i.e. low or high)
# this information is embedded in the description column
#
# 'low' is matched as a whole word: a plain substring test would also fire on
# words such as "below", "blowing" or "slow" and mislabel those rows as 'low'.

temp_state = (data['description']                           # grab `description` column
                    .str.lower()                            # lower all characters for each description (row)
                    .str.contains(r'\blow\b', regex=True)   # whole word `low` present in the description? (True or False)
                    .map({True: 'low', False: 'high'})      # change `True` -> 'low' and `False` -> 'high'
            )

# Adding `temp_state` to data
data['temp_state'] = temp_state

In [20]:
# Arrange the columns in presentation order, then show the frame as a final check
column_order = ['date', 'weekday', 'temp_celsius', 'temp_state', 'description']
data = data.reindex(columns=column_order)

data

Unnamed: 0,date,weekday,temp_celsius,temp_state,description
0,2022-03-12,Overnight,11.67,low,"Partly cloudy, with a low around 53. East nort..."
1,2022-03-13,Sunday,22.22,high,"Mostly sunny, with a high near 72. East wind 5..."
2,2022-03-13,Sunday Night,10.0,low,"Mostly clear, with a low around 50. Northwest ..."
3,2022-03-14,Monday,24.44,high,"Sunny, with a high near 76. Northeast wind 5 t..."
4,2022-03-14,Monday Night,12.22,low,"Mostly clear, with a low around 54. West wind ..."
5,2022-03-15,Tuesday,24.44,high,"Mostly sunny, with a high near 76."
6,2022-03-15,Tuesday Night,11.11,low,"Partly cloudy, with a low around 52."
7,2022-03-16,Wednesday,26.11,high,"Sunny, with a high near 79. Breezy."
8,2022-03-16,Wednesday Night,12.22,low,"Mostly clear, with a low around 54."
9,2022-03-17,Thursday,27.22,high,"Mostly sunny, with a high near 81."
