# Getting 48h weather forecast and past 7d weather from NWS via webscraping

This script collects data from the National Weather Service (NWS) into a **_pandas_** DataFrame and creates a simple overview figure using **_matplotlib_**. I plan to run this script periodically later on, so that the figure stays up to date.

Data sources: 
1. [NWS Metar Reports](https://www.wrh.noaa.gov/zoa/getobext.php?sid=KCHO)
2. [NWS Hourly Forecast](https://forecast.weather.gov/MapClick.php?lat=38.1386&lon=-78.4528&lg=english&&FcstType=digital)

In [19]:
import requests
import sys
import pandas as pd
import re 
from bs4 import BeautifulSoup


In [4]:
# Station ID that is passed to the report page as its `sid` query parameter
SiteID = 'KCHO'
# Full URL of the station's report page
ReportSite = ''.join(('https://www.wrh.noaa.gov/zoa/getobext.php?sid=', SiteID))


In [46]:
# Timestamp taken when this cell runs, and the calendar year it falls in
TimeNow = pd.to_datetime("today")
YearNow = TimeNow.year


In [57]:
# Function definition is here
def ScrapeMetReport(ReportPage):
    """Scrape MetReport data from the NWS report page of one site.

    Parameters
    ----------
    ReportPage
        Response object whose ``content`` attribute holds the HTML of the
        report page (as returned by ``requests.get``).

    Returns
    -------
    df : pandas.DataFrame
        One row per observation. All columns hold the stripped cell text,
        except ``Time``, which is converted to datetimes.
    TimeObs : str
        Text of the second cell in the fourth row of the page's second table.
    Coord : tuple of str
        The number-like tokens of the first text node containing "Latitude",
        in page order.

    Raises
    ------
    ValueError
        If the observation table, its data rows or the coordinates cannot be
        found, or if the table has an unexpected number of columns.
    """
    soup = BeautifulSoup(ReportPage.content, 'html.parser')

    # Extract data from table body
    HeaderLines = 3
    ColumnNames1 = ['Time', 'Temp', 'Dewpoint', 'RelHum', 'WindDir', 'WindSp', 'Visibility', 'WX', 'Clouds', 'SLP', 'Altimeter',
               'StationP', '6h TMAX', '6h TMIN', '24h TMAX', '24h TMIN','QC']
    ColumnNames2 = ['Time', 'Temp', 'Dewpoint', 'RelHum', 'WindDir', 'WindSp', 'Visibility', 'WX', 'Clouds', 'SLP', 'Altimeter',
               'StationP', 'P1h','P3h','P6h','P24h','6h TMAX', '6h TMIN', '24h TMAX', '24h TMIN','QC']
    # The two known table layouts, keyed by their column count (17 and 21)
    Layouts = {len(ColumnNames1): ColumnNames1, len(ColumnNames2): ColumnNames2}

    data_table = soup.find('table', class_="inner-timeseries")
    if data_table is None:
        raise ValueError('No observation table found in MetReport')

    tabs = []
    for row in data_table.find_all('tr')[HeaderLines:]:
        cells = [x.text.strip() for x in row.find_all('td')]
        # Rows without any <td> carry no observation; skip them
        if cells:
            tabs.append(cells)
    if not tabs:
        raise ValueError('No observation rows found in MetReport')

    # and write data to dataframe (layout is chosen from the last data row)
    columns = Layouts.get(len(tabs[-1]))
    if columns is None:
        raise ValueError('Unexpected number of data columns MetReport')
    df = pd.DataFrame(tabs, columns=columns)

    # Adjust to proper date.
    # The page gives no year, so the current year is prepended before
    # parsing; this also lets "29 Feb" parse in leap years.
    # %I (12-hour clock) is required for %p to be honoured; with %H the
    # am/pm marker is ignored and afternoon times come out 12 hours early.
    now = pd.Timestamp.now()
    stamped = str(now.year) + ' ' + df['Time']
    df['Time'] = pd.to_datetime(stamped, format='%Y %d %b %I:%M %p')
    # Observations cannot lie in the future: anything more than a day ahead
    # of now belongs to the previous year (report spanning New Year).
    rolled = df['Time'] > now + pd.Timedelta(days=1)
    if rolled.any():
        df.loc[rolled, 'Time'] = df.loc[rolled, 'Time'] - pd.DateOffset(years=1)

    # Extract time of observations from website
    tables = soup.find_all('table')
    info_table = tables[1]
    TimeObs = info_table.find_all('tr')[3].find_all('td')[1].get_text()

    # Extract Coordinates
    LatLonStr = info_table.find_all(string=re.compile("Latitude"))
    if not LatLonStr:
        raise ValueError('No coordinates found in MetReport')
    number = re.compile(r'-?[0-9]{1,3}(?:\.[0-9]{1,10})')
    Coord = tuple(w for w in re.split(' |;', LatLonStr[0]) if number.search(w))

    return df, TimeObs, Coord
    

In [58]:

try:
    # Open page and parse to soup.
    # The timeout keeps an unresponsive server from blocking the run forever.
    ReportPage = requests.get(ReportSite, timeout=30)
    # Treat HTTP error codes as failures instead of parsing an error page
    ReportPage.raise_for_status()
    RepDf, TimeObs, SiteCoord = ScrapeMetReport(ReportPage)
except Exception as err:
    # Report what actually failed and stop with a non-zero exit status
    print('Failed to read Met Report - Something went wrong')
    print('Reason: {!r}'.format(err))
    sys.exit(1)

In [59]:

# Build the hourly-forecast URL; the first two entries of SiteCoord fill
# the lat and lon query parameters, in that order.
ForecastSite = (
    'https://forecast.weather.gov/MapClick.php?lat={:6.4f}&lon={:6.4f}&lg=english&&FcstType=digital'
    .format(*(float(c) for c in SiteCoord[:2]))
)
ForecastSite

'https://forecast.weather.gov/MapClick.php?lat=38.1374&lon=-78.4552&lg=english&&FcstType=digital'

Now parse the site into objects readable by Beautiful Soup

In [5]:
# Download both pages: the station report first, then the hourly forecast
ReportPage = requests.get(url=ReportSite)
ForecastPage = requests.get(url=ForecastSite)


In [6]:
# HTTP status codes of the two downloads (report first, forecast second)
Status = [page.status_code for page in (ReportPage, ForecastPage)]
Status

[200, 200]

In [61]:
# Fetch the forecast page again, parse it and print the indented HTML
ForecastPage = requests.get(url=ForecastSite)
soup = BeautifulSoup(markup=ForecastPage.content, features='html.parser')
print(soup.prettify())

<!DOCTYPE doctype html public "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
 <head>
  <title>
   Tabular Weather Forecast for 38.14N 78.46W (Elev. 587 ft)
  </title>
  <link href="fonts/main.css" rel="STYLESHEET" type="text/css"/>
 </head>
 <body background="#FFFFFF" leftmargin="0" marginheight="0" marginwidth="0" rightmargin="0" topmargin="0">
  <table background="/images/wtf/topbanner.jpg" border="0" cellpadding="0" cellspacing="0" width="100%">
   <tr>
    <td align="right" height="19">
     <a href="#contents">
      <img alt="Skip Navigation Links" border="0" height="1" src="/images/wtf/skipgraphic.gif" width="1"/>
     </a>
     <a href="http://weather.gov">
      <span class="nwslink">
       weather.gov
      </span>
     </a>
    </td>
   </tr>
  </table>
  <table border="0" cellpadding="0" cellspacing="0" width="100%">
   <tr>
    <td rowspan="2">
     <a href="http://www.noaa.gov">
      <img alt="NOAA logo - Click to go to the NOAA homepage" border="0" height="78" src="/i

In [118]:
# Collect the stripped cell texts, row by row, of the third table that has
# the attribute width="800"
test = soup.find_all('table', attrs={'width': "800"})
rows = test[2].find_all('tr')
tabs = []
for row in rows:
    cols = [x.text.strip() for x in row.find_all('td')]
    tabs.append(cols)

tabs
# Print the last child node of that table
print(list(test[2].children)[-1])


<tr align="center"><td align="left" width="5%"><font color="#FF0000" size="1">Thunder</font></td><td align="center" width="3%"><font color="#FF0000" size="1"><b>Chc</b></font></td><td align="center" width="3%"><font color="#FF0000" size="1"><b>Chc</b></font></td><td align="center" width="3%"><font color="#FF0000" size="1"><b>Lkly</b></font></td><td align="center" width="3%"><font color="#FF0000" size="1"><b>Lkly</b></font></td><td align="center" width="3%"><font color="#FF0000" size="1"><b>Lkly</b></font></td><td align="center" width="3%"><font color="#FF0000" size="1"><b>Lkly</b></font></td><td align="center" width="3%"><font color="#FF0000" size="1"><b>Chc</b></font></td><td align="center" width="3%"><font color="#FF0000" size="1"><b>Chc</b></font></td><td align="center" width="3%"><font color="#FF0000" size="1"><b>Chc</b></font></td><td align="center" width="3%"><font color="#FF0000" size="1"><b>SChc</b></font></td><td align="center" width="3%"><font color="#FF0000" size="1"><b>SChc

In [106]:
# Collect the stripped cell texts, row by row, of the table at index 9
# among all <table> elements in soup
table = soup.find_all('table')
rows = table[9].find_all('tr')
tabs = []
for row in rows:
    cols = [x.text.strip() for x in row.find_all('td')]
    tabs.append(cols)

tabs

[['International System of Units', 'Forecast Discussion'],
 ['7-Day Forecast', 'Hourly Weather Graph'],
 [''],
 ['Hazardous Weather', 'Briefing Page'],
 ['Past Weather Information', 'Interactive Forecast Map'],
 ['Home']]

In [99]:
# Number of <table> elements that soup.find_all('table') returned
len(table)

11