In [1]:
# import the dependencies

from bs4 import BeautifulSoup
import requests
import datetime
from urllib.parse import urljoin

# Methodology for the Weather Maps

The Weather Prediction Center (part of NOAA's National Weather Service) maintains an archive of historical weather maps, including surface analyses with radar and satellite imagery.  We can scrape these maps with BeautifulSoup once we inspect the HTML elements of the archive page:
[https://www.wpc.ncep.noaa.gov/archives/web_pages/sfc/sfc_archive_maps.php?arcdate=07/04/2019&selmap=2019070421&maptype=ussatsfc](https://www.wpc.ncep.noaa.gov/archives/web_pages/sfc/sfc_archive_maps.php?arcdate=07/04/2019&selmap=2019070421&maptype=ussatsfc)  

We can then adjust the date and hour in the URL to select the map for each snow event.  The coding of the dates is straightforward.  The inclusion of time is subtle: the maps are only provided in 3-hour increments (00, 03, 06, 09, ..., 21), so the hour passed in the URL must be one of those values.


In [2]:
def build_weather_url(date, hour, maptype="ussatsfc"):
    """Build the URL of the WPC surface-map archive page for a date and hour.

    Parameters
    ----------
    date : datetime.date or datetime.datetime
        Day of the requested map; it is formatted with ``strftime``.
    hour : int, or any value ``int()`` accepts (e.g. ``12.0`` or ``"09"``)
        Hour of the requested map, rendered as two digits.  The value is not
        checked against the archive's 3-hour increments.
    maptype : str, optional
        Value of the ``maptype`` query parameter.  Defaults to ``"ussatsfc"``,
        which keeps the URL identical to the one built before this parameter
        existed.

    Returns
    -------
    str
        The archive page URL.

    Raises
    ------
    ValueError, TypeError
        If ``hour`` cannot be converted to an integer.
    """
    base_url = "https://www.wpc.ncep.noaa.gov/archives/web_pages/sfc/sfc_archive_maps.php"

    # The page expects the date twice: MM/DD/YYYY for `arcdate`, and
    # YYYYMMDD followed by the two-digit hour for `selmap`.
    arcdate = date.strftime("%m/%d/%Y")
    # int() first, so a float hour such as 12.0 (e.g. from a numeric column
    # that pandas read as float) yields "12" rather than "12.0".
    selmap = date.strftime("%Y%m%d") + f"{int(hour):02d}"

    return f"{base_url}?arcdate={arcdate}&selmap={selmap}&maptype={maptype}"

In [3]:
def get_weather_gif_url(date, hour):
    """Return the absolute URL of the satellite/surface image for a date and hour.

    The archive page built by ``build_weather_url`` is fetched and searched for
    the first ``<img class="sfcmapimage">`` element; its ``src`` is resolved
    against the page URL.

    Parameters
    ----------
    date : datetime.date or datetime.datetime
        Day of the requested map.
    hour : int
        Hour of the requested map.

    Returns
    -------
    str
        The image URL, or ``""`` if the request fails or the page contains no
        usable map image.  Failures are reported with ``print`` rather than
        raised, so a batch loop over many dates keeps going.
    """
    page_url = build_weather_url(date, hour)

    # default that is returned whenever the image URL cannot be determined
    gif_url = ""

    try:
        page_response = requests.get(page_url, timeout=5)
        page_response.raise_for_status()
    except requests.exceptions.HTTPError as errh:
        print("Http Error:", errh)
    except requests.exceptions.ConnectionError as errc:
        print("Error Connecting:", errc)
    except requests.exceptions.Timeout as errt:
        print("Timeout Error:", errt)
    except requests.exceptions.RequestException as err:
        print("OOps: Something Else", err)
    else:
        page_content = BeautifulSoup(page_response.content, "html.parser")

        # find() returns None when nothing matches, instead of the IndexError
        # that indexing an empty find_all() result would raise.
        image_tag = page_content.find("img", attrs={"class": "sfcmapimage"})
        image_rel_url = image_tag.get("src") if image_tag is not None else None

        if image_rel_url:
            gif_url = urljoin(page_url, image_rel_url)
        else:
            print("No map image found on page:", page_url)

    return gif_url


In [4]:
# Spot-check: look up the image URL for the hour-12 map on 2018-01-22.
get_weather_gif_url(datetime.datetime(2018, 1, 22, 0, 0), 12)

'https://www.wpc.ncep.noaa.gov/archives/sfc/2018/ussatsfc2018012212.gif'

In [5]:
def download_weather_gif(date, hour, emergency):
    """Download the satellite/surface image into ``static/img``.

    The image is saved as ``static/img/<emergency><YYYYMMDD><HH>.gif``.  The
    image is fetched and checked *before* the file is opened, so a failed
    lookup or download never leaves an empty or bogus ``.gif`` behind.

    Parameters
    ----------
    date : datetime.date or datetime.datetime
        Day of the requested map.
    hour : int
        Hour of the requested map; rendered as two digits in the file name.
    emergency : str
        Name of the snow emergency, used as the file-name prefix.

    Returns
    -------
    None
        Failures are reported with ``print`` and the function returns without
        writing a file.
    """
    gif_url = get_weather_gif_url(date, hour)

    # get_weather_gif_url signals failure with an empty string; requesting ""
    # would raise, so stop here.
    if not gif_url:
        print("No image URL available, nothing downloaded for", emergency)
        return

    try:
        gif_response = requests.get(gif_url, timeout=5)
        # Without this an HTTP error page would be saved as if it were the image.
        gif_response.raise_for_status()
    except requests.exceptions.RequestException as err:
        print("Could not download", gif_url, "-", err)
        return

    seldate = date.strftime("%Y%m%d")
    selmap = "static/img/" + emergency + seldate + f"{int(hour):02d}" + '.gif'

    with open(selmap, 'wb') as gif_file:
        gif_file.write(gif_response.content)


In [6]:
# Smoke-test the download for the same date and hour; the image is written to
# static/img/Pembina2018012212.gif.
download_weather_gif(datetime.datetime(2018, 1, 22, 0, 0), 12, 'Pembina')

In [7]:
import os

In [9]:
import pandas as pd

In [10]:
# Load the storm episodes table (path is relative to the working directory).
# NOTE(review): the preview of this frame shows an `Unnamed: 0` column, which
# suggests the CSV was saved with its index - confirm whether it is needed.
episodes_df = pd.read_csv('static/data/episodes.csv')

In [11]:
# Preview the first rows to check which columns were loaded.
episodes_df.head()

Unnamed: 0,emergency,event_id,county,storm_begin_date,begin_time,declaration_date,type,episode_id,narrative
0,Grant,606010.0,Scott,12/28/2015,18,12/29/2015,Winter Storm,101309.0,A very large storm system that began in the so...
1,Polk,610886.0,Hennepin,2/2/2016,12,2/2/2016,Winter Storm,102020.0,A major winter storm affected the Upper Midwes...
2,Dana,662543.0,Carver,12/10/2016,12,12/11/2016,Winter Storm,111012.0,A storm system began to develop across the Pla...
3,Ferry,662658.0,Hennepin,12/16/2016,15,12/17/2016,Winter Storm,111065.0,A long duration snow storm that occurred from ...
4,Jane,,,1/9/2017,12,1/11/2017,,,


In [12]:
import datetime
from dateutil.parser import parse

def convert_dates(dates):
    """Convert an iterable of date-like values to a list of datetimes.

    Parameters
    ----------
    dates : iterable
        Values that are already ``datetime.datetime`` instances are kept as
        they are; everything else is handed to ``dateutil``'s fuzzy parser.

    Returns
    -------
    list
        One entry per input, in the same order: a ``datetime.datetime``, or
        ``None`` for a value that is missing (``None``/NaN) or could not be
        parsed.  A message is printed for every such value.
    """
    def is_missing(value):
        # NaN is the only float that does not compare equal to itself.
        return value is None or (isinstance(value, float) and value != value)

    def try_to_parse_date(a_string):
        if is_missing(a_string):
            print(f"Could not parse a date from `{a_string}`")
            return None
        try:
            # fuzzy_with_tokens=True returns (datetime, skipped_tokens)
            parsed = parse(a_string, fuzzy_with_tokens=True)
        except (ValueError, TypeError, OverflowError):
            # TypeError: input is not a string or stream;
            # OverflowError: the parsed date is out of range.
            print(f"Could not parse a date from `{a_string}`")
            return None
        return parsed[0]

    def converter(date):
        if isinstance(date, datetime.datetime):
            return date
        return try_to_parse_date(date)

    return [converter(date) for date in dates]


In [13]:
# Replace the storm start dates with the values returned by convert_dates;
# strings it cannot parse are reported with a printed message.
episodes_df['storm_begin_date'] = convert_dates(episodes_df['storm_begin_date'])

In [14]:
# Same conversion for the emergency declaration dates.
episodes_df['declaration_date'] = convert_dates(episodes_df['declaration_date'])

In [15]:
# Pre-scrape step: look up the image URL for every episode (one page request
# per row) and store the results in a new `gif_url` column.

gif_url = [
    get_weather_gif_url(begin_date, begin_hour)
    for begin_date, begin_hour in zip(
        episodes_df['storm_begin_date'], episodes_df['begin_time']
    )
]

episodes_df['gif_url'] = gif_url

In [16]:
# Backup step: also save a local copy of every episode's image.

episode_fields = zip(
    episodes_df['storm_begin_date'],
    episodes_df['begin_time'],
    episodes_df['emergency'],
)
for begin_date, begin_hour, emergency_name in episode_fields:
    download_weather_gif(begin_date, begin_hour, emergency_name)

In [17]:
# List the downloaded images. os.listdir returns entries in arbitrary order,
# so sort them to keep the displayed output stable between runs.
sorted(os.listdir('static/img'))

['Armatage2019022409.gif',
 'Dana2016121012.gif',
 'Diamond Lake2019030915.gif',
 'Ferry2016121615.gif',
 'Grant2015122818.gif',
 'Howe2018041321.gif',
 'Howe22018041409.gif',
 'Jane2017010912.gif',
 'Olive2018011412.gif',
 'Pembina2018012209.gif',
 'Pembina2018012212.gif',
 'Polk2016020212.gif',
 'Quincy2019012718.gif',
 'Upton2019020706.gif',
 'Wesminster2019021006.gif',
 'Xerxes2018022212.gif',
 'Yale2019022003.gif',
 'Yardville2018022415.gif']

# In conclusion:

We can define an app route that fetches the historical weather maps using the above functions.  We could pull the image each time or load from the `static/img` directory. Another option would allow the user to pick the date and time (recall the time increments 00, 03, 06, 09) for the weather map from a menu.

In [18]:
# Save the work we did to convert the dates and fetch the URLs
# NOTE(review): to_csv writes the index as an extra leading column by default;
# pass index=False if readers of this file should not see it - confirm with
# whatever consumes finalEpisodes.csv.
episodes_df.to_csv('static/data/finalEpisodes.csv')