CNFAIC Observation Map
Goal:
Make a map of recent (<7 days) observations.  

Ideas:
- Custom date range
- Icons based on dates
- Professional only mode

Steps:
- Load observations and relevant data
- Assign location to observation
- Map observation
    - Format tags and popups
    
To Do:
- Improve popup formatting
- Add weather (wx) data with different markers
- Publish the notebook to GitHub
- Decide whether and where to host the map

In [1]:
#Load libraries
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import folium
import numpy as np
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
import datetime
import folium.plugins as plugins

In [2]:
# Empty frame listing the core observation columns.
Obs = pd.DataFrame(columns=['Location', 'Date', 'Observer', 'Lat', 'Lon'])

# Load the archive of previously scraped observations.
# NOTE: unpickling can execute arbitrary code -- only load an archive file you trust.
obsArchive = pd.read_pickle('./obsArchive.pkl')

In [3]:
def getObs(url):
    """Scrape a single observation page into a one-row DataFrame.

    Side effect: the parsed page is appended to a file under ./Observations/
    named after the part of ``url`` between character 36 and the final
    character (for 'https://www.cnfaic.org/observations/<slug>/' that is the
    slug).

    Depends on ``formatTime``, which is defined in a later cell of this
    notebook and must already exist when this function is called.

    Parameters
    ----------
    url : str
        Address of the observation page.

    Returns
    -------
    pandas.DataFrame
        One row with columns Date, Location, Observer, Lat, Lon, Recent Avy,
        Collapsing, Cracking and url.  Lat/Lon are NaN when the page has no
        Google link; the three red-flag columns are False when the page has
        no table rows or the expected cell is missing.
    """
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(url) as response:
        page = response.read()
    # NOTE(review): no parser is named, so BeautifulSoup uses whichever one is
    # installed; results may differ between environments.
    soup = BeautifulSoup(page)

    # Keep a local snapshot of the parsed page (append mode, as before).
    with open('./Observations/' + url[36:-1], 'a') as file:
        file.write(str(soup))

    # Location: text of the <title> tag up to the ' |' separator.
    location = str(soup.find('title'))
    location = location[7:location.find('|') - 1]

    # Observer: anonymous reports use a bare <div> byline, others a <span>.
    anonymousByline = '<div class="cnfaic_obs-table-browse-observations-byline">Anonymous</div>'
    byline = soup.select_one('div[ class *= cnfaic_obs-table-browse-observations-byline]')
    if str(byline) == anonymousByline:
        observer = 'Anonymous'
    else:
        observer = soup.select_one("span[class *= cnfaic_obs-table-browse-observations-byline]").text
        observer = observer[:-1]  # drop the trailing character of the byline text

    # Date: second element whose class contains "top_meta".
    date = soup.select('div[ class *= "top_meta"]')[1].text
    formattedDate = formatTime(date)

    # Coordinates: sliced out of the first link whose href mentions "google".
    links = soup.find_all("a", href=lambda href: href and "google" in href)
    if links:
        coords = str(links[0])
        coords = coords[coords.find('q='):coords.find("'>")]
        lat = float(coords[coords.find('=') + 1 : coords.find(',') - 1])
        lon = float(coords[coords.find(",") + 1 : coords.find('target') - 2])
    else:
        lat = float('NaN')
        lon = float('NaN')

    # Red flags: non-empty <td> texts of every table row, in page order.
    flags = []
    for row in soup.find_all('tr'):
        cells = [cell.text.strip() for cell in row.find_all('td')]
        flags.append([text for text in cells if text])

    def flagIsYes(index):
        # A missing row or cell counts as "not flagged" rather than raising
        # IndexError and aborting the whole scrape.
        return len(flags) > index and len(flags[index]) > 1 and flags[index][1] == 'Yes'

    recentAvy = flagIsYes(0)
    collapsing = flagIsYes(1)
    cracking = flagIsYes(2)

    # Assemble the one-row frame.
    d = {'Date': [formattedDate], 'Location': [location], 'Observer': [observer],
         'Lat': [lat], 'Lon': [lon],
         'Recent Avy': [recentAvy], 'Collapsing': [collapsing], 'Cracking': [cracking],
         'url': [url]}
    return pd.DataFrame(data=d)

    

In [4]:
def getUrls():
    """Return observation URLs from the listing page that are not archived yet.

    Reads the first <table> on https://www.cnfaic.org/view-observations/,
    skips every link whose markup contains 'gallery', and drops URLs already
    present in the module-level ``obsArchive['url']`` column.  Each URL is
    returned at most once, in page order.

    Returns
    -------
    list of str
    """
    listingUrl = 'https://www.cnfaic.org/view-observations/'
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(listingUrl) as response:
        page = response.read()
    # NOTE(review): no parser is named, so BeautifulSoup uses whichever one is installed.
    soup = BeautifulSoup(page)
    table = soup.find('table')

    # Build the lookup once; it also tracks URLs collected during this call so
    # a link repeated in the table is not scraped twice.
    seen = set(obsArchive['url'])
    urls = []
    for link in table.find_all('a'):
        tag = str(link)
        if 'gallery' in tag:
            continue
        # Slice the address out of '<a href="...">' (9 = len('<a href="')).
        url = tag[9 : tag.find('>') - 1]
        if url not in seen:
            seen.add(url)
            urls.append(url)
    return urls

    

In [5]:
def getNewObs():
    """Scrape every not-yet-archived observation into one DataFrame.

    Calls ``getUrls`` for the list of new pages and ``getObs`` for each of
    them, then stacks the one-row results with a fresh 0..n-1 index.

    Returns
    -------
    pandas.DataFrame
        One row per new observation; a frame with only an empty 'url' column
        when there is nothing new.
    """
    urls = getUrls()
    # Collect the rows first and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and growing a frame row by row is quadratic.
    frames = [getObs(url) for url in urls]
    if frames:
        newObs = pd.concat(frames, ignore_index=True)
    else:
        # pd.concat rejects an empty list.
        newObs = pd.DataFrame()
    newObs['url'] = urls
    return newObs

In [6]:
def addNewObs(obsArchive):
    """Merge newly scraped observations into the archive and save it.

    Parameters
    ----------
    obsArchive : pandas.DataFrame
        Existing archive; it must have a 'Date' column.

    Returns
    -------
    pandas.DataFrame
        Archive plus the rows from ``getNewObs``, sorted by 'Date' (newest
        first) with a fresh 0..n-1 index.  The same frame is written to
        ./obsArchive.pkl before returning.
    """
    newObs = getNewObs()

    # One concat instead of per-row DataFrame.append (removed in pandas 2.0).
    if not newObs.empty:
        obsArchive = pd.concat([obsArchive, newObs])

    obsArchive = obsArchive.sort_values('Date', ascending=False).reset_index(drop=True)

    obsArchive.to_pickle('./obsArchive.pkl')
    return obsArchive
        

In [7]:
# Scrape observations that are not in the archive yet, merge them in and re-save the pickle.
# NOTE: addNewObs -> getNewObs -> getObs calls formatTime, which is defined in a later
# cell (In [9]); on a fresh kernel that cell has to be run before this one.
obsArchive = addNewObs(obsArchive)
obsArchive

Unnamed: 0,Date,Location,Observer,Lat,Lon,Recent Avy,Collapsing,Cracking,url,ageGroup
0,2020-12-01 11:15:00,Mount Marathon…East aspect above jeep trail to...,Alex McLain,60.113664,-149.469092,False,False,False,https://www.cnfaic.org/observations/mount-mara...,
1,2020-11-30 08:47:00,Notch Mountain,Anonymous,60.984507,-149.046717,False,False,False,https://www.cnfaic.org/observations/notch-moun...,
2,2020-11-30 07:16:00,Seattle Ridge Flats,Andrew Schauer Forecaster,60.800798,-149.199152,False,False,False,https://www.cnfaic.org/observations/seattle-ri...,
3,2020-11-30 05:52:00,Tincan- above treeline,Eric Roberts,60.788590,-149.166365,False,False,False,https://www.cnfaic.org/observations/tincan-abo...,
4,2020-11-29 12:00:00,Sunburst,Kathy Still,60.794430,-149.199667,False,False,False,https://www.cnfaic.org/observations/sunburst-233/,
5,2020-11-29 09:39:00,South Fork ER valley,Elias Holt,61.221255,-149.408751,False,True,False,https://www.cnfaic.org/observations/south-fork...,
6,2020-11-29 09:30:00,Tenderfoot,Johnston-Bloom / Edwards Forecaster,60.630872,-149.474283,True,True,False,https://www.cnfaic.org/observations/tenderfoot...,
7,2020-11-29 07:06:00,Sunny Side,Peter Wadsworth,60.757714,-149.263225,False,False,True,https://www.cnfaic.org/observations/sunny-side-4/,
8,2020-11-29 04:00:00,Sunny Side,Peter Wadsworth,60.766738,-149.256337,False,False,False,https://www.cnfaic.org/observations/sunny-side-3/,yesterday
9,2020-11-29 02:53:00,Overflight Recon Turnagain Pass,Silverton Mountain Guides,60.794430,-149.199667,False,False,False,https://www.cnfaic.org/observations/overflight...,


In [8]:
"""formattedDates = []
for i in range(len(obsArchive)):
    formattedDates.append(formatTime(obsArchive.iloc[i][0]))

obsArchive['Date'] = formattedDates
    """

"formattedDates = []\nfor i in range(len(obsArchive)):\n    formattedDates.append(formatTime(obsArchive.iloc[i][0]))\n\nobsArchive['Date'] = formattedDates\n    "

In [9]:
def formatTime(dateString):
    """Convert a scraped date string into a ``datetime.datetime``.

    The string is expected to start with a three-letter English month
    abbreviation, contain 'D, YYYY' or 'DD, YYYY' around its first comma, and
    end with a time written either as 'H:MMam' / 'HH:MMpm' or as a 24-hour
    'HH:MM', e.g. 'Nov 29, 2020 2:53pm' or 'Nov 30, 2020 17:16'.

    Parameters
    ----------
    dateString : str

    Returns
    -------
    datetime.datetime

    Raises
    ------
    KeyError
        If the first three characters are not a known month abbreviation.
    ValueError
        If the rebuilt timestamp matches none of the supported layouts.
    """
    calendar = {'Jan':'01', 'Feb':'02', 'Mar':'03', 'Apr':'04', 'May':'05', 'Jun':'06', 'Jul':'07',
               'Aug':'08', 'Sep':'09', 'Oct':'10', 'Nov':'11', 'Dec':'12'}
    month = calendar[dateString[0:3]]

    comma = dateString.find(',')
    # Two characters before the comma; a leading space means a one-digit day.
    day = dateString[comma - 2:comma]
    if day[0] == ' ':
        day = '0' + day[1]

    year = dateString[comma + 2:comma + 6]

    # 'HH:MMam' is seven characters wide, a bare 'HH:MM' five.
    width = 7 if dateString[-1] == 'm' else 5
    time = dateString[-width:]
    if time[0] == ' ':
        time = '0' + time[1:]

    stamp = year + '-' + month + '-' + day + ' ' + time

    # %p only shifts the hour when it is parsed with %I; with %H the am/pm
    # marker is ignored and '2:53pm' would come back as 02:53.  The %H%p
    # layout is kept as a fallback for hour values %I rejects (0, 13-23).
    layouts = ('%Y-%m-%d %I:%M%p', '%Y-%m-%d %H:%M%p', '%Y-%m-%d %H:%M')
    for layout in layouts:
        try:
            return datetime.datetime.strptime(stamp, layout)
        except ValueError:
            continue
    raise ValueError('unrecognised date string: %r' % (dateString,))


In [10]:

# Tile source: the NatGeo_World_Map tile service on server.arcgisonline.com.
# NOTE(review): the attribution below says 'USGS Style' although the tiles come
# from the NatGeo_World_Map service -- confirm the attribution that is required.
url_base = 'http://server.arcgisonline.com/ArcGIS/rest/services/'
service = 'NatGeo_World_Map/MapServer/tile/{z}/{y}/{x}'
tileset = url_base + service

# Create the map, centred on [60.79443, -149.199667] at zoom level 11.
m = folium.Map(location = [60.79443,-149.199667], zoom_start = 11, tiles = tileset,
               attr='USGS Style')


# Add markers to map
# The block below is disabled code kept as a bare string: it adds nothing to the
# map, and because it is the last expression of the cell Jupyter echoes it as output.
"""for i in range(len(obsArchive)):
    if str(obsArchive.iloc[i][3]) != 'nan':
        folium.Marker([obsArchive.iloc[i][3],obsArchive.iloc[i][4]], popup = 
                      obsArchive.iloc[i][1] + ' ' +
                      '<a href="%s" target="_blank">Link</a>' % obsArchive.iloc[i][8]).add_to(m)
    """

'for i in range(len(obsArchive)):\n    if str(obsArchive.iloc[i][3]) != \'nan\':\n        folium.Marker([obsArchive.iloc[i][3],obsArchive.iloc[i][4]], popup = \n                      obsArchive.iloc[i][1] + \' \' +\n                      \'<a href="%s" target="_blank">Link</a>\' % obsArchive.iloc[i][8]).add_to(m)\n    '

In [11]:
# Label every observation with one of four age groups:
# 'yesterday', '3 day', 'week' or 'old'.
today = datetime.datetime.today()
oneDay = datetime.timedelta(days = 1.5)     # 'yesterday' cut-off is 1.5 days
threeDays = datetime.timedelta(days = 2.5)  # '3 day' cut-off is 2.5 days
oneWeek = datetime.timedelta(days = 7)

# Age is computed from the 'Date' column by name rather than by position, and
# the whole column is assigned at once.  The previous chained assignment
# (obsArchive['ageGroup'][i] = ...) needed the column to exist already and is
# not guaranteed to write back to the frame.
obsAge = today - pd.to_datetime(obsArchive['Date'])
obsArchive['ageGroup'] = np.select(
    [obsAge < oneDay, obsAge < threeDays, obsAge < oneWeek],
    ['yesterday', '3 day', 'week'],
    default = 'old')  # also covers rows whose date is missing

obsArchive




Unnamed: 0,Date,Location,Observer,Lat,Lon,Recent Avy,Collapsing,Cracking,url,ageGroup
0,2020-12-01 11:15:00,Mount Marathon…East aspect above jeep trail to...,Alex McLain,60.113664,-149.469092,False,False,False,https://www.cnfaic.org/observations/mount-mara...,yesterday
1,2020-11-30 08:47:00,Notch Mountain,Anonymous,60.984507,-149.046717,False,False,False,https://www.cnfaic.org/observations/notch-moun...,yesterday
2,2020-11-30 07:16:00,Seattle Ridge Flats,Andrew Schauer Forecaster,60.800798,-149.199152,False,False,False,https://www.cnfaic.org/observations/seattle-ri...,yesterday
3,2020-11-30 05:52:00,Tincan- above treeline,Eric Roberts,60.788590,-149.166365,False,False,False,https://www.cnfaic.org/observations/tincan-abo...,yesterday
4,2020-11-29 12:00:00,Sunburst,Kathy Still,60.794430,-149.199667,False,False,False,https://www.cnfaic.org/observations/sunburst-233/,3 day
5,2020-11-29 09:39:00,South Fork ER valley,Elias Holt,61.221255,-149.408751,False,True,False,https://www.cnfaic.org/observations/south-fork...,3 day
6,2020-11-29 09:30:00,Tenderfoot,Johnston-Bloom / Edwards Forecaster,60.630872,-149.474283,True,True,False,https://www.cnfaic.org/observations/tenderfoot...,3 day
7,2020-11-29 07:06:00,Sunny Side,Peter Wadsworth,60.757714,-149.263225,False,False,True,https://www.cnfaic.org/observations/sunny-side-4/,3 day
8,2020-11-29 04:00:00,Sunny Side,Peter Wadsworth,60.766738,-149.256337,False,False,False,https://www.cnfaic.org/observations/sunny-side-3/,week
9,2020-11-29 02:53:00,Overflight Recon Turnagain Pass,Silverton Mountain Guides,60.794430,-149.199667,False,False,False,https://www.cnfaic.org/observations/overflight...,week


In [12]:
# One frame per age group that is drawn on the map ('old' rows are left out).
ageGroup = obsArchive['ageGroup']
dfYes = obsArchive[ageGroup == 'yesterday']
df3day = obsArchive[ageGroup == '3 day']
dfWeek = obsArchive[ageGroup == 'week']

In [13]:
# Parent layer plus one toggleable sub-layer per age group.
fgObs = folium.FeatureGroup(name = 'Observations')
m.add_child(fgObs)
gYes = folium.plugins.FeatureGroupSubGroup(fgObs, 'Yesterday')
g3day = folium.plugins.FeatureGroupSubGroup(fgObs, '3 Days')
gWeek = folium.plugins.FeatureGroupSubGroup(fgObs, 'One Week')
subGroups = [gYes, g3day, gWeek]

frames = [dfYes, df3day, dfWeek]
colors = ['red', 'green', 'blue']

for frame, subGroup, color in zip(frames, subGroups, colors):
    m.add_child(subGroup)
    # Columns are read by name instead of by position (3, 4, 1, 8), so the
    # markers no longer depend on the column order of the archive.  Rows
    # missing either coordinate cannot be placed and are skipped.
    located = frame.dropna(subset = ['Lat', 'Lon'])
    for lat, lon, location, url in zip(located['Lat'], located['Lon'],
                                       located['Location'], located['url']):
        # NOTE(review): location text is scraped from the web and inserted into
        # the popup HTML unescaped.
        popup = location + ' ' + '<a href="%s" target="_blank">Link</a>' % url
        subGroup.add_child(
            folium.Marker([lat, lon], popup = popup,
                          icon = folium.Icon(color = color)))

folium.LayerControl().add_to(m)


<folium.map.LayerControl at 0x1181a12b0>

In [15]:
# Display the finished map as the cell output.
m