# Exercise in web scraping. Display a Google map of church towers in Somerset scraped from a Wikipedia page.

In [1]:
import pickle
import json
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
import gmaps
import gmaps.datasets
import os

In [2]:
    
# Download the Wikipedia page that lists the Somerset towers and parse it.
# The response is opened in a `with` block so the HTTP connection is closed
# as soon as the body has been read, instead of being left open.
with urlopen("https://en.wikipedia.org/wiki/List_of_Somerset_towers") as html:
    res = BeautifulSoup(html.read(), "html5lib")
 
#print(res.title)

In [3]:
# list to hold data about the towers (one dict per tower)
towers = []

# Matches the trailing "(Church name, Place)" part of a location line.
# Raw string so that "\(" and "\s" reach the regex engine unchanged;
# compiled once here rather than on every row.
name_loc_pattern = re.compile(r"\((.*),\s*(.*)\)")

# Each table about the towers is a "wikitable sortable";
# use Beautiful Soup to load them all
tables = res.findAll("table", {"class": "wikitable sortable"})
#print("There are " + str(len(tables)) + " tables") # 14/12/2017 13 tables

for table in tables:
    # Read in all the rows of the table
    rows = table.findAll("tr")
    # the first row defines the column headers, so skip it
    for row in rows[1:]:
        # get the table entries <td>
        entries = row.findAll("td")
        # all the required data is stored in the <td> at index 4;
        # skip rows that do not have that cell instead of crashing
        if len(entries) < 5:
            continue
        # example
        # churchill
        # 51°20′03″N 2°48′00″W / 51.3342°N 2.8001°W / 51.3342; -2.8001 (Church of St John the Baptist, Churchill)
        # split the entry into its separate lines
        location = entries[4].text.split('\n')
        # all the data we want is in the second line
        if len(location) < 2:
            continue

        # we want the lat/long as digits i.e. the third set of lat/long,
        # which are the space-separated tokens at index 6 and 7
        coords = location[1].split(' ')
        if len(coords) < 8:
            continue
        lat = coords[6].replace(';', '')  # remove unwanted semi-colon
        longt = coords[7].strip()         # remove unwanted spaces

        # the data if negative is decoded incorrectly and has \ufeff at the end of the string
        lat = lat.replace(u'\ufeff', '')
        longt = longt.replace(u'\ufeff', '')

        # find the church's name and location; reset both for every row so a
        # row without a "(name, place)" suffix cannot inherit the previous
        # row's values (or hit an unbound name on the very first row)
        church = None
        place = None
        nameLoc = name_loc_pattern.search(location[1])
        if nameLoc:
            church = nameLoc.group(1)
            place = nameLoc.group(2)

        # create a dict of values and add it to the list of towers
        temp = {"name": church, "location": place, "lat": lat, "longt": longt}
        towers.append(temp)

# save to file in json format
with open('churches.json', 'wt') as outfile:
    json.dump(towers, outfile)

In [4]:
# The Google Maps API key is read from the GMAP_API_KEY environment variable
# so that it never has to be written into the notebook itself.
key = os.environ.get('GMAP_API_KEY')
# register the key with the gmaps package
gmaps.configure(api_key=key)


In [5]:
# Draw the scraped towers as a heatmap layer on a Google map.
# The map is centred on Taunton (the county town).
fig = gmaps.figure(center=(51.01494, -3.10293), zoom_level=8)

# turn each tower's stored lat/long strings into a (lat, long) float pair
locations = [(float(tower["lat"]), float(tower["longt"])) for tower in towers]

heatmap_layer = gmaps.heatmap_layer(locations)
fig.add_layer(heatmap_layer)
fig