In [1]:
# do any pip installs in this cell

In [2]:
!python -m pip install folium

Defaulting to user installation because normal site-packages is not writeable
Collecting folium
  Downloading folium-0.14.0-py2.py3-none-any.whl (102 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.3/102.3 kB[0m [31m52.7 kB/s[0m eta [36m0:00:00[0m kB/s[0m eta [36m0:00:01[0m:02[0m
[?25hCollecting branca>=0.6.0 (from folium)
  Downloading branca-0.6.0-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.6.0 folium-0.14.0


In [3]:
import requests
from bs4 import BeautifulSoup
import time
import json
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import Image
import folium
from folium.plugins import FastMarkerCluster

In [4]:
# Check once, up front, whether apidata.json already exists in the working
# directory; the geocoding cell further down reads this flag to choose between
# querying the API and loading that file.
APIDATA_EXISTS = os.path.isfile('apidata.json')

print(APIDATA_EXISTS)

True


In [4]:
# pull addresses

url = 'https://www.wrtv.com/news/local-news/crime/people-weve-lost-these-are-the-indianapolis-homicide-victims-of-2022'

# Bound the wait so a stalled connection cannot hang the notebook, and stop on
# an HTTP error status instead of parsing an error page as if it were the article.
page = requests.get(url, timeout=30)
page.raise_for_status()
html = page.content

# Name the parser explicitly. Without it BeautifulSoup picks whichever parser
# happens to be installed, so the parse tree (and the positional indexing into
# paragraph.contents done when parsing addresses) could differ between machines.
# 'html.parser' ships with Python, so no extra dependency is needed.
soup = BeautifulSoup(html, 'html.parser')

In [5]:
# parse addresses

def _victim_address(paragraph):
    """Return the address text of one article paragraph.

    The text of child node 6 is used, unless it begins with 'What happened:',
    in which case the text of child node 4 is used instead.
    """
    candidate = paragraph.contents[6].text
    if candidate.startswith('What happened:'):
        return paragraph.contents[4].text
    return candidate


# keep only the <p> elements that have more than 10 child nodes
paragraphs = [p for p in soup.find_all('p') if len(p.contents) > 10]

# one address per remaining paragraph, minus the 'Unknown location' entries
addresses = [
    addr
    for addr in map(_victim_address, paragraphs)
    if addr != 'Unknown location'
]

# sanity check: bad_addr collects every address that contains no digit at all;
# it should hold only genuine digit-free addresses, never text such as "What happened:"
# NOTE(review): bad_addr is neither displayed nor asserted in this cell
bad_addr = [addr for addr in addresses if not any(ch.isdigit() for ch in addr)]

print(len(addresses)) # Should be 216 per the article, but is actually 215 because of one unknown address

215


In [5]:
# convert addresses to longitude/latitude
# if APIDATA_EXISTS is False, query the API; otherwise, read from apidata.json

if not APIDATA_EXISTS:
    apidata = []
    url = 'https://maps.googleapis.com/maps/api/geocode/json'

    # The key is read from the environment so it never ends up in the notebook.
    # Create one on Google Cloud and export it as GOOGLE_MAPS_API_KEY.
    API_KEY = os.environ.get('GOOGLE_MAPS_API_KEY')
    if not API_KEY:
        raise RuntimeError('Set the GOOGLE_MAPS_API_KEY environment variable before geocoding')

    for done, address in enumerate(addresses, start=1):
        # requests URL-encodes the query parameters itself.
        resp = requests.get(
            url,
            params={'address': address + ', Indiana', 'key': API_KEY},
            timeout=30,
        )
        resp.raise_for_status()
        resp = resp.json()

        # ZERO_RESULTS is a legitimate answer for a single address and is kept.
        # Any other non-OK status (denied key, quota, invalid request, ...) is a
        # failure of the run itself, so stop before it is written to the cache.
        if resp.get('status') not in ('OK', 'ZERO_RESULTS'):
            raise RuntimeError(
                f"Geocoding failed for {address!r}: "
                f"{resp.get('status')} {resp.get('error_message', '')}"
            )

        apidata.append(resp)

        # pause for a second after every tenth request
        if done % 10 == 0:
            print(f'Finished Address {done}/{len(addresses)}; Waiting 1 Second')
            time.sleep(1)
    print(f'Finished Address {len(addresses)}/{len(addresses)}; Backing up to apidata.json')

    with open('apidata.json', 'w') as f:
        json.dump(apidata, f, indent=4) # backs up results of maps api to apidata.json

else:
    with open('apidata.json') as f:
        apidata = json.load(f)
    print('apidata.json loaded into memory')

apidata.json loaded into memory


In [6]:
# extracts coordinates from apidata into geolocs

geolocs = []      # (lat, lng) tuples, one per response that has a result
unmatched = 0     # responses whose 'results' list is missing or empty

for resp in apidata:
    results = resp.get('results') or []
    if not results:
        # Nothing to index into for this response; count it instead of
        # raising IndexError on results[0].
        unmatched += 1
        continue

    # only the first result of each response is used
    coords = results[0]['geometry']['location']
    geolocs.append((coords['lat'], coords['lng']))

# Stay silent when every response had a result.
if unmatched:
    print(f'Skipped {unmatched} of {len(apidata)} responses with no geocoding result')

In [8]:
# plots coordinates on a map

clustering = True  # True: a single FastMarkerCluster layer; False: one Marker per point

# the initial view is centred on the first entry of geolocs
first_lat, first_lng = geolocs[0]
homicide_map = folium.Map(location=(first_lat, first_lng), zoom_start=13, prefer_canvas=True)

if not clustering:
    for lat, lng in geolocs:
        folium.Marker([lat, lng]).add_to(homicide_map)  # latitude, longitude
else:
    FastMarkerCluster(geolocs).add_to(homicide_map)

# last expression of the cell, so the notebook renders the map
homicide_map

In [9]:
# Print the string returned by the map's _repr_html_() as plain text in the
# cell output, rather than rendering the map.
# NOTE(review): presumably done so the markup can be copied elsewhere; confirm.
print(homicide_map._repr_html_())

<div style="width:100%;"><div style="position:relative;width:100%;height:0;padding-bottom:60%;"><span style="color:#565656">Make this Notebook Trusted to load map: File -> Trust Notebook</span><iframe srcdoc="&lt;!DOCTYPE html&gt;
&lt;html&gt;
&lt;head&gt;
    
    &lt;meta http-equiv=&quot;content-type&quot; content=&quot;text/html; charset=UTF-8&quot; /&gt;
    
        &lt;script&gt;
            L_NO_TOUCH = false;
            L_DISABLE_3D = false;
        &lt;/script&gt;
    
    &lt;style&gt;html, body {width: 100%;height: 100%;margin: 0;padding: 0;}&lt;/style&gt;
    &lt;style&gt;#map {position:absolute;top:0;bottom:0;right:0;left:0;}&lt;/style&gt;
    &lt;script src=&quot;https://cdn.jsdelivr.net/npm/leaflet@1.9.3/dist/leaflet.js&quot;&gt;&lt;/script&gt;
    &lt;script src=&quot;https://code.jquery.com/jquery-1.12.4.min.js&quot;&gt;&lt;/script&gt;
    &lt;script src=&quot;https://cdn.jsdelivr.net/npm/bootstrap@5.2.2/dist/js/bootstrap.bundle.min.js&quot;&gt;&lt;/script&gt;
    &l