# Bonus: Plotting some data from the City of Edmonton data catalog

Initialize plotly as usual:

In [None]:
import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd
import numpy as np

# Initialize plotly's offline mode for use inside the notebook.
# NOTE(review): connected=False presumably embeds plotly.js in the notebook
# rather than loading it from a CDN — confirm against the plotly docs.
py.init_notebook_mode(connected=False)

People using Colab need this:

In [None]:
# Stuff for Colab ...
import sys
def enable_plotly_in_cell():
    """Emit a RequireJS script tag and re-initialize plotly's notebook mode.

    Displays an HTML snippet that loads
    ``/static/components/requirejs/require.js`` and then calls
    ``init_notebook_mode(connected=False)``. Takes no arguments and returns
    nothing; it is used as a ``pre_run_cell`` callback further down.
    """
    from IPython.display import HTML, display
    from plotly.offline import init_notebook_mode

    # Same markup (including surrounding whitespace) as a single literal.
    requirejs_tag = '\n        <script src="/static/components/requirejs/require.js"></script>\n  '
    display(HTML(requirejs_tag))
    init_notebook_mode(connected=False)

# Only when the google.colab module has been loaded (presumably meaning the
# notebook is running on Colab), register enable_plotly_in_cell for the
# 'pre_run_cell' event of the current IPython shell.
if 'google.colab' in sys.modules:
    get_ipython().events.register('pre_run_cell', enable_plotly_in_cell)

The city maintains a database of the locations of city-owned trees:

* https://data.edmonton.ca/Environmental-Services/Trees/eecg-fc54
* https://data.edmonton.ca/Environmental-Services/Trees/eecg-fc54/data

It has over 360,000 entries.

This is a bit much, but we can use the data API provided by the city to filter the trees.
The API is the Socrata Open Data API (SODA), which is documented here: https://dev.socrata.com/

We will just look at Green Ash trees:

In [None]:
# Create the data/ directory; -p makes this a no-op if it already exists.
!mkdir -p data
# Download the tree records as CSV into data/trees.csv. The query string asks
# for species "Ash, Green" (%20 is an encoded space) and sets $limit=100000
# (%24 is an encoded "$").
!wget -O data/trees.csv 'https://data.edmonton.ca/resource/eecg-fc54.csv?species=Ash,%20Green&%24limit=100000'

In [None]:
# Load the downloaded CSV into a DataFrame.
df = pd.read_csv('data/trees.csv')
# Report how many rows were read.
print(len(df))
# Last expression of the cell: show the first few rows as the cell output.
df.head()

Let's see the range of the latitude values for the trees:

In [None]:
# [smallest, largest] latitude found in the data.
y_range = [df['latitude'].min(), df['latitude'].max()]
y_range

... and the range of the longitude values:

In [None]:
# [smallest, largest] longitude found in the data.
x_range = [df['longitude'].min(), df['longitude'].max()]
x_range


Let's make a heatmap to see how these trees are distributed!

We will first want to count the trees in a 2D array of bins that span the longitude/latitude of the city.

In [None]:
# Number of bins along each axis of the grid.
width, height = 40, 40

# One zero-initialized counter per bin; the first axis has `width` entries
# and the second has `height` entries.
tree_counts = np.zeros((width, height))

Here is a general purpose function that will decide what bin a tree will be counted in:

In [None]:
def bin_value(value: float, minval: float, maxval: float, resolution: int) -> int:
    """Return the index of the bin that *value* falls into.

    The interval ``[minval, maxval]`` is mapped linearly onto the bin indices
    ``0 .. resolution - 1`` and the result is rounded to the nearest index,
    so ``minval`` maps to bin 0 and ``maxval`` to bin ``resolution - 1``.

    Args:
        value: The value to place into a bin.
        minval: Lower end of the binned range.
        maxval: Upper end of the binned range.
        resolution: Number of bins.

    Returns:
        The bin index as an ``int``. When ``minval == maxval`` the range has
        no extent, so every value is placed in bin 0.
    """
    span = maxval - minval
    if span == 0:
        # Degenerate range (e.g. a single data point): avoid dividing by zero.
        return 0
    return int(round((resolution - 1) * (value - minval) / span))

Now we iterate over the trees to sum up the bins:

In [None]:
# Tally every tree into the grid cell for its longitude/latitude.
for lon, lat in zip(df.longitude, df.latitude):
    lon_bin = bin_value(lon, x_range[0], x_range[1], width)
    lat_bin = bin_value(lat, y_range[0], y_range[1], height)
    # Mirror the latitude bin so the largest latitude ends up at index 0.
    tree_counts[lon_bin, height - 1 - lat_bin] += 1

In [None]:
# Display the array of per-bin tree counts as the cell output.
tree_counts

And finally, the heatmap!

In [None]:
# Draw the binned tree counts as a heatmap.
#
# tree_counts is indexed [longitude bin][mirrored latitude bin], whereas
# go.Heatmap reads z[row][column] with rows along y and columns along x.
# Transposing puts longitude on the x axis and latitude on the y axis.
data = [
    go.Heatmap(
        z=tree_counts.T,
    )
]
layout = {
    'xaxis': {
        # When the axes are scale-locked, shrink the axis domain (not its range).
        'constrain': 'domain',
    },
    'yaxis': {
        # Lock the y scale to the x scale so every bin is drawn square.
        'scaleanchor': 'x',
        # The binning loop stores the largest latitude in row 0; heatmaps draw
        # row 0 at the bottom by default, so reverse the axis to keep north up.
        'autorange': 'reversed',
    },
}
figure = go.Figure(data=data, layout=layout)
py.iplot(figure)

### To do: overlay a map somehow!

For now, let's compare with a Google map.

In [None]:
import IPython
# Markup for a 600x450 <iframe> pointing at a Google Maps embed URL.
# NOTE(review): the 2d/3d values in the URL look like the longitude/latitude
# of the map centre (presumably Edmonton) — confirm.
iframe = '<iframe src="https://www.google.com/maps/embed?pb=!1m14!1m12!1m3!1d86756.31186242377!2d-113.47701978727787!3d53.52751941579225!2m3!1f0!2f0!3f0!3m2!1i1024!2i768!4f13.1!5e0!3m2!1sen!2sca!4v1569534170492!5m2!1sen!2sca" width="600" height="450" frameborder="0" style="border:0;" allowfullscreen=""></iframe>'
# Wrap the markup in an HTML display object; as the last expression of the
# cell it becomes the cell output.
IPython.display.HTML(iframe)