In [290]:
### Create an interactive map, with a value slider, of residents living within 400 - 1600 m walking distance (+ 250 m because of 
### the grid resolution) of metro and train stations in Finland's capital region. Data for the map is loaded from the open data 
### services of Maanmittauslaitos (MML) and Helsinki Region Environmental Services Authority (HSY).

### IMPORT DATA ###

# Import modules
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
from geopandas.tools import geocode
import numpy as np
from pyproj import CRS
import requests
import geojson
import matplotlib.pyplot as plt
from shapely.ops import cascaded_union
import mapclassify
import contextily as ctx
from mpl_toolkits.axes_grid1 import make_axes_locatable

import plotly
import plotly.graph_objs as go
import plotly.offline as offline
from plotly.graph_objs import *
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot


## Read the shapefile containing the capital region as polygons into variable 'grid'
## (Data from https://tiedostopalvelu.maanmittauslaitos.fi/tp/kartta)
# File path
fp_grid = "data/pkseutu.shp"

# Read in data
grid = gpd.read_file(fp_grid)

# The data is expected to be in ETRS89 / TM35FIN (EPSG:3067). Only *declare* that CRS when the file carries no CRS
# information at all: calling set_crs() on a layer that already has a different CRS raises a ValueError, and a
# layer with a known CRS can simply be reprojected below.
if grid.crs is None:
    grid = grid.set_crs(epsg=3067)

# Reproject to WGS 84 / Pseudo-Mercator (EPSG:3857) unless the layer is already in that CRS
if grid.crs != "epsg:3857":
    grid = grid.to_crs(epsg=3857)

# Combine the polygons of each city to form one polygon of the whole capital region.
# Every row gets the same key, so dissolve() merges them into a single row with index 0.
grid['constant'] = 0
boundary = grid.dissolve(by='constant')

# Check the data
print(grid.head())
print(grid.crs)
print(boundary)

ModuleNotFoundError: No module named 'plotly'

In [None]:
## Read population grid data for 2018 into a variable `pop`.

# Specify the url for web feature service
url = 'https://kartta.hsy.fi/geoserver/wfs'

# Specify parameters (read data in json format).
params = dict(service='WFS',
              version='2.0.0',
              request='GetFeature',
              typeName='asuminen_ja_maankaytto:Vaestotietoruudukko_2018',
              outputFormat='json')

# Fetch data from WFS using requests
r = requests.get(url, params=params)

# Fail here on an HTTP error instead of trying to parse an error page as GeoJSON below
r.raise_for_status()

# Create GeoDataFrame from geojson
pop = gpd.GeoDataFrame.from_features(geojson.loads(r.content))

# Clean out unnecessary columns (keep the resident count of each grid cell and its geometry)
pop = pop[["asukkaita", "geometry"]]

# Declare the CRS as ETRS89 / GK25FIN (EPSG:3879) if none is defined, then reproject to
# WGS 84 / Pseudo-Mercator (EPSG:3857) unless the layer is already in that CRS
if pop.crs is None:
    pop = pop.set_crs(epsg=3879)
if pop.crs != "epsg:3857":
    pop = pop.to_crs(epsg=3857)

# Check the data
print(pop.head())
print(pop.crs)

In [None]:
## Read buffer polygons that describe 400 m, 800 m, 1200 m and 1600 m accessibilities via pedestrian and bicycle ways from metro and
## train stations

# Save wanted buffer sizes in a list which is used in loading the data
dists = ['400', '800', '1200', '1600']

# Url of the web feature service (the same for every buffer layer)
url_buff = 'https://kartta.hsy.fi/geoserver/wfs'

# Geometry of the dissolved capital region; used to drop buffers that are not completely inside it
region_geom = boundary.at[0, 'geometry']

# Collect one GeoDataFrame per distance and combine them once after the loop
# (DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on every iteration)
buff_frames = []

# Iterate through wanted buffer distance list
for dist in dists:

    # typeName of the data layer for this distance
    type_name = dist + 'm_verkostobufferi'

    # Specify parameters (read data in json format).
    params_buff = dict(service='WFS',
                  version='2.0.0',
                  request='GetFeature',
                  typeName=type_name,
                  outputFormat='json')

    # Fetch data from WFS using requests and fail early on an HTTP error
    r = requests.get(url_buff, params=params_buff)
    r.raise_for_status()

    # Create GeoDataFrame from geojson
    buff = gpd.GeoDataFrame.from_features(geojson.loads(r.content))

    # Clean out unnecessary columns (keep the station name and the buffer polygon)
    buff = buff[["asema", "geometry"]]

    # Declare the CRS as ETRS89 / GK25FIN (EPSG:3879) if none is defined, then reproject to
    # WGS 84 / Pseudo-Mercator (EPSG:3857) unless the layer is already in that CRS
    if buff.crs is None:
        buff = buff.set_crs(epsg=3879)
    if buff.crs != "epsg:3857":
        buff = buff.to_crs(epsg=3857)

    # Clip out stations that are located outside the capital region.
    # .copy() makes the filtered frame independent so the column assignment below does not write to a slice.
    clip_mask = buff.within(region_geom)
    buff = buff.loc[clip_mask].copy()

    # Create column which indicates buffer distance for the slider
    buff['dist'] = dist

    # Check the data
    print(buff.head(1))
    print(len(buff))

    # Store the data of this distance
    buff_frames.append(buff)

# Combine the data of all distances (row labels of the individual layers are kept as they are)
buffs = pd.concat(buff_frames)

# Check output of the loop
buffs.head()

In [None]:
### PROCESS DATA ###

# Create new column to 'buffs' where total resident amounts within each buffer areas are stored
buffs["residents_sum"] = None

# Create a spatial join between grid layer and buffer layer. The join predicate is left at its default,
# "intersects", to include all grid cells which touch the buffer area (NOTE that with this choice the accuracy of
# the buffers is lost due to the grid resolution). The keyword is deliberately not spelled out: it was called `op`
# in older geopandas releases and `predicate` in newer ones, and `op` was removed in geopandas 1.0.
pop_combined = gpd.sjoin(pop, buffs, how="left")

# Group the data by both train and metro station names AND distance classes.
# Grid cells that matched no buffer have a missing 'asema'/'dist' and are left out of the groups.
groupedA = pop_combined.groupby(['asema','dist'])

# Show the first rows of every (station, distance) group
groupedA.head()

In [None]:
# Store the sum of residents living approximately 400 m, 800 m, 1200 m and 1600 m from each station in column
# "residents_sum" (the distance doesn't stay constant in performed analysis but accurate enough for this visualization)

# One resident total per (station, distance) group, as a {(asema, dist): total} lookup
residents_by_buffer = groupedA["asukkaita"].sum().to_dict()

# Look up the total of every buffer row; buffers that no population grid cell intersected get 0
buffs["residents_sum"] = [
    residents_by_buffer.get(buffer_key, 0)
    for buffer_key in zip(buffs["asema"], buffs["dist"])
]


## Convert the buffer polygons to points (approximate station locations)
# reset_index() returns a new frame with unique row labels, so 'buffs' keeps its polygons
point_data = buffs.reset_index()

# Group the data by only train and metro station names
groupedB = point_data.groupby('asema')

# Replace every buffer polygon with its own centroid
point_data["geometry"] = point_data.geometry.centroid

point_data.head()

In [293]:
# Assign the same point to every data row of the same station: the geometry of the station's 400 m row (the centroid
# of its 400 m buffer polygon, computed above). Otherwise a station would shift slightly on the map whenever the
# distance changes, because each buffer size has a different centroid.
is_400 = point_data['dist'] == '400'
anchor_by_station = dict(zip(point_data.loc[is_400, 'asema'], point_data.loc[is_400, 'geometry']))

# Stations without a 400 m row keep the geometry they already have
point_data['geometry'] = gpd.GeoSeries(
    [anchor_by_station.get(station, geom)
     for station, geom in zip(point_data['asema'], point_data['geometry'])],
    index=point_data.index,
    crs=point_data.crs,
)


# Reorganize the column order
point_data = point_data[["geometry","asema","residents_sum", "dist"]]


# Check data
print(point_data.head())

                          geometry        asema  residents_sum dist
0  POINT (2777053.880 8448898.363)       Käpylä           1252  400
1  POINT (2779412.695 8470148.966)   Lentoasema              0  400
2  POINT (2788489.108 8467666.897)  Hiekkaharju           1920  400
3  POINT (2776435.063 8438154.680)     Helsinki            885  400
4  POINT (2769199.228 8453290.280)   Kannelmäki           7698  400


In [None]:
# Spot check: print the rows of station 'Malmi' for each buffer distance, one distance at a time
is_malmi = point_data['asema'] == 'Malmi'
for dist_label in ('400', '800', '1200', '1600'):
    print(point_data.loc[(point_data['dist'] == dist_label) & is_malmi])


In [294]:
### CREATE PLOT with slider

# Create colorscale:    
scl = [[0.0, '#ffffff'],[0.2, '#ff9999'],[0.4, '#ff4d4d'], \
       [0.6, '#ff1a1a'],[0.8, '#cc0000'],[1.0, '#4d0000']] # reds

# Create empty list for data object:    
data_slider = []


for distance in point_data.dist.unique():


    # Select data of only one distance 
    data_seg = point_data[point_data['dist']==distance]

    # Transform the columns into string 
    #for col in data_seg.columns:  
     #   data_seg[col] = data_seg[col].astype(str)

    # Create the text for mouse-hover  
    #data_seg['ase'] = df_sected_crime['State'] + 
    #'Pop: ' /span> df_sected_crime['Population']'Murder rate: '+df_sected_crime['Murder_per100000']

    # Create dictionary with the data for the current distance 
    data_one_dist = dict(
                        type='choropleth',
                        locations=data_seg['geometry'],
                        z=data_seg['residents_sum'].astype(float),
                        locationmode='ISO-3',
                        colorscale = scl,
                        text = data_seg['asema'],
                        )

    data_slider.append(data_one_dist)  # Add dictionary to the list of dictionaries for the slider


In [295]:
# Create the steps for slider
steps = []

for i in range(len(data_slider)):
    step = dict(method='restyle',
                args=['visible', [False] * len(data_slider)],
                label='Walking distance from train or metro station {}'.format((i+1)*400)) # label to be displayed for each step (year)
    step['args'][1][i] = True
    steps.append(step)

# Create the 'sliders' object from the 'steps' 
sliders = [dict(active=0, pad={"t": 1}, steps=steps)]  

In [None]:
# Set up the layout 
layout = dict(geo=dict(scope='usa',
                       projection={'type': 'albers usa'}),
              sliders=sliders)

# Create the figure object:
fig = dict(data=data_slider, layout=layout) 

# Plot in the notebook
plotly.offline.iplot(fig)

# Plot in a separete browser window
#offline.plot(fig, auto_open=True, image = 'png', image_filename="map_us_crime_slider" ,image_width=2000, image_height=1000, 
              filename='/your_path/map_us_crime_slider.html', validate=True
