**Dataset exploration and investigation for the Event Disruption Use Case**

This notebook contains an incomplete attempt to import event/planned works data from Melbourne City's open datasets, and to overlay it with their pedestrian sensor data.

The idea is to determine whether each sensor is inside, adjacent to, or close to an event, and then to measure the impact of this proximity. The event dataset's geographic information is stored as shapes (polygons), while the sensors are point coordinates. I have added buffers to the shapes to expand them slightly, so that we can check whether a sensor's point falls inside the shape itself or inside one of the slightly expanded versions (i.e. 'inside', 'adjacent' or 'close', held in the `geometry`, `adjacent` and `near` columns respectively).

In [1]:
import os
import time
import warnings

# Kept ahead of the third-party imports so warnings raised while importing them stay suppressed.
warnings.filterwarnings('ignore')

import folium
import geopandas as gpd
import pandas as pd
from IPython.core.display import display, HTML
from shapely.geometry import Point, Polygon, shape, MultiPolygon
from sodapy import Socrata

In [2]:
# City of Melbourne open-data endpoint that exports the activities dataset as GeoJSON.
activities_url = (
    "https://data.melbourne.vic.gov.au/api/geospatial/txcy-uafv"
    "?method=export&format=GeoJSON"
)

# Download the export and parse it into a GeoDataFrame.
activities_gdf = gpd.read_file(activities_url)

In [3]:
# Buffer distances are expressed in metres, so the footprints are buffered in a
# projected (metric) CRS and then converted back to the original CRS. Buffering
# lon/lat geometries directly by a number of degrees produces zones whose
# real-world width differs between the east-west and north-south directions.
ADJACENT_BUFFER_M = 50     # roughly 0.0005 degrees of latitude
NEAR_BUFFER_M = 100        # roughly 0.001 degrees of latitude
METRIC_CRS = "EPSG:32755"  # WGS 84 / UTM zone 55S

# Split the start and end dates into calendar parts (year, month name, day of
# month, hour). The parsed timestamps are only needed to derive these columns,
# because start_date / end_date themselves are dropped below.
for prefix in ("start", "end"):
    stamps = pd.to_datetime(activities_gdf[f"{prefix}_date"], format='%Y-%m-%d')
    activities_gdf[f"{prefix}_year"] = stamps.dt.year
    activities_gdf[f"{prefix}_month"] = stamps.dt.month_name()
    activities_gdf[f"{prefix}_mdate"] = stamps.dt.day
    # NOTE(review): values parsed with a date-only format always have hour 0;
    # confirm whether the source actually carries a time of day.
    activities_gdf[f"{prefix}_time"] = stamps.dt.hour

# Drop the columns that are not used further on. Because start_date / end_date
# are removed here, the load cell must be re-run before this cell is re-run.
activities_gdf = activities_gdf.drop(
    columns=['location', 'activity_id', 'source_id', 'small_area',
             'start_date', 'end_date', 'notes', 'status']
)

# Expanded copies of each footprint: 'adjacent' is the tighter zone, 'near' the wider one.
# Assumes activities_gdf.crs is set (as it is for data read from GeoJSON) - to_crs needs it.
footprints_metric = activities_gdf['geometry'].to_crs(METRIC_CRS)
activities_gdf['adjacent'] = footprints_metric.buffer(ADJACENT_BUFFER_M).to_crs(activities_gdf.crs)
activities_gdf['near'] = footprints_metric.buffer(NEAR_BUFFER_M).to_crs(activities_gdf.crs)

In [4]:
# Load the sensor history from a CSV file in the current working directory.
# It is joined to the sensor locations on 'sensor_id' further down, so the file
# needs a 'sensor_id' column.
sensor_history = pd.read_csv('sensor_history.csv')

def sensor_location(app_token=None):
    """Fetch the sensor locations from the City of Melbourne open data API.

    Parameters
    ----------
    app_token : str, optional
        Socrata app token. When omitted, the ``SOCRATA_APP_TOKEN`` environment
        variable is used; if that is not set either, the request is sent
        without a token instead of embedding a token in the notebook.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``sensor_id`` (as returned by the API),
        ``lat`` and ``lon`` (both converted to float).
    """
    if app_token is None:
        app_token = os.environ.get("SOCRATA_APP_TOKEN")

    sensor_location_data_id = "h57g-5234"
    client = Socrata('data.melbourne.vic.gov.au', app_token, None)
    try:
        # NOTE(review): no explicit limit is passed; confirm the API's default
        # page size is large enough to return every sensor.
        results = client.get(sensor_location_data_id)
    finally:
        # Release the underlying HTTP session even if the request fails.
        client.close()

    records = pd.DataFrame.from_records(results)
    # Build a new frame rather than assigning into a column slice, which would
    # be a chained assignment on a view of `records`.
    return (
        records[["sensor_id", "latitude", "longitude"]]
        .rename(columns={"latitude": "lat", "longitude": "lon"})
        .astype({"lat": float, "lon": float})
    )

# Fetch the sensor coordinates and cast the id column to int before joining on it
# (presumably to match the dtype of sensor_history's 'sensor_id' - confirm).
location_data = sensor_location()
location_data = location_data.assign(sensor_id=location_data['sensor_id'].astype(int))

# Inner join: only history rows whose sensor has a location record are kept.
sensor_data = pd.merge(sensor_history, location_data, on='sensor_id', how='inner')

In [7]:
# This map demonstrates the 'geometry', 'adjacent' and 'near' polygons of one
# example activity, so the three zones used to decide whether a sensor is inside,
# adjacent to, or near an activity can be compared visually.

MAP_CENTRE = [-37.81216592937499, 144.961812290625]  # initial view, [lat, lon]
EXAMPLE_ACTIVITY = 17  # index label of the activity that is drawn

# Named activity_map rather than `map` so the builtin map() is not shadowed.
activity_map = folium.Map(location=MAP_CENTRE, zoom_start=14)

# Draw the widest zone first so the tighter ones stay visible on top of it, and
# give each zone its own colour so the overlapping layers can be told apart.
for column, colour in (("near", "green"), ("adjacent", "orange"), ("geometry", "red")):
    folium.GeoJson(
        data=activities_gdf[column][EXAMPLE_ACTIVITY],
        name=column,
        # Bind the colour as a default argument so each layer keeps its own value.
        style_function=lambda feature, colour=colour: {"color": colour, "fillColor": colour},
    ).add_to(activity_map)

activity_map

In [25]:
# For every activity, record which sensors fall inside its footprint ('In'),
# inside its 'adjacent' buffer ('Adjacent') or inside its 'near' buffer ('Near').
# Each cell holds a list of sensor ids, so an activity keeps every matching
# sensor rather than only the last one found. A sensor is listed in the tightest
# zone that contains it and in no other. The lists can be expanded into one row
# per sensor afterwards with pandas' DataFrame.explode.

# Output column -> geometry column it is tested against, tightest zone first.
ZONE_COLUMNS = {"In": "geometry", "Adjacent": "adjacent", "Near": "near"}

# Build each sensor's point once instead of once per activity.
sensor_points = [
    (sensor_id, Point(lon, lat))
    for sensor_id, lon, lat in zip(location_data.sensor_id, location_data.lon, location_data.lat)
]

matches = {label: [] for label in ZONE_COLUMNS}
for zones in zip(*(activities_gdf[column] for column in ZONE_COLUMNS.values())):
    found = {label: [] for label in ZONE_COLUMNS}
    for sensor_id, point in sensor_points:
        for label, zone in zip(ZONE_COLUMNS, zones):
            # The geometries are tested as they are (no Polygon/MultiPolygon
            # re-wrapping); a missing geometry cannot contain anything.
            if zone is not None and zone.contains(point):
                found[label].append(sensor_id)
                break  # tightest zone wins, like an if/elif chain
    for label in ZONE_COLUMNS:
        matches[label].append(found[label])

# Assign whole columns at once; element-wise chained assignment
# (activities_gdf.In[i] = ...) is not guaranteed to write back to the frame.
for label, per_activity in matches.items():
    activities_gdf[label] = pd.Series(per_activity, index=activities_gdf.index, dtype=object)


In [None]:
#More work to be done to get the dataframe correct for the regression.