**Dataset exploration and investigation for the Event Disruption Use Case**

This notebook contains an incomplete attempt to import event/planned works data from Melbourne City's open datasets, and to overlay it with their pedestrian sensor data.

The idea is to determine whether each sensor is inside, adjacent to, or near an event, and then to measure the impact of that proximity. The event dataset's geographic information is stored as polygon shapes, while the sensors are point coordinates. I have added buffers to the shapes to expand them slightly, so that we can check whether a sensor's point coordinates fall within the shape itself or within one of its slightly expanded versions (i.e. 'inside', 'adjacent' or 'near').

Currently it provides a quick visualisation of the concept, then creates the first merged dataset that will be required for deeper analysis.

In [1]:
#Import a bunch of stuff that we need for this task

from shapely.geometry import Point, Polygon, shape, MultiPolygon

from sodapy import Socrata
import pandas as pd
import os
import time
import warnings
warnings.filterwarnings('ignore')
from IPython.core.display import display, HTML

import folium
import geopandas as gpd

In [2]:
#Load the geospatial 'event' (activities / planned works) layer from the CoM repository.
#The portal exports it as GeoJSON, which geopandas can read directly from the URL.

activities_url = "https://data.melbourne.vic.gov.au/api/geospatial/txcy-uafv?method=export&format=GeoJSON"
activities_gdf = gpd.read_file(activities_url)

In [3]:
#Pre-process the event dataset so that later cells can filter on calendar parts
#and test sensors against expanded versions of each event shape.

#1. Parse both date columns
for date_col in ('start_date', 'end_date'):
    activities_gdf[date_col] = pd.to_datetime(activities_gdf[date_col], format='%Y-%m-%d')

#2. Split each parsed date into year / month name / day-of-month / hour columns
#NOTE(review): with a date-only format the hour columns are presumably always 0 - confirm
for prefix in ('start', 'end'):
    stamps = activities_gdf[prefix + '_date']
    activities_gdf[prefix + '_year'] = stamps.dt.year
    activities_gdf[prefix + '_month'] = stamps.dt.month_name()
    activities_gdf[prefix + '_mdate'] = stamps.dt.day
    activities_gdf[prefix + '_time'] = stamps.dt.hour

#3. Discard the columns that are not used again (including the raw dates, now that they are split out)
unused_columns = ['location', 'source_id', 'small_area', 'start_date', 'end_date', 'notes', 'status']
activities_gdf = activities_gdf.drop(columns=unused_columns)

#4. Two progressively larger copies of every event shape.
#The buffer distance is expressed in the units of the geometry's coordinates
#(presumably degrees for this GeoJSON export - TODO confirm).
event_shapes = activities_gdf['geometry']
activities_gdf['adjacent'] = event_shapes.buffer(0.0005)
activities_gdf['near'] = event_shapes.buffer(0.001)

In [4]:
#For a quick example we only keep events that are still current.
#Note that the cut-off below is hardcoded, and would soon be inaccurate.

late_2022_months = ['September', 'October', 'August', 'December', 'November']

#Events finishing in one of the listed months of 2022 ...
ends_late_2022 = activities_gdf['end_year'].eq(2022) & activities_gdf['end_month'].isin(late_2022_months)
#... or in any later year
ends_after_2022 = activities_gdf['end_year'].gt(2022)

current_act = activities_gdf[ends_late_2022 | ends_after_2022]

In [5]:
#Now we grab the sensor location data

def sensor_location(app_token=None):
    """Download the pedestrian sensor locations from the CoM portal.

    Parameters
    ----------
    app_token : str, optional
        Socrata app token to send with the request. When omitted, the
        ``SOCRATA_APP_TOKEN`` environment variable is used if set, otherwise
        the token that was originally embedded in this notebook.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``sensor_id``, ``lat``, ``lon`` and
        ``sensor_description``; ``lat`` and ``lon`` are converted to floats.

    Raises
    ------
    KeyError
        If the portal response lacks one of the expected columns.
    """
    if app_token is None:
        #NOTE(review): the literal fallback keeps the notebook runnable as before, but a token
        #committed to a notebook should ideally be replaced by the environment variable.
        app_token = os.environ.get('SOCRATA_APP_TOKEN', 'nlPM0PQJSjzCsbVqntjPvjB1f')

    sensor_location_data_id = "h57g-5234"
    client = Socrata('data.melbourne.vic.gov.au', app_token, None)
    try:
        results = client.get(sensor_location_data_id)
    finally:
        #Always release the underlying HTTP session, even if the request fails
        client.close()

    df = pd.DataFrame.from_records(results)

    #Build a fresh frame rather than renaming/assigning on a slice of `df`,
    #which is the pattern that triggers pandas' chained-assignment warning.
    locations = (
        df[["sensor_id", "latitude", "longitude", "sensor_description"]]
        .rename(columns={"latitude": "lat", "longitude": "lon"})
        .astype({"lat": float, "lon": float})
    )
    return locations

#Fetch the sensor table and store the sensor ids as integers
location_data = sensor_location().astype({'sensor_id': int})


In [6]:
#This map just demonstrates the 'geometry', 'adjacent' and 'near' polygons.
#This is so we can compare whether a sensor is inside an event's geometry, adjacent to it, or near to it.
#(folium is imported in the import cell at the top of the notebook.)

#One colour per zone: red = the event shape itself, orange = 'adjacent' buffer, yellow = 'near' buffer
style1 = {'fillColor': 'red', 'color': 'red'}
style2 = {'fillColor': 'orange', 'color': 'orange'}
style3 = {'fillColor': 'yellow', 'color': 'yellow'}

#Create a map object centered on Melbourne
#NOTE(review): the name `map` shadows Python's builtin map(); it is kept unchanged here
#in case other cells refer to it.
map = folium.Map(location=[-37.81216592937499,
                           144.961812290625],
                 zoom_start=14)

#Add the current events to the map, one layer per zone
folium.GeoJson(data=current_act.geometry, style_function=lambda x: style1).add_to(map)
folium.GeoJson(data=current_act.adjacent, style_function=lambda x: style2).add_to(map)
folium.GeoJson(data=current_act.near, style_function=lambda x: style3).add_to(map)

#Add all sensor locations; iterate the three columns together instead of
#doing a positional row lookup for every field of every sensor
for lat, lon, description in zip(location_data['lat'],
                                 location_data['lon'],
                                 location_data['sensor_description']):
    folium.Marker(location=[lat, lon], popup=description).add_to(map)

#Last expression of the cell, so the notebook renders the map
map

In [7]:
#The previous visualisation showed two different datasets on the same map. That's fine for visualisation,
#but not really sufficient for deeper analysis.
#The next steps will require a combined dataset.
#This new dataset combines the sensor ids and the event geography: the event table gains three text
#columns - "In", "Adjacent" and "Near" - each holding a ', '-separated list of the sensor ids found in
#that zone of the event, or None when no sensor is found there.
#A sensor is listed only in the first zone that contains it (In, then Adjacent, then Near).

def _zone_contains(zone, point):
    """Return True when `zone` is a geometry containing `point`; a missing zone contains nothing."""
    return zone is not None and zone.contains(point)

#Build each sensor's Point and id string once, rather than once per (event, sensor) pair
sensor_points = [Point(lon, lat) for lon, lat in zip(location_data['lon'], location_data['lat'])]
sensor_labels = [str(sensor_id) for sensor_id in location_data['sensor_id']]

#Collected per column, one entry per event, in the same row order as activities_gdf
sensors_by_zone = {"In": [], "Adjacent": [], "Near": []}

#The geometries are used as stored (they already provide .contains) instead of being
#re-wrapped in Polygon/MultiPolygon, and rows are walked positionally so the result
#does not depend on the frames having a 0..n-1 index.
for event_shape, adjacent_shape, near_shape in zip(activities_gdf['geometry'],
                                                   activities_gdf['adjacent'],
                                                   activities_gdf['near']):
    hits = {"In": [], "Adjacent": [], "Near": []}
    for label, point in zip(sensor_labels, sensor_points):
        if _zone_contains(event_shape, point):
            hits["In"].append(label)
        elif _zone_contains(adjacent_shape, point):
            hits["Adjacent"].append(label)
        elif _zone_contains(near_shape, point):
            hits["Near"].append(label)

    for zone_name, labels in hits.items():
        sensors_by_zone[zone_name].append(', '.join(labels) if labels else None)

#Assign whole columns in one go; this avoids element-wise chained assignment
#(activities_gdf.In[i] = ...), which does not write through under pandas Copy-on-Write.
for zone_name, values in sensors_by_zone.items():
    activities_gdf[zone_name] = pd.Series(values, index=activities_gdf.index, dtype='object')
           