In [7]:
import os
import time
from datetime import datetime
import numpy as np
import pandas as pd
from sodapy import Socrata
import geopandas
import plotly.express as px
from shapely.geometry import Polygon, Point
from d2i_tools import *
import warnings
warnings.simplefilter("ignore")

# sign up for Socrata id at https://data.melbourne.vic.gov.au/signup
# and create App Token per https://support.socrata.com/hc/en-us/articles/210138558-Generating-an-App-Token
# Jason created a good basic run through of accessing this open data in ipynb below
# https://github.com/D2I-Melbourne/POCOM/blob/master/Jason/UsingSodapyandBuildingETL.ipynb
# and on sodapy https://github.com/xmunoz/sodapy 

# to start on Sodapy usage, check out the 2 ipynb's in https://github.com/xmunoz/sodapy/tree/master/examples

apptoken = os.environ.get("SODAPY_APPTOKEN")
domain = "data.melbourne.vic.gov.au"
client = Socrata(domain, apptoken)

In [8]:
# some function definitions
def dseries(df, col, attrib, attrib_sub=None, count=False):  
    ls = []
    if attrib_sub == None:
        for d in df[col]:
            if count:
                ls.append(len(d[attrib]))
            else:
                ls.append(d[attrib])
    else:
        for d in df[col]:
            if count:
                ls.append(len(d[attrib][attrib_sub]))
            else:
                ls.append(d[attrib][attrib_sub])
    return pd.Series(ls)

# function to make dataframe of interested info
def interesteddf(rdf):
    a = dseries(rdf, 'resource', 'name')
    b = dseries(rdf, 'resource', 'id')
    c = dseries(rdf, 'resource', 'parent_fxf')
    d = dseries(rdf, 'resource', 'description')
    e = dseries(rdf, 'resource', 'data_updated_at')
    f = dseries(rdf, 'resource', 'page_views', 'page_views_last_week')
    g = dseries(rdf, 'resource', 'page_views', 'page_views_last_month')
    h = dseries(rdf, 'resource', 'page_views', 'page_views_total')
    i = dseries(rdf, 'resource', 'download_count')
    j = dseries(rdf, 'classification', 'categories')
    k = dseries(rdf, 'classification', 'domain_category')
    l = dseries(rdf, 'classification', 'domain_tags')
    m = dseries(rdf, 'classification', 'domain_metadata')
#     n = dseries(rdf, 'resource', 'columns_name', True)  # count=True, not working yet
    col =  ['name', 'id', 'parent_fxf', 'description', 'data_upd_at', 'pv_last_wk', 'pv_last_mth', 'pv_total',
            'download_count', 'categories', 'domain_category', 'domain_tags', 'domain_metadata']
#             'no_cols']
    df = pd.concat([a,b,c,d,e,f,g,h,i,j,k,l,m], axis='columns')
    df.columns = col
    return df

def read_current_parkingSensors():
    cols = ["st_marker_id", "status", 
#             "disabilityext1","duration1","effectiveonph1",
#             "endtime1","exemption1","fromday1","starttime1","today1","typedesc1",
#             "description1", "description2","description3","description4","description5","description6",
            "bay_id"]
    apptoken = os.environ.get("SODAPY_APPTOKEN")
    domain = "data.melbourne.vic.gov.au"
    client = Socrata(domain, apptoken)
    r1 = client.get_all('vh2v-4nfs')  # read current parking sensors snapshot available
    r2 = client.get_all("ntht-5rk7")  # read parking restrictions records of all bays
    df1 = pd.DataFrame.from_dict(r1)
    df2 = pd.DataFrame.from_dict(r2)
    df = pd.merge(df1, df2, left_on='bay_id', right_on='bayid', how='left')
    df = df[cols].rename(columns={'st_marker_id':'marker_id'})
    return df

def create_fullUpdate(df):
    gdf = geopandas.read_file("datasets/onStreetParkingBays_baselist_bayCentroid.geojson")
    gdf["marker_id"] = gdf["marker_id"].str.upper()
    df["marker_id"] = df["marker_id"].str.upper()
    ngdf = gdf.merge(df, how="outer", on="marker_id").drop_duplicates()
    ngdf["status"] = ngdf["status"].fillna("unknown")
    ngdf = ngdf.drop(columns=["rd_seg_id","rd_seg_dsc"])
    print(f"{ngdf.shape[0]} parking sensors in full base list.")
    return ngdf

def plot_map(df,map_height=400,map_zoom=12, marker_max=9):
    fig = px.scatter_mapbox(df, lat="lati", lon="long", hover_name="marker_id", #hover_data=["description1","description2","description3","description4","description5","description6"],
                                color="status", size="area_m2", size_max=marker_max, zoom=map_zoom, height=map_height,
                                color_discrete_map={
                                    "unknown":"gray", "Unoccupied":"green", "Present":"red"},
                                category_orders={"status": ["unknown", "Unoccupied", "Present"]})
    # fig.update_layout(mapbox_style="open-street-map")
    fig.update_layout(mapbox_style="carto-positron")
    fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
    fig.show()

# [01] Get info (meta data) about datasets

In [9]:
# from https://bitbucket-students.deakin.edu.au/projects/D2IC-PG/repos/d2i---melbourne-city/browse/T2_2021/Playground/Albert/8lb_meta.ipynb

rds = client.datasets()
rdf = pd.DataFrame.from_dict(rds)
ds_df = interesteddf(rdf)
df1 = ds_df.copy()

keys = []
for record in ds_df['domain_metadata']:
    for kv in record:
        keys.append(kv['key'])
keys_ds = pd.Series(keys).drop_duplicates().reset_index(drop=True)

for key in keys_ds:
    df1[key] = np.nan
    for i in range(df1.shape[0]):
        for kv in df1['domain_metadata'][i]:
            if kv['key'] == key:
                df1[key][i] = kv['value']

# disinterested domain_metadata
dis = ['How-to-use_Applicable-standard-(URL)',\
       'Internal-management_Source-system-(GIS,-AssetMaster,-etc)',
       'Internal-management_Update-mechanism',
       'Internal-management_IMGF-risk-level',
       'Melbourne-Metadata_Further-Information',
       'Quality_Source-data-update-frequency']
df1 = df1.drop(columns=dis)
print(f'The shape of df1 is {df1.shape}.')
print('Below are my selected meta data for parking and pedestrian snapshot datasets:')
df1[df1.id.isin(['vh2v-4nfs', 'd6mv-s43h'])].T

The shape of df1 is (221, 21).
Below are my selected meta data for parking and pedestrian snapshot datasets:


Unnamed: 0,3,28
name,On-street Parking Bay Sensors,Pedestrian Counting System - Past Hour (counts...
id,vh2v-4nfs,d6mv-s43h
parent_fxf,[],[]
description,Contains information from in-ground car parkin...,<b>Current issue 23/09/2020</b>\nPlease note: ...
data_upd_at,2021-11-10T22:06:28.000Z,2021-11-10T22:01:45.000Z
pv_last_wk,167,17
pv_last_mth,655,140
pv_total,38408,7898
download_count,12083395,20537
categories,"[transportation, economy]",[]


# [02] Parking sensor data

In [10]:
# simple read of current parking sensors data snapshot

r2a = client.get_all('vh2v-4nfs')  # read current parking sensors snapshot available
df2a = pd.DataFrame.from_dict(r2a)
print(f'The shape of current parking sensors snapshot dataset is {df2a.shape}.')
print('Below is the first 3 rows of this dataset:')
df2a.head(3)

The shape of current parking sensors snapshot dataset is (1041, 7).
Below is the first 3 rows of this dataset:


Unnamed: 0,bay_id,st_marker_id,status,location,lat,lon,:@computed_region_evbi_jbp8
0,2252,3945S,Unoccupied,"{'latitude': '-37.81261862917434', 'longitude'...",-37.81261862917434,144.95362160189677,1
1,8121,15142N,Unoccupied,"{'latitude': '-37.81670150241312', 'longitude'...",-37.81670150241312,144.9817833119722,1
2,6425,13677W,Unoccupied,"{'latitude': '-37.820968971464204', 'longitude...",-37.82096897146421,144.94621455152475,1


In [47]:
# now I read the parquet file of time collected parking sensors data
# the parquet file is a result of crontab jobs ran from 30-Sep to 10-Nov
# the python code reads 'vh2v-4nfs' at 15min intervals, then does some manipulation
# and over time, builds timed parking sensors data below

dfpark = pd.read_parquet('mac_/datasets/003_vh2v-4nfs__bufferred.parquet')  # read time collected parking sensor data
dfpark.head(3)


Unnamed: 0,bay_id,st_marker_id,status,lat,lon,db_read_time
0,2671,10180E,Unoccupied,-37.8040527480692,144.94954174409875,2021-09-29 22:30:06.954481
1,6647,13622E,Unoccupied,-37.81944571124622,144.94364028336634,2021-09-29 22:30:06.954481
2,3380,6203W,Unoccupied,-37.806349134859786,144.95100392489394,2021-09-29 22:30:06.954481


# [03] Pedestrian sensors count data

In [14]:
# simple read of current pedestrian count sensors data snapshot

r2b = client.get_all('d6mv-s43h')  # read current parking sensors snapshot available
df2b = pd.DataFrame.from_dict(r2b)
print(f'The shape of current pedestrian count sensors snapshot dataset is {df2a.shape}.')
print('Below is the first 5 rows of this dataset:')
df2b.head(5)

The shape of current pedestrian count sensors snapshot dataset is (1041, 7).
Below is the first 5 rows of this dataset:


Unnamed: 0,date,time,sensor_id,direction_1,direction_2,total_of_directions,date_time
0,2021-11-11T00:00:00.000,08:11,28,6,8,14,2021-11-11T08:11:00.000
1,2021-11-11T00:00:00.000,08:11,18,2,4,6,2021-11-11T08:11:00.000
2,2021-11-11T00:00:00.000,08:10,24,6,1,7,2021-11-11T08:10:00.000
3,2021-11-11T00:00:00.000,08:11,62,0,1,1,2021-11-11T08:11:00.000
4,2021-11-11T00:00:00.000,08:10,41,14,2,16,2021-11-11T08:10:00.000


In [45]:
# read the parquet file of time collected pedestrian count sensors data
# the parquet file is a result of crontab jobs ran from 30-Sep to 10-Nov
# the python code reads 'd6mv-s43h' at 60min intervals, then a little manipulation
# and over time, builds timed pedestrian count sensors data below

dfpeds = pd.read_parquet('mac_/datasets/029_d6mv-s43h__bufferred.parquet')  # and pedestrian count data
print(f'pedestrian collected data with duplicates has {dfpeds.shape[0]} rows')
dfpeds = dfpeds.sort_values(by=['date_time']).drop_duplicates()
print(f'pedestrian collected data after removing duplicates has {dfpeds.shape[0]} rows')

print('Below is the first 5 rows of this dataset:')
dfpeds.tail(5)

pedestrian collected data with duplicates has 2275037 rows
pedestrian collected data after removing duplicates has 1974589 rows
Below is the first 5 rows of this dataset:


Unnamed: 0,sensor_id,direction_1,direction_2,date_time
2999,75,0,1,2021-11-10T12:41:00.000
2898,59,0,6,2021-11-10T12:41:00.000
2691,62,1,4,2021-11-10T12:41:00.000
3326,56,5,1,2021-11-10T12:41:00.000
3258,12,2,0,2021-11-10T12:41:00.000
