In [3]:
#!/usr/bin/env python

# make sure to install these packages before running:
!pip install sodapy

Collecting sodapy
  Downloading https://files.pythonhosted.org/packages/9e/74/95fb7d45bbe7f1de43caac45d7dd4807ef1e15881564a00eef489a3bb5c6/sodapy-2.1.0-py2.py3-none-any.whl
Installing collected packages: sodapy
Successfully installed sodapy-2.1.0


In [4]:
#import the libraries 
import numpy as np
import pandas as pd
from pandas import DataFrame as df, Series as se
#import the library for the API
from sodapy import Socrata

# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation notices raised by the
# libraries used below are hidden as well — confirm that is intended.
import warnings
warnings.filterwarnings('ignore')

In [5]:
# Open-data portal host and identifier of the pedestrian-count dataset.
PORTAL_DOMAIN = "data.melbourne.vic.gov.au"
PEDESTRIAN_DATASET_ID = "b2ak-trbp"

# Maximum number of rows requested in the single call below. History:
#   3391522 -> covered 2009-05-01 to 2020-10-31
#   3574594 -> current value, raised when the extract was refreshed
# NOTE(review): the earlier note said "update to 2020-12-31", but the rows
# displayed further down reach 2021-02-28 — confirm the intended cutoff.
ROW_LIMIT = 3574594

# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata(PORTAL_DOMAIN, None)

# Fetch the hourly pedestrian-count records from the City of Melbourne portal.
results = client.get(PEDESTRIAN_DATASET_ID, limit=ROW_LIMIT)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)




In [6]:
# Rebind results_df to a copy of itself.
# NOTE(review): the frame was created in the previous cell and nothing else
# visible here refers to it, so this copy looks redundant — confirm.
results_df = results_df.copy()

In [7]:
# Inspect the dtype of every column of the freshly loaded frame.
results_df.dtypes

id               object
date_time        object
year             object
month            object
mdate            object
day              object
time             object
sensor_id        object
sensor_name      object
hourly_counts    object
dtype: object

In [8]:
# Assemble a 'day-month-year' string (e.g. '1-November-2019') from its parts,
# then discard the raw columns that are no longer needed.
date_parts = [results_df['mdate'], results_df['month'], results_df['year']]
results_df['date'] = date_parts[0] + '-' + date_parts[1] + '-' + date_parts[2]

redundant_columns = ['id', 'date_time', 'year', 'month', 'mdate', 'day', 'time']
results_df = results_df.drop(columns=redundant_columns)

In [9]:
# Cast the two numeric columns, currently of object dtype, to integers.
results_df = results_df.astype({'sensor_id': 'int', 'hourly_counts': 'int'})
results_df

Unnamed: 0,sensor_id,sensor_name,hourly_counts,date
0,34,Flinders St-Spark La,300,1-November-2019
1,39,Alfred Place,604,1-November-2019
2,37,Lygon St (East),216,1-November-2019
3,40,Lonsdale St-Spring St (West),627,1-November-2019
4,36,Queen St (West),774,1-November-2019
...,...,...,...,...
3574589,69,Flinders Ln -Degraves St (Crossing),0,28-February-2021
3574590,71,Westwood Place,19,28-February-2021
3574591,73,Bourke St - Spencer St (South),90,28-February-2021
3574592,72,Flinders St-ACMI,37,28-February-2021


In [10]:
# Re-check the column dtypes now that the integer cast has been applied.
results_df.dtypes

sensor_id         int64
sensor_name      object
hourly_counts     int64
date             object
dtype: object

In [11]:
# Parse the 'date' strings (day-monthname-year, e.g. '1-November-2019') into
# datetime64[ns]. An explicit format avoids per-value format guessing and makes
# a malformed date fail loudly instead of being interpreted some other way.
# NOTE: '%B' matches full month names in the current locale (English expected).
results_df['date'] = pd.to_datetime(results_df['date'], format='%d-%B-%Y')

# Use pandas' dedicated string dtype for the sensor names instead of object.
results_df['sensor_name'] = results_df['sensor_name'].astype(pd.StringDtype())

In [12]:
# Re-check the column dtypes after the date and sensor_name conversions.
results_df.dtypes

sensor_id                 int64
sensor_name              string
hourly_counts             int64
date             datetime64[ns]
dtype: object

In [13]:
# Keep only the observations dated after 31 December 2014 (i.e. 2015 onwards).
cutoff = pd.Timestamp('2014-12-31')
results_df = results_df.loc[results_df['date'].gt(cutoff)]

In [66]:
# Display the filtered frame (pandas truncates the rendering of large frames).
results_df

Unnamed: 0,sensor_id,sensor_name,hourly_counts,date
0,34,Flinders St-Spark La,300,2019-11-01
1,39,Alfred Place,604,2019-11-01
2,37,Lygon St (East),216,2019-11-01
3,40,Lonsdale St-Spring St (West),627,2019-11-01
4,36,Queen St (West),774,2019-11-01
...,...,...,...,...
3574589,69,Flinders Ln -Degraves St (Crossing),0,2021-02-28
3574590,71,Westwood Place,19,2021-02-28
3574591,73,Bourke St - Spencer St (South),90,2021-02-28
3574592,72,Flinders St-ACMI,37,2021-02-28


In [67]:
# Collapse the hourly readings into one total per day and sensor.
group_keys = ['date', 'sensor_id', 'sensor_name']
new_results_df = (
    results_df
    .groupby(group_keys)['hourly_counts']
    .sum()
    .rename('daily_count')
    .reset_index()
)
new_results_df

Unnamed: 0,date,sensor_id,sensor_name,daily_count
0,2015-01-01,2,Bourke Street Mall (South),21217
1,2015-01-01,3,Melbourne Central,32695
2,2015-01-01,4,Town Hall (West),36958
3,2015-01-01,5,Princes Bridge,31224
4,2015-01-01,6,Flinders Street Station Underpass,20457
...,...,...,...,...
106290,2021-02-28,69,Flinders Ln -Degraves St (Crossing),1834
106291,2021-02-28,71,Westwood Place,611
106292,2021-02-28,72,Flinders St-ACMI,4780
106293,2021-02-28,73,Bourke St - Spencer St (South),2097


In [68]:
# Count how many daily records each sensor has between 2015 and 2021;
# the five sensors with the most records are picked from this listing.
new_results_df['sensor_id'].value_counts()

34    2251
9     2251
10    2251
18    2251
2     2241
      ... 
60      96
73      90
72      59
75      28
16       8
Name: sensor_id, Length: 74, dtype: int64

In [69]:
# Keep only the daily counts of the five sensors that have the most daily
# records (ids read off the value_counts() listing shown above).
sensor_id = [34, 9, 10, 18, 2]

# Build the membership mask once and display the matching rows; a bare
# `isin(...)` expression in the middle of a cell is evaluated and thrown away.
top_sensor_mask = new_results_df['sensor_id'].isin(sensor_id)
new_results_df[top_sensor_mask]

Unnamed: 0,date,sensor_id,sensor_name,daily_count
0,2015-01-01,2,Bourke Street Mall (South),21217
7,2015-01-01,9,Southern Cross Station,2813
8,2015-01-01,10,Victoria Point,2592
15,2015-01-01,18,Collins Place (North),1711
30,2015-01-01,34,Flinders St-Spark La,4086
...,...,...,...,...
106234,2021-02-28,2,Bourke Street Mall (South),9754
106240,2021-02-28,9,Southern Cross Station,826
106241,2021-02-28,10,Victoria Point,861
106246,2021-02-28,18,Collins Place (North),1015


In [70]:
import plotly.express as px

# Daily totals for sensor 2, plotted against the date and exported to CSV.
df = new_results_df.loc[new_results_df['sensor_id'] == 2]
fig = px.scatter(
    df,
    x="date",
    y="daily_count",
    hover_data=['daily_count'],
)
fig.update_layout(title='Bourke Street Mall (South)')
fig.show()

df.to_csv('Bourke_Street_Mall_South.csv')

In [72]:
import plotly.express as px
df = new_results_df[new_results_df.sensor_id == 9 ]
fig = px.scatter(df, x= "date", y="daily_count", hover_data=['daily_count'])
fig.update_layout(title='Victoria Point')
fig.show()
df.to_csv('Victoria_Point.csv')  

In [73]:
import plotly.express as px
df = new_results_df[new_results_df.sensor_id == 10 ]
fig = px.scatter(df, x= "date", y="daily_count", hover_data=['daily_count'])
fig.update_layout(title='Southern Cross Station')
fig.show()
df.to_csv('Southern_Cross_Station.csv')  

In [74]:
import plotly.express as px

# Daily totals for sensor 18, plotted against the date and exported to CSV.
df = new_results_df.loc[new_results_df['sensor_id'] == 18]
fig = px.scatter(
    df,
    x="date",
    y="daily_count",
    hover_data=['daily_count'],
)
fig.update_layout(title='Collins Place (North)')
fig.show()

df.to_csv('Collins_Place_North.csv')

In [75]:
import plotly.express as px

# Daily totals for sensor 34, plotted against the date and exported to CSV.
df = new_results_df.loc[new_results_df['sensor_id'] == 34]
fig = px.scatter(
    df,
    x="date",
    y="daily_count",
    hover_data=['daily_count'],
)
fig.update_layout(title='Flinders St-Spark La')
fig.show()

df.to_csv('Flinders_St-Spark_La.csv')