# City of Melbourne

### Set up everything

In [2]:
# Set up
import pandas as pd
from sodapy import Socrata
import requests
from pprint import pprint
import gmaps
import os
import json

from api_keys_tokens import MyAppToken
from api_keys_tokens import google_key

### Get All The Pedestrian Data From the City of Melbourne via an API call

In [7]:
# Pedestrian Counting System - Monthly (counts per hour)
# Resource - https://dev.socrata.com/foundry/data.melbourne.vic.gov.au/b2ak-trbp

# Open a Socrata client against the City of Melbourne open-data portal,
# using the app token imported from api_keys_tokens.
client = Socrata("data.melbourne.vic.gov.au", MyAppToken)
pedestrian_count_dataset_id = "b2ak-trbp"
# Fetch the dataset in a single request; `limit` caps the number of rows returned.
pedestrian_count_request = client.get(pedestrian_count_dataset_id, limit=4000000)

# Convert to pandas DataFrame
# (the request result is passed straight to DataFrame.from_records).
pedestrian_count_df = pd.DataFrame.from_records(pedestrian_count_request)

In [9]:
# Persist the raw pedestrian counts so later cells can reload them from disk
# instead of repeating the API call. The row index is not written.
pedestrian_count_df.to_csv(
    "Melbourne City Council Data/all_pedestrian_data.csv",
    index=False,
    encoding="utf-8",
)

### Make a CSV file containing all the sensor location data

In [9]:
# Write the sensor location table to disk without the row index.
# NOTE: sensor_location_df is created by the sensor-location API cell further
# down in this notebook, so that cell has to be run before this one.
sensor_location_df.to_csv(
    "Melbourne City Council Data/all_sensor_location_data.csv",
    index=False,
    encoding="utf-8",
)

### Make a CSV file containing only pedestrian data from August 2019 to August 2021

In [None]:
# Reload the full pedestrian data set that was saved earlier.
data = pd.read_csv("Melbourne City Council Data/all_pedestrian_data.csv")

# Keep rows from 1 August 2019 (inclusive) up to 1 September 2021 (exclusive),
# ordered by record id. The comparison is done on the date_time values exactly
# as they were read from the CSV.
filtered_df = data[
    data["date_time"].ge("2019-08-01") & data["date_time"].lt("2021-09-01")
].sort_values(by="id")

# Save the filtered rows to their own CSV file (row index omitted).
filtered_df.to_csv(
    "Melbourne City Council Data/filtered_pedestrian_data.csv",
    index=False,
    encoding="utf-8",
)

### Get all the sensor location data via an API call

In [11]:
# Sensor Locations
# Resource - https://dev.socrata.com/foundry/data.melbourne.vic.gov.au/h57g-5234
sensor_location_id = "h57g-5234"

# Query the City of Melbourne portal for the sensor location records
# (at most 100 rows are requested).
client = Socrata("data.melbourne.vic.gov.au", MyAppToken)
sensor_location_id_request = client.get(sensor_location_id, limit=100)

# Turn the returned records into a DataFrame.
sensor_location_df = pd.DataFrame.from_records(data=sensor_location_id_request)

### Find the Postcode for Each Sensor (using the Google Geocoding API)

In [10]:
# Get the data
# NOTE(review): `data` is not used by the geocoding loop below; the read is
# kept so the `data` variable still exists for anything that relies on it.
data = pd.read_csv("Melbourne City Council Data/filtered_pedestrian_data.csv")
sensor_data = pd.read_csv("Melbourne City Council Data/all_sensor_location_data.csv")

# Add an "Address" column that will hold the reverse-geocoded address of each
# sensor. Rows whose lookup fails keep the empty string.
sensor_data["Address"] = ""

# Google Geocoding API endpoint; supplying `latlng` requests a reverse lookup.
base_url = "https://maps.googleapis.com/maps/api/geocode/json?"

# Reverse-geocode every sensor, one request per row.
for index, row in sensor_data.iterrows():

    # Set the parameters of the search
    params = {
        # Latitude and longitude are joined with a bare comma: the Geocoding
        # API documentation asks for no space between the two values.
        "latlng": f"{row['latitude']},{row['longitude']}",
        "key": google_key,
    }

    try:
        # A timeout stops one stalled request from hanging the whole loop.
        response = requests.get(base_url, params=params, timeout=10)
        response.raise_for_status()
        # Take the formatted address of the first (best) match.
        address = response.json()["results"][0]["formatted_address"]
    except (KeyError, IndexError):
        # The response had no usable result for this coordinate.
        print("Missing field/result... skipping.")
    except (requests.RequestException, ValueError) as error:
        # Network failure, HTTP error status, or a body that is not JSON.
        print(f"Request failed ({error})... skipping.")
    else:
        # Store the address on the sensor row and log it.
        sensor_data.loc[index, 'Address'] = address
        print(address)

    print("-----------------------------------------------------")


380A Elizabeth St, Melbourne VIC 3000, Australia
-----------------------------------------------------
150 Swanston St, Melbourne VIC 3000, Australia
-----------------------------------------------------
418 Swanston St, Melbourne VIC 3000, Australia
-----------------------------------------------------
Unnamed Road, Melbourne VIC 3004, Australia
-----------------------------------------------------
123 Flinders St, Melbourne VIC 3000, Australia
-----------------------------------------------------
252 La Trobe St, Melbourne VIC 3000, Australia
-----------------------------------------------------
231 Bourke Street Mall, Melbourne VIC 3000, Australia
-----------------------------------------------------
Sandridge Bridge, 1 Southbank Blvd, Southbank VIC 3006, Australia
-----------------------------------------------------
703/37 Swanston St, Melbourne VIC 3000, Australia
-----------------------------------------------------
2 Clarendon St, South Wharf VIC 3006, Australia
---------------

In [121]:
# Extract the four-digit postcode that sits immediately before the trailing
# ", Australia" of each formatted address (e.g. "... VIC 3000, Australia").
# Matching on the pattern rather than on fixed character offsets means an
# address with a different ending yields a missing value instead of an
# arbitrary four-character fragment.
sensor_data["Postcode"] = sensor_data["Address"].str.extract(
    r"\b(\d{4}),\s*Australia\s*$", expand=False
)

In [123]:
# Save the sensor table with its new Address/Postcode columns.
# The row index is written too (no index=False here); it comes back as the
# "Unnamed: 0" column that the merge cell later deletes.
sensor_data.to_csv(
    path_or_buf="Melbourne City Council Data/sensor_data_postcode.csv",
)

### Find Postcodes in Pedestrian DataSet

In [23]:
# Reload the sensor table that includes postcodes and collect the distinct
# postcode values, in order of first appearance.
sensor_data_postcode = pd.read_csv(
    "Melbourne City Council Data/sensor_data_postcode.csv"
)
postcode_list = pd.unique(sensor_data_postcode["Postcode"])

### Sum up the pedestrian counts per sensor per month
#### (Note: based on Pedestrian Data filtered from August 2019 to August 2021)

In [None]:
# Read the pedestrian data filtered to August 2019 - August 2021
pedestrian_data = pd.read_csv("Melbourne City Council Data/filtered_pedestrian_data.csv")

# Total the hourly counts for each (year, month, sensor) combination.
# The column is selected BEFORE summing: GroupBy.sum() takes `numeric_only`
# as its first parameter, so passing the column name to sum() does not
# select that column.
summed_pedestrian_data = (
    pedestrian_data
    .groupby(["year", "month", "sensor_id"])["hourly_counts"]
    .sum()
    .reset_index()
)

# Build a single "<month> <year>" label and drop the two source columns.
summed_pedestrian_data["month-year"] = (
    summed_pedestrian_data["month"].astype(str)
    + " "
    + summed_pedestrian_data["year"].astype(str)
)
summed_pedestrian_data = summed_pedestrian_data.drop(["year", "month"], axis=1)

# The summed hourly counts are now monthly totals, so rename the column.
summed_pedestrian_data = summed_pedestrian_data.rename(columns={"hourly_counts": "monthly_count"})

# One row per sensor, one column per month-year label.
ped_df = summed_pedestrian_data.pivot(index="sensor_id", columns="month-year", values="monthly_count")
ped_df

### Clean the data (Remove all Null Values)

In [None]:
# Count the missing monthly totals for each sensor (axis=1 gives one count
# per row). The result is not assigned to anything.
ped_df.isna().sum(axis=1)

# Drop every sensor that has at least one missing month, turn sensor_id back
# into an ordinary column, and clear the leftover name on the column axis.
ped_df = ped_df.dropna().reset_index().rename_axis(None, axis=1)

### Create a CSV file of the summed values

In [None]:
# Write the per-sensor monthly totals to disk (row index omitted).
# NOTE(review): the file name says "Aug19Aug20" although the headings above
# describe the data as August 2019 to August 2021; the name is left as-is
# because other code may read this path.
ped_df.to_csv(
    "Melbourne City Council Data/summed_pedestrian_count_sensor_Aug19Aug20.csv",
    index=False,
)

### Merge summed pedestrian count per sensor per month with sensor location data

In [None]:
# Load the sensor table that carries the address and postcode of each sensor.
sensor_data = pd.read_csv("Melbourne City Council Data/sensor_data_postcode.csv")

# Join the monthly totals to the sensor details on sensor_id (inner join, the
# pandas default), keeping only sensors present in both tables.
pedestrian_location_sensor_data = pd.merge(ped_df, sensor_data, on="sensor_id")

# Discard columns that are not needed downstream, including the "Unnamed: 0"
# column that holds the row index saved with the sensor CSV.
pedestrian_location_sensor_data = pedestrian_location_sensor_data.drop(
    columns=["installation_date", "location", "note", "Unnamed: 0"]
)

### Create a CSV file of the merged data

In [None]:
# Store the merged sensor/pedestrian table on disk, leaving out the row index.
pedestrian_location_sensor_data.to_csv(
    path_or_buf="Melbourne City Council Data/pedestrian_location_sensor_data.csv",
    index=False,
)

### Find Pedestrian Activity by Postcode per month

In [None]:
# Sum the pedestrian traffic by postcode.
# numeric_only=True restricts the sum to numeric columns, so the text columns
# of the merged table (sensor names, addresses, ...) are neither concatenated
# nor able to raise during the aggregation.
pedestrian_location_sensor_data_sum = pedestrian_location_sensor_data.groupby("Postcode").sum(numeric_only=True)

# Get rid of numeric columns whose per-postcode sum has no meaning.
pedestrian_location_sensor_data_sum = pedestrian_location_sensor_data_sum.drop(columns=["sensor_id","latitude","longitude"])

#  Show results
pedestrian_location_sensor_data_sum

In [None]:
# Put the month columns into calendar order (August 2019 .. August 2021);
# only the columns listed here are kept.
month_order = [
    'August 2019', 'September 2019', 'October 2019', 'November 2019',
    'December 2019', 'January 2020', 'February 2020', 'March 2020',
    'April 2020', 'May 2020', 'June 2020', 'July 2020', 'August 2020',
    'September 2020', 'October 2020', 'November 2020', 'December 2020',
    'January 2021', 'February 2021', 'March 2021', 'April 2021', 'May 2021',
    'June 2021', 'July 2021', 'August 2021',
]
pedestrian_location_sensor_data_sum = pedestrian_location_sensor_data_sum.loc[:, month_order]

# Flip the table so each row is a month and each column a postcode, expose the
# month labels as a "Date" column, and clear the name left on the column axis.
df = (
    pedestrian_location_sensor_data_sum
    .transpose()
    .reset_index()
    .rename(columns={"index": "Date"})
    .rename_axis(None, axis=1)
)
df

#### Save as a CSV File

In [12]:
# Write the month-by-postcode table to disk without the row index.
df.to_csv(
    path_or_buf="Melbourne City Council Data/Pedestrian_Activity_by_Postcode_per_month.csv",
    index=False,
)

### Pedestrian activity shortly after the end of JobKeeper (April 2021 up to and including August 2021)

In [None]:
# Reload the month-by-postcode pedestrian totals.
data = pd.read_csv("Melbourne City Council Data/Pedestrian_Activity_by_Postcode_per_month.csv")

# Keep only the rows for the five months April 2021 - August 2021.
post_jobkeeper_months = ["April 2021", "May 2021", "June 2021", "July 2021", "August 2021"]
after_jobkeeper = data[data["Date"].isin(post_jobkeeper_months)]

after_jobkeeper

##### Save as a CSV File

In [None]:
# Save the post-JobKeeper months to their own CSV file (row index omitted).
after_jobkeeper.to_csv(
    path_or_buf="Melbourne City Council Data/after_jobkeeper_pedestrian_count.csv",
    index=False,
)

### Pedestrian activity before the beginning of JobKeeper (August 2019 until the end of March 2020)

In [None]:
# Reload the month-by-postcode pedestrian totals.
data = pd.read_csv("Melbourne City Council Data/Pedestrian_Activity_by_Postcode_per_month.csv")

# Keep only the rows for the eight months August 2019 - March 2020.
pre_jobkeeper_months = [
    "August 2019", "September 2019", "October 2019", "November 2019",
    "December 2019", "January 2020", "February 2020", "March 2020",
]
before_jobkeeper = data[data["Date"].isin(pre_jobkeeper_months)]

##### Save as a CSV File

In [None]:
# Save the pre-JobKeeper months to their own CSV file (row index omitted).
before_jobkeeper.to_csv(
    path_or_buf="Melbourne City Council Data/before_jobkeeper_pedestrian_count.csv",
    index=False,
)

### Pedestrian Traffic during JobKeeper

In [None]:
# Reload the month-by-postcode pedestrian totals.
data = pd.read_csv("Melbourne City Council Data/Pedestrian_Activity_by_Postcode_per_month.csv")

# The twelve months treated as the JobKeeper period (April 2020 - March 2021).
date_list = [
    'April 2020', 'May 2020', 'June 2020', 'July 2020',
    'August 2020', 'September 2020', 'October 2020', 'November 2020',
    'December 2020', 'January 2021', 'February 2021', 'March 2021',
]

# Keep the rows whose Date falls in the JobKeeper period and save them
# (row index omitted).
jobkeeper_pedestrian_data = data[data["Date"].isin(date_list)]

jobkeeper_pedestrian_data.to_csv(
    "Melbourne City Council Data/during_jobkeeper_pedestrian_count.csv",
    index=False,
)

# JobKeeper

### Obtain and filter jobkeeper data by the Postcodes found in the Pedestrian Dataset

In [2]:
# Read the three JobKeeper sheets from the workbook. header=1 takes the
# column names from the second spreadsheet row; usecols limits each sheet to
# the listed column range.
first_phase = pd.read_excel("jobkeeper data/jobkeeperdata.xlsx", sheet_name='First Phase',header=1, usecols="A:G")
extension_quarter1 = pd.read_excel("jobkeeper data/jobkeeperdata.xlsx", sheet_name='Extension - First Quarter',header=1, usecols="A:D")
extension_quarter2 = pd.read_excel("jobkeeper data/jobkeeperdata.xlsx", sheet_name='Extension - Second Quarter',header=1, usecols="A:D")

# Save all of them to csv sheets.
# The first-phase file goes into the "jobkeeper data" folder like the other
# two, because that is the path the next cell reads it back from.
first_phase.to_csv("jobkeeper data/jkd_first_phase.csv", index=False)
extension_quarter1.to_csv("jobkeeper data/jkd_extension_quarter1.csv", index=False)
extension_quarter2.to_csv("jobkeeper data/jkd_extension_quarter2.csv", index=False)

In [3]:
# Reload the three JobKeeper CSV exports.
first_phase_data = pd.read_csv("jobkeeper data/jkd_first_phase.csv")
first_extension_data = pd.read_csv("jobkeeper data/jkd_extension_quarter1.csv")
second_extension_data = pd.read_csv("jobkeeper data/jkd_extension_quarter2.csv")

# For each table: cast Postcode to the pandas string dtype, then drop its
# first two characters.
# NOTE(review): presumably the raw value carries a two-character prefix in
# front of the four-digit postcode - confirm against the spreadsheet.
for jobkeeper_frame in (first_phase_data, first_extension_data, second_extension_data):
    postcode_text = pd.Series(jobkeeper_frame['Postcode'], dtype=pd.StringDtype())
    jobkeeper_frame['Postcode'] = postcode_text.str.slice(start=2)

In [5]:
# Postcodes to keep, as strings to match the string-typed Postcode columns.
mcc_postcodes = ["3000", "3004", "3006", "3008", "3010", "3031", "3051", "3052", "3053"]

# Get all the first phase data for those postcodes
MCC_first_phase_data = first_phase_data.loc[first_phase_data["Postcode"].isin(mcc_postcodes)]

# Get all the first extension data for those postcodes
MCC_first_extension_data = first_extension_data.loc[first_extension_data["Postcode"].isin(mcc_postcodes)]

# Get all the second extension data for those postcodes
MCC_second_extension_data = second_extension_data.loc[second_extension_data["Postcode"].isin(mcc_postcodes)]

# Merge the three tables on Postcode (inner joins, the pandas default), so a
# postcode must appear in all three to be kept.
application_counts = (
    MCC_first_phase_data
    .merge(MCC_first_extension_data, on="Postcode")
    .merge(MCC_second_extension_data, on="Postcode")
)

# Rename each "<Month> Application Count" column to a "<Month> <Year>" label.
# "December 2020" is spelled to match the month labels used for the
# pedestrian data.
application_counts = application_counts.rename(columns={
    "April Application Count": "April 2020",
    "May Application Count": "May 2020",
    "June Application Count": "June 2020",
    "July Application Count": "July 2020",
    "August Application Count": "August 2020",
    "September Application Count": "September 2020",
    "October Application Count": "October 2020",
    "November Application Count": "November 2020",
    "December Application Count": "December 2020",
    "January Application Count": "January 2021",
    "February Application Count": "February 2021",
    "March Application Count": "March 2021",
})



### Create a new CSV file with the filtered JobKeeper data

In [10]:
# Write the filtered JobKeeper application counts to disk (row index omitted).
application_counts.to_csv(
    path_or_buf="jobkeeper data/application_counts.csv",
    index=False,
)