In [1]:
import geocoder

In [2]:
import pandas as pd
import folium
import time

In [3]:
import geopy
import pandas as pd

def get_zipcode(df, geolocator, lat_field, lon_field):
    """Reverse-geocode one record and return its postcode.

    Parameters
    ----------
    df : mapping-like (e.g. one DataFrame row)
        Indexed with ``lat_field`` and ``lon_field`` to obtain the coordinates.
    geolocator : object
        Must provide ``reverse((lat, lon))`` whose result exposes
        ``.raw['address']['postcode']``.
    lat_field, lon_field : hashable
        Keys of the latitude and longitude values in ``df``.

    Returns
    -------
    The postcode found in the reverse-geocoding result, or ``None`` when
    anything goes wrong (missing key, no result, geocoder error, ...).
    """
    try:
        coordinates = (df[lat_field], df[lon_field])
        reverse_hit = geolocator.reverse(coordinates)
        postcode = reverse_hit.raw['address']['postcode']
    except Exception:
        # Best-effort lookup: every failure is reported as "no postcode".
        postcode = None
    return postcode
    
# Shared Nominatim geocoder instance; it is the `geolocator` handed to get_zipcode
# when postcodes are looked up for the shopping centres.
geolocator = geopy.Nominatim(user_agent='zixguo@student.unimelb.edu.au')  # user_agent value is the author's e-mail address



### Read data

In [4]:
# Load the postcode centroid table (later cells read its POSTCODE and centroid columns)
centroid_csv_path = '../../data/raw/external-data/postcode_centroid.csv'
df_all_centroid = pd.read_csv(centroid_csv_path)

In [5]:
# Load the shopping centre table (later cells read its Name and Adress columns)
shopping_centers_csv_path = "../../data/raw/external-data/shopping_centers.csv"
df_shopping_centers = pd.read_csv(shopping_centers_csv_path)

### Pre-processing for shopping centers data

In [6]:
# Extract latitude and longitude from the "GPS: <lat>, <lon>" part of each address
import re

# Raw string so that \s and \d reach the regex engine unchanged; the decimal
# point is escaped so it cannot match an arbitrary character, and the match is
# anchored on the "GPS:" label instead of taking the first two number-like tokens.
GPS_PATTERN = re.compile(r'GPS:\s*(-?\d+\.\d+)\s*,\s*(-?\d+\.\d+)')


def _parse_gps(address):
    """Return ``(latitude, longitude)`` as floats parsed from an address string.

    Raises
    ------
    ValueError
        If the string contains no ``GPS: <lat>, <lon>`` part, so that a
        malformed row is reported with its content rather than a bare IndexError.
    """
    match = GPS_PATTERN.search(address)
    if match is None:
        raise ValueError(f"no GPS coordinates found in address: {address!r}")
    return float(match.group(1)), float(match.group(2))


# Parse every address once, then attach both coordinate columns.
gps_coords = pd.DataFrame(
    [_parse_gps(address) for address in df_shopping_centers["Adress"]],
    index=df_shopping_centers.index,
    columns=["LATITUDE", "LONGITUDE"],
)
df_shopping_centers["LATITUDE"] = gps_coords["LATITUDE"]
df_shopping_centers["LONGITUDE"] = gps_coords["LONGITUDE"]
df_shopping_centers

Unnamed: 0,Name,Adress,LATITUDE,LONGITUDE
0,206 Bourke Street,"Victoria, Melbourne, GPS: -37.812733, 144.9669...",-37.812733,144.966947
1,Acland Court Shopping Centre,"Victoria, St Kilda, GPS: -37.868967, 144.98061...",-37.868967,144.980617
2,Altona Gate Shopping Centre,"Victoria, Melbourne, GPS: -37.828989, 144.8462...",-37.828989,144.846270
3,Balnarring Village Shopping Centre,"Victoria, Balnarring, GPS: -38.373199, 145.125...",-38.373199,145.125010
4,Barkly Square,"Victoria, Brunswick, GPS: -37.775695, 144.9620...",-37.775695,144.962004
...,...,...,...,...
186,Wodonga Plaza,"Victoria, Wodonga, GPS: -36.121153, 146.881917...",-36.121153,146.881917
187,Woodgrove Shopping Centre,"Victoria, Melton, GPS: -37.686167, 144.56127 |...",-37.686167,144.561270
188,Woolworths Whitebox Rise Wodonga,"Victoria, Wodonga, GPS: -36.139008, 146.892456...",-36.139008,146.892456
189,Wyndham Vale Square Shopping Centre,"Victoria, Wyndham Vale, GPS: -37.888057, 144.6...",-37.888057,144.607036


In [7]:
# Look up a postcode for every shopping centre by reverse-geocoding its coordinates.
# NOTE(review): this issues one geocoder request per row with no throttling visible
# in this cell -- check it against the geocoding service's usage limits.
zipcodes_shopping_centers = df_shopping_centers.apply(
    lambda row: get_zipcode(row, geolocator, 'LATITUDE', 'LONGITUDE'),
    axis=1,
)

# Work on a copy so the original frame is left without the postcode column.
df_shopping_centers_postcode = df_shopping_centers.copy()
df_shopping_centers_postcode["postcode"] = zipcodes_shopping_centers
df_shopping_centers_postcode

Unnamed: 0,Name,Adress,LATITUDE,LONGITUDE,postcode
0,206 Bourke Street,"Victoria, Melbourne, GPS: -37.812733, 144.9669...",-37.812733,144.966947,3000
1,Acland Court Shopping Centre,"Victoria, St Kilda, GPS: -37.868967, 144.98061...",-37.868967,144.980617,3182
2,Altona Gate Shopping Centre,"Victoria, Melbourne, GPS: -37.828989, 144.8462...",-37.828989,144.846270,3025
3,Balnarring Village Shopping Centre,"Victoria, Balnarring, GPS: -38.373199, 145.125...",-38.373199,145.125010,3927
4,Barkly Square,"Victoria, Brunswick, GPS: -37.775695, 144.9620...",-37.775695,144.962004,3056
...,...,...,...,...,...
186,Wodonga Plaza,"Victoria, Wodonga, GPS: -36.121153, 146.881917...",-36.121153,146.881917,3690
187,Woodgrove Shopping Centre,"Victoria, Melton, GPS: -37.686167, 144.56127 |...",-37.686167,144.561270,3337
188,Woolworths Whitebox Rise Wodonga,"Victoria, Wodonga, GPS: -36.139008, 146.892456...",-36.139008,146.892456,3690
189,Wyndham Vale Square Shopping Centre,"Victoria, Wyndham Vale, GPS: -37.888057, 144.6...",-37.888057,144.607036,3024


In [8]:
# Rows whose reverse-geocoding lookup produced no postcode
df_null = df_shopping_centers_postcode.loc[lambda frame: frame['postcode'].isna()]
df_null

Unnamed: 0,Name,Adress,LATITUDE,LONGITUDE,postcode
107,Mitcham Shopping Centre,"Victoria, Mitcham, GPS: -37.815739, 145.192535...",-37.815739,145.192535,


In [10]:
# use latitude and longitude and shapefile to find the postcode of the
# shopping centres that reverse geocoding could not resolve
import geopandas as gpd
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

# read shapefile
# NOTE(review): this path starts with "../" while the CSV inputs are read from
# "../../data/..." -- confirm which one is right for this notebook's location.
shapedf = pd.DataFrame(gpd.read_file("../data/raw/VMADMIN/POSTCODE_POLYGON.shp"))


# define a function to find postcode by shapefile
def check_post(point):
    """Return the POSTCODE of the first polygon in ``shapedf`` containing ``point``.

    Parameters
    ----------
    point : shapely.geometry.Point
        Built as ``Point(longitude, latitude)``.

    Returns
    -------
    The ``POSTCODE`` value of the first matching row of ``shapedf``, or ``None``
    when no polygon contains the point.
    """
    is_inside = shapedf["geometry"].apply(lambda polygon: polygon.contains(point))
    matches = shapedf.loc[is_inside, "POSTCODE"]
    if matches.empty:
        return None
    return matches.values[0]


# Fill every missing postcode. Rows are addressed by index label (not by Name)
# so duplicated centre names cannot overwrite each other, and the coordinates
# are passed to Point as plain floats rather than one-element Series.
for row_label, row in df_null.iterrows():
    code = check_post(Point(float(row["LONGITUDE"]), float(row["LATITUDE"])))
    if code is not None:
        df_shopping_centers_postcode.loc[row_label, "postcode"] = int(code)

# check if all the missing postcode find postcode by shapefile
# (anything still listed here was not inside any postcode polygon)
df_shopping_centers_postcode[df_shopping_centers_postcode['postcode'].isna()]

Unnamed: 0,Name,Adress,LATITUDE,LONGITUDE,postcode


In [11]:
# make sure postcode data for both dataframe are the same data type (stripped strings)
# Plain column assignment replaces the whole column together with its dtype;
# `.loc[:, col] = ...` writes into the existing column in place and, depending
# on the pandas version, may keep the old dtype.
df_shopping_centers_postcode["postcode"] = df_shopping_centers_postcode["postcode"].astype(str).str.strip()
df_all_centroid["POSTCODE"] = df_all_centroid["POSTCODE"].astype(str).str.strip()

In [12]:
import os

import openrouteservice as ors

# Take the personal API key from the ORS_API_KEY environment variable so it is
# never stored in the notebook; the placeholder is kept as fallback so the cell
# behaves as before when the variable is not set.
client = ors.Client(key=os.environ.get('ORS_API_KEY', 'your_key'))

In [13]:
def nearest(df_all_property, df_facility, index):
    """Find the facility with the shortest travel duration to one property.

    Only facilities whose ``postcode`` equals the property's ``POSTCODE`` are
    considered. Travel figures come from the module-level ``client``
    (``client.distance_matrix``), with the facilities as sources and the
    property centroid as the single destination.

    Parameters
    ----------
    df_all_property : DataFrame
        Needs the columns ``POSTCODE`` and ``centroid``; ``centroid`` is either
        a coordinate pair or its string literal, e.g. ``"(144.96, -37.81)"``
        (presumably longitude first, matching the ``[lon, lat]`` order used for
        the facilities -- confirm against the data).
    df_facility : DataFrame
        Needs the columns ``postcode``, ``Name``, ``LATITUDE``, ``LONGITUDE``.
    index : label
        Row label of the property in ``df_all_property``.

    Returns
    -------
    tuple
        ``(postcode, name, distance, duration, [lon, lat])`` of the chosen
        facility. Distance and duration both belong to that same facility.
        When the postcode has no facility, or no facility has a usable
        duration, the last four items are the string ``'none'``.

    Raises
    ------
    ValueError, SyntaxError
        If a string ``centroid`` is not a plain Python literal.
    """
    import ast  # local import keeps this cell self-contained

    postcode = df_all_property['POSTCODE'][index]
    nothing_found = (postcode, 'none', 'none', 'none', 'none')

    # all facilities located in the property's postcode
    candidates = df_facility.loc[df_facility['postcode'] == postcode]
    if candidates.empty:
        return nothing_found

    names = candidates['Name'].tolist()
    facility_locs = [
        [lon, lat]
        for lon, lat in zip(candidates['LONGITUDE'].tolist(), candidates['LATITUDE'].tolist())
    ]

    # The centroid comes from a data file: parse it as a literal instead of
    # running it through eval(), which would execute arbitrary code.
    centroid = df_all_property['centroid'][index]
    if isinstance(centroid, str):
        centroid = ast.literal_eval(centroid)

    # position 0 is the property, positions 1..n are the facilities
    loc = [list(centroid)] + facility_locs

    # one row per facility, one column (the property) per row
    matrix = client.distance_matrix(
        locations=loc,
        sources=list(range(1, len(loc))),
        destinations=[0],
        metrics=['distance', 'duration'],
    )
    durations = [row[0] for row in matrix['durations']]
    distances = [row[0] for row in matrix['distances']]

    # ignore facilities for which no duration was returned
    usable = [pos for pos, duration in enumerate(durations) if duration is not None]
    if not usable:
        return nothing_found

    # choose by duration (first one wins on ties) and report that facility's
    # own distance rather than the overall minimum distance
    best = min(usable, key=durations.__getitem__)
    return (postcode, names[best], distances[best], durations[best], loc[best + 1])


In [14]:
# Column-oriented accumulator: one independent empty list per output field,
# filled with one entry per postcode by the lookup loop.
shopping_centers = {
    field: []
    for field in (
        "postcode",
        "lst_nearest_latlon",
        "nearest_station_name",
        "nearest_duration",
        "nearest_distance",
    )
}

In [15]:
# Run the nearest-facility lookup for every row of the centroid table and
# append each part of the returned tuple to its accumulator list.
for i in range(len(df_all_centroid)):
    # pause 10 seconds before every 20th lookup (this includes the very first one)
    if i % 20 == 0:
        time.sleep(10)

    result = nearest(df_all_centroid, df_shopping_centers_postcode, i)

    # position in the tuple returned by nearest() -> accumulator key
    for position, column in enumerate((
        "postcode",
        "nearest_station_name",
        "nearest_distance",
        "nearest_duration",
        "lst_nearest_latlon",
    )):
        shopping_centers[column].append(result[position])
    



In [16]:
# Turn the accumulated lists into a table: each key becomes a column.
# Note that the name `shopping_centers` is rebound from the dict to the DataFrame.
shopping_centers = pd.DataFrame.from_dict(shopping_centers, orient="columns")

In [17]:
# display the per-postcode nearest-shopping-centre table built above
shopping_centers

Unnamed: 0,postcode,lst_nearest_latlon,nearest_station_name,nearest_duration,nearest_distance
0,3131,"[145.164718, -37.835453]",Forest Hill Chase Shopping Centre,280.66,2020.1
1,3939,none,none,none,none
2,3429,"[144.729432, -37.577542]",Sunbury Square Shopping Centre,376.96,3663.28
3,3144,"[145.027026, -37.863028]",Malvern Central,288.87,2192.38
4,3338,none,none,none,none
...,...,...,...,...,...
175,3003,none,none,none,none
176,3008,"[144.952153, -37.816219]",Spencer Outlet Centre,207.21,1757.84
177,3207,none,none,none,none
178,3032,"[144.888781, -37.77369]",Highpoint Shopping Centre,372.4,2691.73


In [19]:
# Write the result table to CSV (the index is written as the first column).
# NOTE(review): this path starts with "../" while the CSV inputs are read from
# "../../data/..." -- confirm which one is right for this notebook's location.
output_csv_path = '../data/raw/ors-data/shopping_centers_by_postcode.csv'
shopping_centers.to_csv(output_csv_path)