#### Compute postcode and suburb columns for the tram and train datasets in VIC

In [1]:
import os

import geopandas as gpd
import pandas as pd

In [2]:
# "sf" is short for shapefile: the two transport layers are read with geopandas.
train_sf = gpd.read_file("TRANSPORT/VIC_RAILWAY_STATIONS.shp")
tram_sf = gpd.read_file("PTV/PTV_METRO_TRAM_STOP.shp")

# Scraped property listings, read as a plain CSV.
# NOTE: the name `property` shadows the Python builtin; it is kept because
# later cells refer to it.
property = pd.read_csv("../Scraping/PropertiesInfo_.csv")

In [3]:
# First 10 train stations as loaded; geometry still holds the shapefile's original coordinates.
train_sf.head(n=10)

Unnamed: 0,STATION,STATUS,STOPID_MTR,STID_VLINE,PREMIUM,METRO,VLINE,INTERSTATE,STOP_ZONE,COMMENTS,geometry
0,Dandenong,Active,19888.0,22250.0,Yes,Yes,Yes,No,"Reg, 2",Metro Interchange,POINT (2518416.074 2390122.665)
1,Darebin,Active,19932.0,,No,Yes,No,No,1,,POINT (2503402.928 2414018.324)
2,Darling,Active,19837.0,,Yes,Yes,No,No,"1, 2",,POINT (2505514.840 2403599.123)
3,Deer Park,Active,,19982.0,No,No,Yes,No,"Reg, 2",,POINT (2479846.449 2413695.029)
4,Dennis,Active,19929.0,,No,Yes,No,No,1,,POINT (2500767.433 2413533.196)
5,Diamond Creek,Active,19988.0,,No,Yes,No,No,2,,POINT (2514003.322 2425270.695)
6,Diggers Rest,Active,15353.0,,No,Yes,No,No,"Reg, 2",,POINT (2475289.928 2430304.627)
7,Dimboola,Active,,20617.0,No,No,No,Yes,,Interstate (Overland),POINT (2233991.956 2556541.948)
8,Dingee,Active,,20306.0,No,No,Yes,No,Reg,,POINT (2431004.390 2569738.362)
9,Donnybrook,Active,,20307.0,No,No,Yes,No,2,,POINT (2497363.855 2439841.887)


In [4]:
# First 10 tram stops as loaded, before any postcode/suburb columns are added.
tram_sf.head(n=10)

Unnamed: 0,STOP_ID,STOP_NAME,LATITUDE,LONGITUDE,TICKETZONE,ROUTEUSSP,geometry
0,18730,134-Merribell Ave/Nicholson St (Coburg),-37.744359,144.977728,1,1,POINT (144.97773 -37.74435)
1,18732,44-Deepdene Park/Whitehorse Rd (Balwyn),-37.811375,145.068671,1,109,POINT (145.06868 -37.81136)
2,18733,45-Hardwicke St/Whitehorse Rd (Balwyn),-37.81175,145.071785,1,109,POINT (145.07179 -37.81174)
3,18734,46-Balwyn Cinema/Whitehorse Rd (Balwyn),-37.812242,145.07593,1,109,POINT (145.07594 -37.81223)
4,18735,47-Balwyn Rd/Whitehorse Rd (Balwyn),-37.812919,145.081524,12,109,POINT (145.08153 -37.81291)
5,18736,48-Balwyn Shopping Centre/Whitehorse Rd (Balwyn),-37.813199,145.083788,12,109,POINT (145.08379 -37.81319)
6,18737,49-Northcote Ave/Whitehorse Rd (Balwyn),-37.81369,145.087956,12,109,POINT (145.08796 -37.81368)
7,18738,51-Narrak Rd/Whitehorse Rd (Mont Albert),-37.814564,145.09517,12,109,POINT (145.09518 -37.81455)
8,18781,134-Orrong Rd/Toorak Rd (Toorak),-37.841755,145.014048,1,58,POINT (145.01405 -37.84174)
9,18782,135-Woorigoleen Rd/Toorak Rd (Toorak),-37.842166,145.017583,1,58,POINT (145.01759 -37.84215)


### Use reverse geocoding to get address details from latitudes and longitudes

In [5]:
import geopy
from geopy.geocoders import GoogleV3
import re
import googlemaps

#### Google Maps API reference: https://github.com/googlemaps/google-maps-services-python
##### Please avoid calling the API more often than necessary: a quota of 10,000 calls per day applies

In [6]:
def _first_long_name(geocode_results, component_type):
    """Return the ``long_name`` of the first address component of a given type.

    The reverse-geocoding results are scanned in order; within each result the
    address components are scanned in order. The first component whose
    ``types`` list contains ``component_type`` wins.

    Returns ``None`` when no result carries such a component.
    """
    for result in geocode_results:
        for component in result.get('address_components', []):
            if component_type in component.get('types', []):
                return component['long_name']
    return None


# Takes around 30 mins to execute
def add_geoFeatures(sf, api_key=None):
    """Add ``postcode`` and ``suburb`` columns by reverse geocoding every point.

    The geometry is reprojected to WGS84 longitude/latitude and each point is
    sent to the Google Maps reverse-geocoding endpoint (one request per row).

    Parameters
    ----------
    sf : GeoDataFrame
        Frame with a point ``geometry`` column. It is not modified; the work
        is done on a copy.
    api_key : str, optional
        Google Maps API key. When omitted, the ``GOOGLE_MAPS_API_KEY``
        environment variable is used. Keys must never be committed with the
        notebook.

    Returns
    -------
    GeoDataFrame
        Copy of ``sf`` with the geometry in longitude/latitude plus:
        ``postcode`` (numeric, NaN when the API returned no postal code) and
        ``suburb`` (upper-cased locality name, ``None`` when not found).

    Raises
    ------
    ValueError
        If no API key is supplied and ``GOOGLE_MAPS_API_KEY`` is not set.
    """
    if api_key is None:
        api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
    if not api_key:
        raise ValueError(
            "No Google Maps API key: pass api_key=... or set the "
            "GOOGLE_MAPS_API_KEY environment variable"
        )

    # Work on a copy so the caller's frame keeps its original geometry/columns.
    out = sf.copy()
    # Convert the geometry shape to latitude and longitude
    # Please attribute this if you are using it
    out['geometry'] = out['geometry'].to_crs("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
    points = out['geometry']

    gmaps = googlemaps.Client(key=api_key)

    postcodes = []
    suburbs = []
    # Iterate the (latitude, longitude) pairs directly instead of looking rows
    # up with .iloc[index], which is only correct for a default RangeIndex.
    for lat_lng in zip(points.y, points.x):
        # list of dictionaries, each containing address components
        geocode_results = gmaps.reverse_geocode(lat_lng)
        code = _first_long_name(geocode_results, 'postal_code')
        suburb = _first_long_name(geocode_results, 'locality')
        # A point without a postal code / locality must not abort the whole
        # (long, quota-limited) run: record a missing value instead.
        postcodes.append(pd.to_numeric(code) if code is not None else float('nan'))
        suburbs.append(suburb.upper() if suburb is not None else None)

    out['postcode'] = postcodes
    out['suburb'] = suburbs
    return out


In [7]:
# Geocode and persist one dataset at a time: add_geoFeatures issues one
# reverse-geocoding request per row, so writing each CSV as soon as its frame
# is ready means a failure while processing the train stations cannot discard
# the finished tram results.
tram_sf = add_geoFeatures(tram_sf)
tram_sf.to_csv("../../data/raw/tram_raw.csv")

train_sf = add_geoFeatures(train_sf)
train_sf.to_csv("../../data/raw/train_raw.csv")

In [8]:
# Check the geocoded train stations: `postcode` and `suburb` should now be present.
train_sf.head(n=10)

Unnamed: 0,STATION,STATUS,STOPID_MTR,STID_VLINE,PREMIUM,METRO,VLINE,INTERSTATE,STOP_ZONE,COMMENTS,geometry,postcode,suburb
0,Dandenong,Active,19888.0,22250.0,Yes,Yes,Yes,No,"Reg, 2",Metro Interchange,POINT (145.20964 -37.98992),3175,DANDENONG
1,Darebin,Active,19932.0,,No,Yes,No,No,1,,POINT (145.03863 -37.77480),3079,IVANHOE
2,Darling,Active,19837.0,,Yes,Yes,No,No,"1, 2",,POINT (145.06268 -37.86867),3145,MALVERN EAST
3,Deer Park,Active,,19982.0,No,No,Yes,No,"Reg, 2",,POINT (144.77122 -37.77750),3023,DEER PARK
4,Dennis,Active,19929.0,,No,Yes,No,No,1,,POINT (145.00871 -37.77918),3070,NORTHCOTE
5,Diamond Creek,Active,19988.0,,No,Yes,No,No,2,,POINT (145.15874 -37.67332),3089,DIAMOND CREEK
6,Diggers Rest,Active,15353.0,,No,Yes,No,No,"Reg, 2",,POINT (144.72005 -37.62773),3427,DIGGERS REST
7,Dimboola,Active,,20617.0,No,No,No,Yes,,Interstate (Overland),POINT (142.03186 -36.45304),3414,DIMBOOLA
8,Dingee,Active,,20306.0,No,No,Yes,No,Reg,,POINT (144.23110 -36.36897),3571,DINGEE
9,Donnybrook,Active,,20307.0,No,No,Yes,No,2,,POINT (144.97017 -37.54212),3064,DONNYBROOK


In [9]:
# Check the geocoded tram stops: `postcode` and `suburb` should now be present.
tram_sf.head(n=10)

Unnamed: 0,STOP_ID,STOP_NAME,LATITUDE,LONGITUDE,TICKETZONE,ROUTEUSSP,geometry,postcode,suburb
0,18730,134-Merribell Ave/Nicholson St (Coburg),-37.744359,144.977728,1,1,POINT (144.97773 -37.74435),3058,COBURG
1,18732,44-Deepdene Park/Whitehorse Rd (Balwyn),-37.811375,145.068671,1,109,POINT (145.06868 -37.81136),3103,DEEPDENE
2,18733,45-Hardwicke St/Whitehorse Rd (Balwyn),-37.81175,145.071785,1,109,POINT (145.07179 -37.81174),3103,DEEPDENE
3,18734,46-Balwyn Cinema/Whitehorse Rd (Balwyn),-37.812242,145.07593,1,109,POINT (145.07594 -37.81223),3103,BALWYN
4,18735,47-Balwyn Rd/Whitehorse Rd (Balwyn),-37.812919,145.081524,12,109,POINT (145.08153 -37.81291),3103,BALWYN
5,18736,48-Balwyn Shopping Centre/Whitehorse Rd (Balwyn),-37.813199,145.083788,12,109,POINT (145.08379 -37.81319),3103,BALWYN
6,18737,49-Northcote Ave/Whitehorse Rd (Balwyn),-37.81369,145.087956,12,109,POINT (145.08796 -37.81368),3103,BALWYN
7,18738,51-Narrak Rd/Whitehorse Rd (Mont Albert),-37.814564,145.09517,12,109,POINT (145.09518 -37.81455),3127,SURREY HILLS
8,18781,134-Orrong Rd/Toorak Rd (Toorak),-37.841755,145.014048,1,58,POINT (145.01405 -37.84174),3142,TOORAK
9,18782,135-Woorigoleen Rd/Toorak Rd (Toorak),-37.842166,145.017583,1,58,POINT (145.01759 -37.84215),3142,TOORAK


In [10]:
# First 10 scraped property listings.
property.head(n=10)

Unnamed: 0,id,time,listing_type,price,property_type,area,building_area,land_area,bedrooms,bathrooms,carspaces,street_address,suburb,postcode,latitude,longitude
0,16066003,2022-08-31T13:00:15,Rent,$320,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,0,"1007/238 Flinders St, Melbourne",MELBOURNE,3000,-37.817516,144.966492
1,16065974,2022-08-31T12:50:55,Rent,$700,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,2.0,2.0,0,"1211/260 Spencer Street, Melbourne",MELBOURNE,3000,-37.814564,144.952286
2,15302782,2022-08-31T10:12:30,Rent,$395 per week,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,2.0,1.0,0,"504/350 La Trobe Street, Melbourne",MELBOURNE,3000,-37.81068,144.959274
3,16064379,2022-08-30T15:20:28,Rent,$370 per week,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,0,"1106/555 Flinders Street, Melbourne",MELBOURNE,3000,-37.820972,144.956482
4,16064243,2022-08-30T14:50:32,Rent,$500 per week,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,2.0,1.0,1,"2901/200 Spencer Street, Melbourne",MELBOURNE,3000,-37.816227,144.953247
5,16063884,2022-08-30T13:28:24,Rent,$2500 Per Week,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,3.0,3.0,2,"5401/464 Collins Street, Melbourne",MELBOURNE,3000,-37.817562,144.958588
6,16063772,2022-08-30T13:01:47,Rent,$450.00 per week,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,0,"405/39 Queen Street, Melbourne",MELBOURNE,3000,-37.817886,144.961731
7,16063139,2022-08-30T10:15:45,Rent,$530,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,3.0,2.0,0,"625/118 Franklin Street, Melbourne",MELBOURNE,3000,-37.808205,144.958908
8,16062720,2022-08-29T17:38:39,Rent,$550 per week,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,2.0,1.0,0,"1206/118 Russell Street, Melbourne",MELBOURNE,3000,-37.813587,144.968719
9,16062428,2022-08-29T16:32:47,Rent,$680,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,2.0,2.0,0,"1411/120 A Beckett St, Melbourne",MELBOURNE,3000,-37.809307,144.959732


In [11]:
# Pair every property with every tram stop that shares its postcode
# (inner join on `postcode`), then wrap the result as a GeoDataFrame.
gdf = gpd.GeoDataFrame(property.merge(tram_sf, how='inner', on='postcode'))

gdf


Unnamed: 0,id,time,listing_type,price,property_type,area,building_area,land_area,bedrooms,bathrooms,...,latitude,longitude,STOP_ID,STOP_NAME,LATITUDE,LONGITUDE,TICKETZONE,ROUTEUSSP,geometry,suburb_y
0,16066003,2022-08-31T13:00:15,Rent,$320,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,...,-37.817516,144.966492,17591,7-Russell St/Bourke St (Melbourne City),-37.812769,144.968354,1,9686,POINT (144.96836 -37.81276),MELBOURNE
1,16066003,2022-08-31T13:00:15,Rent,$320,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,...,-37.817516,144.966492,17593,6-Swanston St/Bourke St (Melbourne City),-37.813484,144.965904,1,9686,POINT (144.96591 -37.81347),MELBOURNE
2,16066003,2022-08-31T13:00:15,Rent,$320,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,...,-37.817516,144.966492,17594,5-Elizabeth St/Bourke St (Melbourne City),-37.814217,144.963510,1,9686,POINT (144.96352 -37.81420),MELBOURNE
3,16066003,2022-08-31T13:00:15,Rent,$320,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,...,-37.817516,144.966492,17596,4-Queen St/Bourke St (Melbourne City),-37.815253,144.959835,1,9686,POINT (144.95984 -37.81524),MELBOURNE
4,16066003,2022-08-31T13:00:15,Rent,$320,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,...,-37.817516,144.966492,17597,3-William St/Bourke St (Melbourne City),-37.815882,144.957625,1,9686,POINT (144.95763 -37.81587),MELBOURNE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216280,15988109,2022-07-15T11:07:25,Rent,$480,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,...,-37.841232,144.938354,19723,127-North Port Station/Light Rail (Port Melbou...,-37.833431,144.943336,1,109,POINT (144.94334 -37.83342),PORT MELBOURNE
216281,15988109,2022-07-15T11:07:25,Rent,$480,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,...,-37.841232,144.938354,19724,128-Graham St/Light Rail (Port Melbourne),-37.837032,144.937463,1,109,POINT (144.93747 -37.83702),PORT MELBOURNE
216282,15988109,2022-07-15T11:07:25,Rent,$480,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,...,-37.841232,144.938354,19725,129-Beacon Cove/Light Rail (Port Melbourne),-37.840789,144.932813,1,109,POINT (144.93282 -37.84078),PORT MELBOURNE
216283,15988109,2022-07-15T11:07:25,Rent,$480,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,...,-37.841232,144.938354,19371,127-North Port Station/Light Rail (Port Melbou...,-37.833208,144.943490,1,109,POINT (144.94350 -37.83319),PORT MELBOURNE
