In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import os
import time
import haversine as hs
from haversine import Unit
import openrouteservice as ors
import folium

In [2]:
# read rental data
path = os.getcwd().replace("notebooks","") + "data/curated/"
train = pd.read_csv(path+'train_station.csv', low_memory = False)
property_df = pd.read_csv(path+'cleaned_rent.csv', low_memory = False)
train["coordinates"] = list(zip(train.LATITUDE, train.LONGITUDE))
#GNR = GNR.drop_duplicates(subset='coordinates', keep="first")

In [3]:
train.head(5)

Unnamed: 0.1,Unnamed: 0,STOP_NAME,LATITUDE,LONGITUDE,geometry,coordinates
0,0,Royal Park Railway Station (Parkville),-37.781193,144.952301,POINT (2495798.8483493025 2413310.211043857),"(-37.781193, 144.952301)"
1,1,Flemington Bridge Railway Station (North Melbo...,-37.78814,144.939323,POINT (2494656.139461309 2412538.540406987),"(-37.78814, 144.939323)"
2,2,Macaulay Railway Station (North Melbourne),-37.794267,144.936166,POINT (2494378.5109065063 2411858.3440139396),"(-37.794267, 144.936166)"
3,3,North Melbourne Railway Station (West Melbourne),-37.807419,144.94257,POINT (2494943.4098035772 2410399.010290851),"(-37.807419, 144.94257)"
4,4,Clifton Hill Railway Station (Clifton Hill),-37.788657,144.995417,POINT (2499596.850108406 2412482.847753912),"(-37.788657, 144.995417)"


In [4]:
# combine longtitude and latitude to coordnates and use first 10 property data as sample
property_df["coordinates"] = list(zip(property_df.latitude, property_df.longitude))
property_df = property_df.iloc[:10]
property_df.head(5)

Unnamed: 0,address,rent,features,type,furnitured,pool,gym,latitude,longitude,postcode,num_bed,num_bath,num_park,rent_weekly,coordinates
0,1414/218-228 A'Beckett Street Melbourne VIC 3000,$400 Per Week,1 Bed1 Bath− Parking,Apartment / Unit / Flat,No,No,No,-37.810283,144.956669,3000,1,1,0,400,"(-37.8102832, 144.9566691)"
1,11a/131 Lonsdale Sreet Melbourne VIC 3000,$350 per week,1 Bed1 Bath− Parking,Studio,Yes,No,No,-37.810779,144.968551,3000,1,1,0,350,"(-37.810779, 144.9685513)"
2,911/408 Lonsdale Street Melbourne VIC 3000,$330 per week,1 Bed1 Bath− Parking,Apartment / Unit / Flat,No,No,No,-37.812598,144.960401,3000,1,1,0,330,"(-37.8125979, 144.9604012)"
3,918/422 Collins St Melbourne VIC 3000,$600 Per week fully furnished,2 Beds1 Bath− Parking,Apartment / Unit / Flat,No,No,No,-37.817097,144.960149,3000,2,1,0,600,"(-37.8170971, 144.9601487)"
4,602/118 Franklin Street Melbourne VIC 3000,$330,1 Bed1 Bath− Parking,Apartment / Unit / Flat,No,No,No,-37.808205,144.958903,3000,1,1,0,330,"(-37.8082052, 144.9589035)"


In [13]:

# comparing the distances and record three cloest points
def cloest_point(dist_dict,dist,loc,stop):
    # if there is no point, then just append the point into dict
    if len(dist_dict) < 3:
        dist_dict.append((dist,loc,stop))
    # if there already has three points, then compares distance
    else:
        # if the current distance smaller than records' distance
        if dist < dist_dict[0][0]:
            # delete the record point
            dist_dict.pop(0)
            # append new cloest point
            dist_dict.append((dist,loc,stop))
        elif dist < dist_dict[1][0]:
            # delete the record point
            dist_dict.pop(1)
            # append new cloest point
            dist_dict.append((dist,loc,stop))
        elif dist < dist_dict[2][0]:
            # delete the record point
            dist_dict.pop(2)
            # append new cloest point
            dist_dict.append((dist,loc,stop))
    return dist_dict
            
# calculate the cloest three point of interest for each property data
def distance(loc1):
    # read train station data
    train = pd.read_csv(path+'train_station.csv', low_memory = False)
    # extact all features
    train_stop = list(train["STOP_NAME"].unique())
    # initaliza the dict for record the cloest three point of interest
    dist_lis = []
    # find cloest three points
    for i in range(train.shape[0]):
        # feature points
        loc2 = (train.iloc[i]["LATITUDE"],train.iloc[i]["LONGITUDE"])
        # calculate distance between property and feature
        dist = hs.haversine(loc1,loc2,unit=Unit.METERS)
        # check the cloest
        dist_lis = cloest_point(dist_lis,dist,loc2,train.iloc[i]["STOP_NAME"])
    return dist_lis

In [14]:
property_df["train_station"]= property_df["coordinates"].apply(distance)  # calculate distance for each property row

In [15]:
property_df.head(5)

Unnamed: 0,address,rent,features,type,furnitured,pool,gym,latitude,longitude,postcode,num_bed,num_bath,num_park,rent_weekly,coordinates,train_station
0,1414/218-228 A'Beckett Street Melbourne VIC 3000,$400 Per Week,1 Bed1 Bath− Parking,Apartment / Unit / Flat,No,No,No,-37.810283,144.956669,3000,1,1,0,400,"(-37.8102832, 144.9566691)","[(968.2301748529471, (-37.817936, 144.951411),..."
1,11a/131 Lonsdale Sreet Melbourne VIC 3000,$350 per week,1 Bed1 Bath− Parking,Studio,Yes,No,No,-37.810779,144.968551,3000,1,1,0,350,"(-37.810779, 144.9685513)","[(531.6128151500881, (-37.809939, 144.962594),..."
2,911/408 Lonsdale Street Melbourne VIC 3000,$330 per week,1 Bed1 Bath− Parking,Apartment / Unit / Flat,No,No,No,-37.812598,144.960401,3000,1,1,0,330,"(-37.8125979, 144.9604012)","[(422.63001113194304, (-37.811981, 144.955654)..."
3,918/422 Collins St Melbourne VIC 3000,$600 Per week fully furnished,2 Beds1 Bath− Parking,Apartment / Unit / Flat,No,No,No,-37.817097,144.960149,3000,2,1,0,600,"(-37.8170971, 144.9601487)","[(692.4760990676032, (-37.811981, 144.955654),..."
4,602/118 Franklin Street Melbourne VIC 3000,$330,1 Bed1 Bath− Parking,Apartment / Unit / Flat,No,No,No,-37.808205,144.958903,3000,1,1,0,330,"(-37.8082052, 144.9589035)","[(1266.4785902722447, (-37.817936, 144.951411)..."


In [16]:
property_df["train_station"][0]

[(968.2301748529471,
  (-37.817936, 144.951411),
  'Southern Cross Railway Station (Melbourne City)'),
 (208.7885192155693,
  (-37.811981, 144.955654),
  'Flagstaff Railway Station (Melbourne City)'),
 (521.9037087436633,
  (-37.809939, 144.962594),
  'Melbourne Central Railway Station (Melbourne City)')]

In [17]:
# put two coordinate in and return the duration between two coordinates
# coordinate form [longitude, latitude]
def calculate_distance_between_coordinates(coordinate1, coordinate2):

    # put your own open route service api key in here
    api_key_openrouteservice = '5b3ce3597851110001cf6248d864908ae526479e86e6f4dd70971a37'

    # connect open route service
    client = ors.Client(key = api_key_openrouteservice)

    # put two coordinates in list
    cor = [coordinate1, coordinate2]

    # using open route service
    route = client.directions(
    coordinates= cor,
    profile='driving-car',
    format='geojson',
    )

    # dict of distance and duration
    dist = route['features'][0]['properties']['segments'][0]['distance']
    duration = route['features'][0]['properties']['segments'][0]['duration']
    
    # return the duration
    return dist, duration

In [18]:
nearest_train_list = []
nearest_distance_list = []
nearest_duration_list = []
for i in range(len(property_df)):
    # print(i)
    long = property_df["longitude"][i]
    lat = property_df["latitude"][i]
    property_coordinate = [long, lat]

    # print(property_coordinate)

    train = property_df["train_station"][i]

    duration = []
    distance = []

    for j in range(len(train)):
        train_coord = train[j][1]
        train_long = train_coord[1]
        train_lat = train_coord[0]

        train_position = [train_long, train_lat]
        # print(train_position)
        dist_in_between, duration_in_detween = calculate_distance_between_coordinates(property_coordinate, train_position)

        duration.append(duration_in_detween)
        distance.append(dist_in_between)
    
    for k in range(1, len(duration)):
        if duration[k-1] <= duration[k]:
            nearest_point_index = k-1
            nearest_duration = duration[k-1]
            nearest_distance = distance[k-1]

        else:
            nearest_point_index = k
            nearest_duration = duration[k]
            nearest_distance = distance[k]

    nearest_train = train[nearest_point_index]
    print(nearest_train)
    nearest_train_list.append(nearest_train[2])
    nearest_distance_list.append(nearest_distance)
    nearest_duration_list.append(nearest_duration)

(208.7885192155693, (-37.811981, 144.955654), 'Flagstaff Railway Station (Melbourne City)')
(384.2112913357483, (-37.811054, 144.972911), 'Parliament Railway Station (Melbourne City)')
(352.87432145734346, (-37.809939, 144.962594), 'Melbourne Central Railway Station (Melbourne City)')
(824.4220620363707, (-37.809939, 144.962594), 'Melbourne Central Railway Station (Melbourne City)')
(377.20250553446317, (-37.809939, 144.962594), 'Melbourne Central Railway Station (Melbourne City)')
(536.9661892341837, (-37.811054, 144.972911), 'Parliament Railway Station (Melbourne City)')
(370.87885470950124, (-37.811981, 144.955654), 'Flagstaff Railway Station (Melbourne City)')
(539.0886706587495, (-37.811054, 144.972911), 'Parliament Railway Station (Melbourne City)')
(1009.6294735440555, (-37.811981, 144.955654), 'Flagstaff Railway Station (Melbourne City)')




(352.87432145734346, (-37.809939, 144.962594), 'Melbourne Central Railway Station (Melbourne City)')


In [19]:
property_df["nearest_train"] = nearest_train_list
property_df["nearest_distance(m)"] = nearest_distance_list
property_df["nearest_duration(s)"] = nearest_duration_list
property_df = property_df[['address','coordinates', 'train_station', 'nearest_train','nearest_distance(m)', 'nearest_duration(s)']]

property_df.to_csv("../data/curated/dist_property_train.csv")

In [20]:
property_df

Unnamed: 0,address,coordinates,train_station,nearest_train,nearest_distance(m),nearest_duration(s)
0,1414/218-228 A'Beckett Street Melbourne VIC 3000,"(-37.8102832, 144.9566691)","[(968.2301748529471, (-37.817936, 144.951411),...",Flagstaff Railway Station (Melbourne City),282.0,41.4
1,11a/131 Lonsdale Sreet Melbourne VIC 3000,"(-37.810779, 144.9685513)","[(531.6128151500881, (-37.809939, 144.962594),...",Parliament Railway Station (Melbourne City),762.1,117.8
2,911/408 Lonsdale Street Melbourne VIC 3000,"(-37.8125979, 144.9604012)","[(422.63001113194304, (-37.811981, 144.955654)...",Melbourne Central Railway Station (Melbourne C...,515.3,69.9
3,918/422 Collins St Melbourne VIC 3000,"(-37.8170971, 144.9601487)","[(692.4760990676032, (-37.811981, 144.955654),...",Melbourne Central Railway Station (Melbourne C...,1172.5,139.5
4,602/118 Franklin Street Melbourne VIC 3000,"(-37.8082052, 144.9589035)","[(1266.4785902722447, (-37.817936, 144.951411)...",Melbourne Central Railway Station (Melbourne C...,533.6,62.6
5,1112/333-351 Exhibition Street Melbourne VIC 3000,"(-37.80789559999999, 144.9682873)","[(549.3521437399352, (-37.809939, 144.962594),...",Parliament Railway Station (Melbourne City),983.1,140.9
6,3002/288 Spencer St Melbourne VIC 3000,"(-37.813775, 144.9520948)","[(466.56544569832755, (-37.817936, 144.951411)...",Flagstaff Railway Station (Melbourne City),619.8,123.7
7,4/180 Little Collins Street Melbourne VIC 3000,"(-37.8138601, 144.9679067)","[(638.6833490458583, (-37.809939, 144.962594),...",Parliament Railway Station (Melbourne City),743.6,107.1
8,1605/565 Flinders Street Melbourne VIC 3000,"(-37.8210586, 144.9559072)","[(525.867016936961, (-37.817936, 144.951411), ...",Flagstaff Railway Station (Melbourne City),1548.8,215.3
9,612/408 Lonsdale Street Melbourne VIC 3000,"(-37.8125979, 144.9604012)","[(422.63001113194304, (-37.811981, 144.955654)...",Melbourne Central Railway Station (Melbourne C...,515.3,69.9


#### for further coding

In [None]:
def find_closest(loc1, train_points):
    for point in train_points:
        dist, duration = calculate_distance_between_coordinates(loc1,point[1])
    return dist, duration

In [None]:
# generate a dict for each property data that contains distance and coordination of cloest three features
def generate_dict(features):
    dist_lis = {}
    # initialize feature
    for feature in features:
        dist_lis[feature] = []
    return dist_lis

# comparing the distances and record three cloest points
def cloest_point(point,dist_dict,dist,loc):
    # if there is no point, then just append the point into dict
    if len(dist_dict[point["FEATURE"]]) < 3:
        dist_dict[point["FEATURE"]].append((dist,loc,point["PLACE_NAME"]))
    # if there already has three points, then compares distance
    else:
        # if the current distance smaller than records' distance
        if dist < dist_dict[point["FEATURE"]][0][0]:
            # delete the record point
            dist_dict[point["FEATURE"]].pop(0)
            # append new cloest point
            dist_dict[point["FEATURE"]].append((dist,loc,point["PLACE_NAME"]))
        elif dist < dist_dict[point["FEATURE"]][1][0]:
            # delete the record point
            dist_dict[point["FEATURE"]].pop(1)
            # append new cloest point
            dist_dict[point["FEATURE"]].append((dist,loc,point["PLACE_NAME"]))
        elif dist < dist_dict[point["FEATURE"]][2][0]:
            # delete the record point
            dist_dict[point["FEATURE"]].pop(2)
            # append new cloest point
            dist_dict[point["FEATURE"]].append((dist,loc,point["PLACE_NAME"]))
    return dist_dict
            
# calculate the cloest three point of interest for each property data
def distance(loc1):
    # read point of interest data
    GNR = pd.read_csv(path+'GNR_suburb.csv', low_memory = False)
    # extact all features
    Point_of_Interest = list(GNR["FEATURE"].unique())
    # initaliza the dict for record the cloest three point of interest
    dist_lis = generate_dict(Point_of_Interest)
    # find cloest three points
    for feature in Point_of_Interest:
        df = GNR[GNR["FEATURE"] == feature]  # select feature data
        # calculate  distances for all feature points
        for i in range(df.shape[0]):
            # feature points
            loc2 = (df.iloc[i]["LATITUDE"],df.iloc[i]["LONGITUDE"])
            # calculate distance between property and feature
            dist = hs.haversine(loc1,loc2,unit=Unit.METERS)
            # check the cloest
            dist_lis = cloest_point(df.iloc[i],dist_lis,dist,loc2)
    return dist_lis