In [176]:
import ast
import json
import os
import time
from datetime import datetime
from difflib import SequenceMatcher
from functools import lru_cache

import folium
import geopy
import googlemaps
import haversine as hs
import numpy as np
import openrouteservice as ors
import pandas as pd
import seaborn as sns
from geopy.exc import GeocoderTimedOut
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim
from haversine import Unit
from IPython.display import display
from pandas.io.json import json_normalize

## Get API Keys

In [None]:
# one API key per line of the text file; trailing whitespace/newlines are removed
with open('../data/raw/APIkeys.txt') as file:
    api_keys = [line.rstrip() for line in file.readlines()]

## Merge Dataset

In [None]:
# read the datasets produced by the data-cleaning step
# (all paths are relative to the notebook's working directory)
external = pd.read_csv('../data/curated/external.csv', low_memory = False)
property_df = pd.read_csv('../data/curated/cleaned_property_data.csv', low_memory = False)
rental = pd.read_csv('../data/curated/rental_median.csv', low_memory = False)
GNR = pd.read_csv('../data/curated/GNR_cleaned.csv', low_memory = False)
count_table = pd.read_csv('../data/curated/count_table.csv', low_memory = False)

# read the postcode -> suburb lookup: every JSON key becomes a 'postcode' value and
# every JSON value the matching 'suburb' entry
# NOTE(review): later cells index and concatenate the 'suburb' entries as lists of
# suburb names — confirm against the JSON file
with open('../data/raw/postcode_match_suburb.json') as json_data:
    data = json.load(json_data)
postcode_match = pd.DataFrame.from_dict({'postcode':data.keys(), 'suburb':data.values()})
# JSON object keys are strings, so convert the postcodes to numbers
postcode_match['postcode'] = pd.to_numeric(postcode_match['postcode'])

In [None]:
# display all dataframe
display(external.head(10))
display(property_df.head(10))
display(rental.head(10))
display(GNR.head(10))
display(postcode_match.head(10))
property_df.shape

In [None]:
# show the all types of property data
property_df['type'].unique()

In [None]:
# Property types that are dropped as outliers.
excluded_types = ['Carspace', 'Retirement', 'Farm', 'Acreage / Semi-Rural', 'Rural', 'New House & Land']
# Remaining fine-grained types are folded into the two broad classes.
type_mapping = {
    'Villa': 'House',
    'Semi-Detached': 'House',
    'Duplex': 'House',
    'New Apartments / Off the Plan': 'Apartment / Unit / Flat',
    'Terrace': 'Apartment / Unit / Flat',
}

# .copy() makes the filtered frame independent of the original, so the column
# assignment below does not trigger SettingWithCopyWarning.
property_df = property_df[~property_df['type'].isin(excluded_types)].copy()
property_df['type'] = property_df['type'].replace(type_mapping)
# later cells address rows by position, so restore a 0..n-1 index
property_df = property_df.reset_index(drop=True)

In [None]:
# split the address and get the suburbs
def extract_suburb(address):
    """Pick the suburb token out of a space-separated address string.

    The third token from the end is returned, unless that token consists only
    of digits, in which case the fourth token from the end is returned.
    Only a single token is returned, so a multi-word suburb yields its last word.

    Raises:
        IndexError: if the address has too few space-separated tokens.
    """
    tokens = address.split(" ")
    position = -4 if tokens[-3].isdigit() else -3
    return tokens[position]
# suburb token taken from the address, stored in upper case
property_df['suburb'] = property_df["address"].apply(extract_suburb).str.upper()
# postcodes must be numeric so they compare equal to the numeric postcodes of the lookup table
property_df['postcode'] = pd.to_numeric(property_df['postcode'])

In [None]:
# select the features we need
feature_columns = ['address', 'rent_weekly', 'floor', 'suburb', 'postcode', 'type', 'furnitured', 'pool',
                   'gym', 'num_bed', 'num_bath', 'num_car_park', 'coordinates']
# .copy() so the assignment below works on an independent frame (no SettingWithCopyWarning)
property_df = property_df[feature_columns].copy()
# convert coordinates from their string form to Python objects;
# ast.literal_eval only accepts literals, so file contents can never execute code (unlike eval)
property_df['coordinates'] = property_df['coordinates'].apply(ast.literal_eval)

In [None]:
property_df.head(5)

In [None]:
# calculate the similarity percentage
def similar(a, b):
    """Return the difflib similarity ratio of two sequences (a float between 0 and 1)."""
    matcher = SequenceMatcher(None, a, b)
    return matcher.ratio()

# find the most similar suburb
def most_similar(df,suburb):
    """Return the candidate name in ``df['suburb']`` that is most similar to ``suburb``.

    Each cell of the ``suburb`` column may hold either a single name or a
    collection (list/tuple/set) of names; collections are flattened so every
    name is compared individually.

    Args:
        df: DataFrame with a ``suburb`` column of candidate names.
        suburb: the name to match.

    Returns:
        The only candidate if exactly one exists; otherwise the candidate with
        the highest similarity ratio (the first one wins ties). The string
        ``"None"`` is returned when there are no candidates or none of them
        shares anything with ``suburb``.
    """
    candidates = []
    for entry in df['suburb']:
        if isinstance(entry, (list, tuple, set)):
            candidates.extend(entry)
        else:
            candidates.append(entry)
    # missing values (NaN) cannot be compared as text
    candidates = [name for name in candidates if isinstance(name, str)]

    # a single candidate is returned whole (not its first character / first element)
    if len(candidates) == 1:
        return candidates[0]

    best_score, best_name = 0, "None"
    for name in candidates:
        # same metric as similar(): difflib ratio between 0 and 1
        score = SequenceMatcher(None, suburb, name).ratio()
        if score > best_score:
            best_score, best_name = score, name
    return best_name

def correct_suburb(suburb_df, property_df):
    """Replace suburb names that do not belong to the row's postcode.

    For every row of ``property_df`` the first ``suburb_df`` row with the same
    postcode is looked up. If the row's suburb is not contained in that entry,
    it is replaced by the result of ``most_similar``. Rows whose postcode is
    not present in ``suburb_df`` are left unchanged.

    Note: a later cell of this notebook defines another function with the same
    name for the rental data; that definition replaces this one once it runs.

    Args:
        suburb_df: lookup table with ``postcode`` and ``suburb`` columns.
        property_df: table with ``postcode`` and ``suburb`` columns; modified in place.

    Returns:
        The same ``property_df`` object, with corrected suburbs.
    """
    # iterate over the real index labels instead of assuming 0..n-1
    for row in property_df.index:
        postcode = property_df.loc[row, 'postcode']
        match_df = suburb_df[suburb_df['postcode'] == postcode]
        # unknown postcode: nothing to compare against, keep the suburb as it is
        if match_df.empty:
            continue
        sub_lis = match_df['suburb'].iloc[0]
        current = property_df.loc[row, 'suburb']
        if current not in sub_lis:
            property_df.at[row, 'suburb'] = most_similar(match_df, current)
    return property_df


In [None]:
# correct the suburb names in property data
property_df = correct_suburb(postcode_match,property_df)  

In [None]:
property_df.shape

In [None]:
# use the same lower-case 'suburb' column name as the property data
rental = rental.rename(columns={'Suburb': 'suburb'})
# suburb names are compared in upper case
rental['suburb'] = rental['suburb'].str.upper()
# any cell that is exactly 'CBD' becomes 'MELBOURNE'
rental = rental.replace('CBD', 'MELBOURNE')

In [None]:
rental

In [None]:
# change the position of "EAST","WEST","NORTH","SOUTH" for some cases
def change_word_position(suburb):
    """Move a leading compass word (EAST/WEST/NORTH/SOUTH) to the end of a suburb name.

    The name is returned unchanged when it has a single word, when it does not
    start with a compass word, or when its second word is one of the names
    that keep the compass word in front (e.g. "SOUTH MELBOURNE").
    """
    compass_words = ["EAST","WEST","NORTH","SOUTH"]
    keeps_prefix = ["MELBOURNE",'GEELONG','BENDIGO','YEOBURN','WANGARATTA','WARBURTON','SALE',
                    'BAIRNSDALE','YARRA','FOOTSCRAY']
    words = suburb.split(" ")
    if len(words) < 2:
        return suburb
    if words[0] not in compass_words:
        return suburb
    if words[1] in keeps_prefix:
        return suburb
    # everything after the compass word first, then the compass word itself
    return " ".join(words[1:] + [words[0]])
rental['suburb'] = rental['suburb'].apply(change_word_position)

In [None]:
# find the most similar suburb names for rental data
def correct_suburb(suburb_df, df):
    """Replace suburb names in ``df`` that are not known suburbs.

    The known names are all entries of ``suburb_df['suburb']`` (each cell is a
    collection of names). A row whose suburb is not among them gets the result
    of ``most_similar`` over all known names.

    Note: this definition replaces the earlier ``correct_suburb`` used for the
    property data.

    Args:
        suburb_df: lookup table whose ``suburb`` column holds collections of names.
        df: table with a ``suburb`` column; modified in place.

    Returns:
        The same ``df`` object, with corrected suburbs.
    """
    # Build the candidate list once instead of once per row; sorting makes the
    # order (and therefore tie-breaking in most_similar) reproducible.
    known_names = sorted({name for names in suburb_df['suburb'] for name in names})
    known_lookup = set(known_names)
    candidates = pd.DataFrame.from_dict({'suburb': known_names})
    for row in df.index:
        current = df.loc[row, 'suburb']
        if current not in known_lookup:
            df.at[row, 'suburb'] = most_similar(candidates, current)
    return df
rental = correct_suburb(postcode_match,rental)

In [None]:
# merge rental data and property_df
property_df = pd.merge(property_df, rental, on='suburb', how='left').fillna(np.nan)

In [None]:
property_df

In [None]:
count_table

In [None]:
# merge count_table data and property_df
property_df = pd.merge(property_df, count_table, on='suburb', how='left').fillna(np.nan)
property_df

In [None]:
property_df.to_csv('../data/curated/final_property.csv',index=False)

## Calculate Distances

In [None]:
property_df = pd.read_csv('../data/curated/final_property.csv', low_memory=False)
# the CSV stores coordinates as text; ast.literal_eval parses the literal
# without executing arbitrary code from the file (unlike eval)
property_df['coordinates'] = property_df['coordinates'].apply(ast.literal_eval)

In [None]:
# Coarse pre-filters: keep only the rows of `df` whose LATITUDE/LONGITUDE lie inside a
# square box centred on `loc` (indexed as [0] = latitude, [1] = longitude).
# NOTE(review): the box half-widths are given in degrees (0.01 / 0.015 / 0.02), not metres.
# One degree of latitude is roughly 111 km, so these boxes are smaller than the
# 1.5 km / 2 km / 3 km in the function names — confirm the intended radius.
def _within_box(loc, df, half_width_deg):
    """Return the rows of ``df`` within ``half_width_deg`` degrees of ``loc`` on both axes (bounds inclusive)."""
    lat, lon = loc[0], loc[1]
    in_longitude = df['LONGITUDE'].between(lon - half_width_deg, lon + half_width_deg)
    in_latitude = df['LATITUDE'].between(lat - half_width_deg, lat + half_width_deg)
    return df[in_longitude & in_latitude]

def around_1500m(loc, df):
    """Rows of ``df`` within +/-0.01 degrees of ``loc`` in latitude and longitude."""
    return _within_box(loc, df, 0.01)

def around_2km(loc, df):
    """Rows of ``df`` within +/-0.015 degrees of ``loc`` in latitude and longitude."""
    return _within_box(loc, df, 0.015)

def around_3km(loc, df):
    """Rows of ``df`` within +/-0.02 degrees of ``loc`` in latitude and longitude."""
    return _within_box(loc, df, 0.02)

# keep a running record of the closest points seen so far (two by default)
def cloest_point(dist_dict,dist,loc,stop,keep=2):
    """Add a candidate point and keep only the ``keep`` closest ones.

    The previous pop/append logic could evict the nearest stored point
    (e.g. distances 5, 10 followed by 3 left 10 and 3). The record is now
    always the ``keep`` smallest distances seen, ordered nearest first.

    Args:
        dist_dict: list of ``(dist, loc, stop)`` tuples collected so far; modified in place.
        dist: distance of the new candidate.
        loc: coordinates of the new candidate.
        stop: name of the new candidate.
        keep: maximum number of points to retain.

    Returns:
        The same list object, sorted by distance and at most ``keep`` long.
    """
    dist_dict.append((dist,loc,stop))
    # sort on the distance only, so coordinates/names are never compared
    dist_dict.sort(key=lambda record: record[0])
    del dist_dict[keep:]
    return dist_dict
            
# Closest public-transport stops for one property.
# The GNR table used to be re-read from disk on every call (once per property row);
# it is now read once and shared by the three lookups below.
@lru_cache(maxsize=None)
def _load_gnr():
    """Read the GNR table once; later calls reuse the same DataFrame (treat it as read-only)."""
    return pd.read_csv('../data/curated/GNR_cleaned.csv', low_memory = False)

def _closest_stops(loc1, feature, box_filter):
    """Return the closest ``feature`` points around ``loc1``.

    Args:
        loc1: property coordinates, indexed as [0] = latitude, [1] = longitude.
        feature: value of the FEATURE column to select.
        box_filter: function ``(loc, df) -> df`` that pre-selects nearby rows.

    Returns:
        The list built by ``cloest_point`` from ``(distance, (lat, lon), PLACE_NAME)``
        candidates; empty when no point lies inside the box.
    """
    gnr = _load_gnr()
    nearby = box_filter(loc1, gnr[gnr['FEATURE'] == feature])
    dist_lis = []
    for lat, lon, name in zip(nearby["LATITUDE"], nearby["LONGITUDE"], nearby["PLACE_NAME"]):
        loc2 = (lat, lon)
        # straight-line distance between property and stop, in metres
        dist = hs.haversine(loc1,loc2,unit=Unit.METERS)
        dist_lis = cloest_point(dist_lis,dist,loc2,name)
    return dist_lis

def distance_train(loc1):
    """Closest 'TRAIN STATION' points among those passing ``around_3km``."""
    return _closest_stops(loc1, 'TRAIN STATION', around_3km)

def distance_bus(loc1):
    """Closest 'BUS' points among those passing ``around_1500m``."""
    return _closest_stops(loc1, 'BUS', around_1500m)

def distance_tram(loc1):
    """Closest 'TRAM STATION' points among those passing ``around_2km``."""
    return _closest_stops(loc1, 'TRAM STATION', around_2km)

In [None]:
start=datetime.now()
# find the cloest train station
property_df['cloest_train_station'] = property_df["coordinates"].apply(distance_train)
print(datetime.now()-start)

In [None]:
start=datetime.now()
# find the cloest tram stop
property_df['cloest_tram_stop'] = property_df["coordinates"].apply(distance_tram)
print(datetime.now()-start)

In [None]:
start=datetime.now()
# find the cloest bus stop
property_df['cloest_bus_stop'] = property_df["coordinates"].apply(distance_bus)
print(datetime.now()-start)

In [None]:
property_df['cloest_train_station'][0]

In [None]:
property_df

In [None]:
property_df.to_csv('../data/curated/property_cleaned.csv',index=False)

In [None]:
# Driving distance and duration between two points, queried from openrouteservice.
# Both inputs are indexed as [0] = latitude, [1] = longitude (the form produced by the
# closest-stop lookups) and are swapped before being sent to the service.
def calculate_distance_between_coordinates(coordinate1, coordinate2, api_key):
    """Return ``(distance, duration)`` of the driving route from ``coordinate1`` to ``coordinate2``.

    Args:
        coordinate1: start point; element 0 and element 1 are swapped for the request.
        coordinate2: end point, same form as ``coordinate1``.
        api_key: openrouteservice API key.

    Returns:
        The ``distance`` and ``duration`` values of the first segment of the
        first returned route.
    """
    ors_client = ors.Client(key = api_key)

    origin = (coordinate1[1],coordinate1[0])
    destination = (coordinate2[1],coordinate2[0])

    route = ors_client.directions(
        coordinates=[origin, destination],
        profile='driving-car',
        format='geojson',
    )

    # pause after every request before the next one is sent
    time.sleep(1.5)

    segment = route['features'][0]['properties']['segments'][0]
    return segment['distance'], segment['duration']

In [None]:
def find_driving(coor,lis,api_key):
    """Query driving distance and duration from ``coor`` to the first two stops in ``lis``.

    The stop list may hold fewer than two entries (it is empty when no stop was
    found near the property); only the entries that exist are queried, instead
    of failing with an IndexError.

    Args:
        coor: property coordinates.
        lis: list of ``(distance, coordinates, name)`` tuples.
        api_key: openrouteservice API key.

    Returns:
        Dict with the parallel lists ``dist``, ``name`` and ``duration``, one
        element per queried stop.
    """
    result ={"dist":[],"name":[],"duration":[]}
    for stop in lis[:2]:
        dist, duration = calculate_distance_between_coordinates(coor,stop[1],api_key)
        result['dist'].append(dist)
        result['name'].append(stop[2])
        result['duration'].append(duration)
    return result

In [None]:
#property_df.at[row, 'suburb'] = most_match
            
def transportation_time(df,api1,api2,api3):
    """Add driving results to the closest train, tram and bus stops of every row.

    Args:
        df: table with the columns ``coordinates``, ``cloest_train_station``,
            ``cloest_tram_stop`` and ``cloest_bus_stop``; modified in place.
        api1: API key used for the train lookups.
        api2: API key used for the tram lookups.
        api3: API key used for the bus lookups.

    Returns:
        The same ``df`` with the new columns ``driving_to_train``,
        ``driving_to_tram`` and ``driving_to_bus``.
    """
    train_results, tram_results, bus_results = [], [], []
    rows = zip(df['coordinates'], df['cloest_train_station'],
               df['cloest_tram_stop'], df['cloest_bus_stop'])
    for coor, train_stops, tram_stops, bus_stops in rows:
        train_results.append(find_driving(coor, train_stops, api1))
        tram_results.append(find_driving(coor, tram_stops, api2))
        bus_results.append(find_driving(coor, bus_stops, api3))
    df['driving_to_train'] = train_results
    df['driving_to_tram'] = tram_results
    df['driving_to_bus'] = bus_results
    return df

In [None]:
property_df['cloest_tram_stop'][0]

In [None]:

# The Google Maps key is read from the environment instead of being written into the
# notebook. The key that was previously committed here should be revoked.
gmaps = googlemaps.Client(key=os.environ['GOOGLE_MAPS_API_KEY'])
now = datetime.now()
# shape[0] is an int, so it has to be wrapped in range() to be iterated
for i in range(property_df.shape[0]):
    properity_coor = property_df['coordinates'][i]
    train_stops = property_df['cloest_train_station'][i]
    tram_stops = property_df['cloest_tram_stop'][i]
    bus_stops = property_df['cloest_bus_stop'][i]
    # the stop lists are empty when nothing was found near the property
    closest_train_station = train_stops[0][1] if train_stops else None
    closest_tram_stop = tram_stops[0][1] if tram_stops else None
    closest_bus_stop = bus_stops[0][1] if bus_stops else None

# directions() needs an origin and a destination; the values left over from the
# last loop iteration are used here
directions_result = (
    gmaps.directions(properity_coor, closest_train_station, mode='driving', departure_time=now)
    if closest_train_station is not None else []
)

In [None]:
# google map code below:

In [180]:
# Reload the saved table. After the CSV round trip the list-valued columns come back as
# plain strings (the value shown for this cell is one quoted string), so indexing them
# returns single characters rather than tuples.
property_df = pd.read_csv('../data/curated/property_cleaned.csv', low_memory = False)
property_df['cloest_train_station'][0]

"[(208.7885192155693, (-37.811981, 144.955654), 'Flagstaff Railway Station (Melbourne City)'), (521.9037087436633, (-37.809939, 144.962594), 'Melbourne Central Railway Station (Melbourne City)')]"

In [181]:
property_df['cloest_train_station'][0][0]

'['

In [None]:
# find the travel time between each property and its first listed train station
def _parse_cell(value):
    """Turn the string form a list takes after a CSV round trip back into a Python object."""
    return ast.literal_eval(value) if isinstance(value, str) else value

final_direction_result = []
# key comes from the environment; the key previously committed here should be revoked
gmaps = googlemaps.Client(key=os.environ['GOOGLE_MAPS_API_KEY'])
now = datetime.now()
for i in range(property_df.shape[0]):
    properity_coor = _parse_cell(property_df['coordinates'][i])
    train_stations = _parse_cell(property_df['cloest_train_station'][i])
    # -1 marks properties without any train station in range
    if len(train_stations) < 1:
        final_direction_result.append(-1)
    else:
        closest_train_station = train_stations[0][1]
        directions_result = gmaps.directions(properity_coor, closest_train_station, mode='driving', departure_time=now)
        final_direction_result.append(directions_result)
final_direction_result

In [None]:
# Single-row check of the Google Maps lookup.
# Columns reloaded from CSV hold the string form of the lists, so parse them first.
properity_coor = property_df['coordinates'][0]
if isinstance(properity_coor, str):
    properity_coor = ast.literal_eval(properity_coor)
train_stations = property_df['cloest_train_station'][0]
if isinstance(train_stations, str):
    train_stations = ast.literal_eval(train_stations)
closest_train_station = train_stations[0][1]
# key comes from the environment; the key previously committed here should be revoked
gmaps = googlemaps.Client(key=os.environ['GOOGLE_MAPS_API_KEY'])
now = datetime.now()
directions_result = gmaps.directions(properity_coor, closest_train_station, mode='driving', departure_time=now)
directions_result

In [None]:
properity_coor = property_df['coordinates'][0]
properity_coor

In [None]:
# Query the routing service in batches because of the per-key request limit:
# every batch of rows uses its own set of three API keys (train, tram, bus).
BATCH_SIZE = 2000
KEYS_PER_BATCH = 3

n_rows = property_df.shape[0]
n_batches = -(-n_rows // BATCH_SIZE)  # ceiling division
# fail before the first request rather than part-way through the run
if n_batches * KEYS_PER_BATCH > len(api_keys):
    raise ValueError(
        f"{n_batches} batches need {n_batches * KEYS_PER_BATCH} API keys, "
        f"but only {len(api_keys)} were loaded"
    )

batches = []
# the range covers every row, so the last (shorter) batch needs no special case
for batch_no, i in enumerate(range(0, n_rows, BATCH_SIZE)):
    key = batch_no * KEYS_PER_BATCH
    df_i = property_df.iloc[i:i+BATCH_SIZE].copy()
    df_i = transportation_time(df_i,api_keys[key],api_keys[key+1],api_keys[key+2])
    batches.append(df_i)
    print(i)
# concatenate once at the end instead of growing the frame inside the loop
df = pd.concat(batches, ignore_index=True) if batches else pd.DataFrame()

In [None]:
# NOTE(review): `test` is not defined in any visible cell — presumably a small sample of
# property_df; confirm before running.
# transportation_time takes one API key per transport mode (train, tram, bus)
tesy = transportation_time(test,api_keys[0],api_keys[1],api_keys[2])
tesy

In [None]:
test['driving_to_train'][0]

In [None]:
test.index.to_list()

In [None]:
# read train station and rental data
# Same relative location the rest of the notebook uses. Replacing "notebooks" in the
# working directory broke whenever that word appeared elsewhere in the path or the
# notebook was started from another folder. `path` must keep its trailing slash
# because later cells build file names with `path + name`.
path = "../data/curated/"
train = pd.read_csv(path+'train_station.csv', low_memory = False)
property_df = pd.read_csv(path+'cleaned_rent.csv', low_memory = False)
train["coordinates"] = list(zip(train.LATITUDE, train.LONGITUDE))
#GNR = GNR.drop_duplicates(subset='coordinates', keep="first")

In [None]:
train.head(5)

In [None]:
# pair latitude and longitude into a 'coordinates' column, then keep the first
# 10 properties as a sample
property_df = property_df.assign(
    coordinates=list(zip(property_df.latitude, property_df.longitude))
).iloc[:10]
property_df.head(5)

In [None]:

# keep a running record of the closest points seen so far (three by default)
def cloest_point(dist_dict,dist,loc,stop,keep=3):
    """Add a candidate point and keep only the ``keep`` closest ones.

    The previous pop/append logic could evict the nearest stored point
    (e.g. distances 5, 10, 7 followed by 3 left 10, 7 and 3). The record is
    now always the ``keep`` smallest distances seen, ordered nearest first.

    Args:
        dist_dict: list of ``(dist, loc, stop)`` tuples collected so far; modified in place.
        dist: distance of the new candidate.
        loc: coordinates of the new candidate.
        stop: name of the new candidate.
        keep: maximum number of points to retain.

    Returns:
        The same list object, sorted by distance and at most ``keep`` long.
    """
    dist_dict.append((dist,loc,stop))
    # sort on the distance only, so coordinates/names are never compared
    dist_dict.sort(key=lambda record: record[0])
    del dist_dict[keep:]
    return dist_dict
            
@lru_cache(maxsize=None)
def _load_train_stations(csv_path):
    """Read the train-station table once per path; later calls reuse the same DataFrame (read-only)."""
    return pd.read_csv(csv_path, low_memory = False)

# closest train stations for one property
def distance(loc1):
    """Return the closest train stations to ``loc1``.

    Every row of ``train_station.csv`` (located via the notebook-level
    ``path``) is measured against ``loc1`` and handed to ``cloest_point``,
    which keeps the nearest ones. The file used to be re-read on every call;
    it is now read once and reused.

    Args:
        loc1: property coordinates as ``(latitude, longitude)``.

    Returns:
        The list built by ``cloest_point`` from ``(distance, (lat, lon), STOP_NAME)`` candidates.
    """
    train = _load_train_stations(path+'train_station.csv')
    dist_lis = []
    for lat, lon, name in zip(train["LATITUDE"], train["LONGITUDE"], train["STOP_NAME"]):
        loc2 = (lat, lon)
        # straight-line distance between property and station, in metres
        dist = hs.haversine(loc1,loc2,unit=Unit.METERS)
        dist_lis = cloest_point(dist_lis,dist,loc2,name)
    return dist_lis

In [None]:
property_df["train_station"]= property_df["coordinates"].apply(distance)  # calculate distance for each property row

In [None]:
property_df.head(5)

In [None]:
property_df["train_station"][0]

In [None]:
# Driving distance and duration between two coordinates, queried from openrouteservice.
# The coordinates are passed to the service unchanged, in [longitude, latitude] form.
def calculate_distance_between_coordinates(coordinate1, coordinate2, api_key=None):
    """Return ``(distance, duration)`` of the driving route between two coordinates.

    Note: this definition replaces the earlier function of the same name,
    which swapped the two elements of each coordinate; this one does not.

    Args:
        coordinate1: start point, sent to the service as given.
        coordinate2: end point, sent to the service as given.
        api_key: openrouteservice API key. When omitted, the key is read from
            the ``ORS_API_KEY`` environment variable so that no secret is
            stored in the notebook (the key previously committed here should
            be revoked).

    Returns:
        The ``distance`` and ``duration`` values of the first segment of the
        first returned route.

    Raises:
        KeyError: if no key is passed and ``ORS_API_KEY`` is not set.
    """
    if api_key is None:
        api_key = os.environ['ORS_API_KEY']

    client = ors.Client(key = api_key)

    route = client.directions(
        coordinates=[coordinate1, coordinate2],
        profile='driving-car',
        format='geojson',
    )

    segment = route['features'][0]['properties']['segments'][0]
    return segment['distance'], segment['duration']

In [None]:
# For every property, query the driving route to each candidate station and keep the
# station with the shortest driving duration.
# Local names avoid `distance` and `train`, which are a function and a DataFrame
# defined by earlier cells.
nearest_train_list = []
nearest_distance_list = []
nearest_duration_list = []
for i in range(len(property_df)):
    # the routing helper expects [longitude, latitude]
    property_coordinate = [property_df["longitude"][i], property_df["latitude"][i]]

    stations = property_df["train_station"][i]

    durations = []
    distances = []
    for station in stations:
        # stored station coordinates are (latitude, longitude)
        station_lat, station_long = station[1]
        dist_in_between, duration_in_between = calculate_distance_between_coordinates(
            property_coordinate, [station_long, station_lat])
        durations.append(duration_in_between)
        distances.append(dist_in_between)

    # no candidate stations: record missing values instead of reusing stale ones
    if not durations:
        nearest_train_list.append(None)
        nearest_distance_list.append(None)
        nearest_duration_list.append(None)
        continue

    # minimum over all candidates (the earliest wins ties); comparing only
    # neighbouring pairs ignored everything except the last two entries
    nearest_point_index = min(range(len(durations)), key=durations.__getitem__)

    nearest_train = stations[nearest_point_index]
    print(nearest_train)
    nearest_train_list.append(nearest_train[2])
    nearest_distance_list.append(distances[nearest_point_index])
    nearest_duration_list.append(durations[nearest_point_index])

In [None]:
# attach the per-property results collected by the loop above
property_df["nearest_train"] = nearest_train_list
property_df["nearest_distance(m)"] = nearest_distance_list
property_df["nearest_duration(s)"] = nearest_duration_list
# keep only the columns needed for the export
property_df = property_df[['address','coordinates', 'train_station', 'nearest_train','nearest_distance(m)', 'nearest_duration(s)']]

# NOTE(review): unlike the other exports in this notebook, index=False is not passed here,
# so the row index is written as an extra first column — confirm this is intended
property_df.to_csv("../data/curated/dist_property_train.csv")

In [None]:
property_df

#### for further coding

In [None]:
def find_closest(loc1, train_points):
    """Return ``(dist, duration)`` of the candidate with the shortest driving duration.

    Previously the values of the last candidate were returned regardless of
    how close it was, and an empty candidate list raised UnboundLocalError.

    Args:
        loc1: start coordinates, passed to the routing helper unchanged.
        train_points: iterable of ``(distance, coordinates, name)`` tuples.

    Returns:
        ``(dist, duration)`` of the quickest route, or ``(None, None)`` when
        ``train_points`` is empty.
    """
    # NOTE(review): the coordinates go to the routing helper as stored; the helper defined
    # just above expects [longitude, latitude] — confirm the order of loc1 and point[1]
    best = (None, None)
    for point in train_points:
        dist, duration = calculate_distance_between_coordinates(loc1,point[1])
        if best[1] is None or duration < best[1]:
            best = (dist, duration)
    return best

In [None]:
# generate a dict for each property data that contains distance and coordination of cloest three features
def generate_dict(features):
    """Return a dict that maps every feature name to its own new empty list."""
    return {feature: [] for feature in features}

# keep, per feature type, a running record of the closest points seen so far
def cloest_point(point,dist_dict,dist,loc,keep=3):
    """Add a candidate to its feature's record and keep only the ``keep`` closest ones.

    The previous pop/append logic could evict the nearest stored point
    (e.g. distances 5, 10, 7 followed by 3 left 10, 7 and 3). Each record is
    now always the ``keep`` smallest distances seen, ordered nearest first.

    Args:
        point: mapping/row providing ``"FEATURE"`` and ``"PLACE_NAME"``.
        dist_dict: dict from feature name to a list of
            ``(dist, loc, place_name)`` tuples; modified in place.
        dist: distance of the new candidate.
        loc: coordinates of the new candidate.
        keep: maximum number of points to retain per feature.

    Returns:
        The same ``dist_dict`` object.

    Raises:
        KeyError: if the point's feature has no entry in ``dist_dict``.
    """
    records = dist_dict[point["FEATURE"]]
    records.append((dist,loc,point["PLACE_NAME"]))
    # sort on the distance only, so coordinates/names are never compared
    records.sort(key=lambda record: record[0])
    del records[keep:]
    return dist_dict
            
@lru_cache(maxsize=None)
def _load_points_of_interest(csv_path):
    """Read the points-of-interest table once per path; later calls reuse the same DataFrame (read-only)."""
    return pd.read_csv(csv_path, low_memory = False)

# closest points of interest, per feature type, for one property
def distance(loc1):
    """Return, for every feature type, the closest points of interest to ``loc1``.

    Every row of ``GNR_suburb.csv`` (located via the notebook-level ``path``)
    is measured against ``loc1`` and handed to ``cloest_point``. The file used
    to be re-read on every call; it is now read once and reused.

    Note: this definition replaces the earlier train-station ``distance``.

    Args:
        loc1: property coordinates as ``(latitude, longitude)``.

    Returns:
        Dict from feature name to the list maintained by ``cloest_point``.
    """
    GNR = _load_points_of_interest(path+'GNR_suburb.csv')
    Point_of_Interest = list(GNR["FEATURE"].unique())
    # one empty record per feature type
    dist_lis = generate_dict(Point_of_Interest)
    for feature in Point_of_Interest:
        df = GNR[GNR["FEATURE"] == feature]  # rows of this feature type
        for _, point in df.iterrows():
            loc2 = (point["LATITUDE"],point["LONGITUDE"])
            # straight-line distance between property and point, in metres
            dist = hs.haversine(loc1,loc2,unit=Unit.METERS)
            dist_lis = cloest_point(point,dist_lis,dist,loc2)
    return dist_lis