In [80]:
import openrouteservice as ors
import folium
from pandas import DataFrame
import pandas as pd
import math
import time
import re
import shapefile
import os
# The openrouteservice API key is read from the environment variable named 'key'.
# Never paste the key itself into the notebook.
key = os.environ.get('key')
client = ors.Client(key= key)
# SECURITY: a hard-coded API key used to be kept here in a commented-out line.
# It has been removed; treat that key as leaked and revoke it.

# Note: because of the service's daily quota and per-minute request limit, the
# rentals are processed in batches (the original note recommends reading only
# about ten listings at a time).
rent = pd.read_csv("../rent_data/rent_cleaned.csv")
# Batch selection for this run: keep rows from position 1200 onward and
# renumber them from 0 so later positional lookups line up.
rent = rent[1200:].reset_index(drop=True)
# Park data read with pyshp from the PARKRES dataset.
park = shapefile.Reader("../data/raw/ll_gda2020/esrishape/whole_of_dataset/victoria/CROWNLAND/PARKRES.dbf")
shapes = park.shapes()
records = park.records()


In [2]:
def get_centerpoint(lis):
    """
    program: calculate the centre point (area-weighted centroid) of a park outline

    The vertices are treated as one closed ring (the last vertex is joined back
    to the first) and the centroid is computed with the shoelace formula on the
    raw coordinate values.

    paras:
        lis: sequence of (longitude, latitude) pairs; every value is passed
             through float().
    returns:
        (longitude, latitude) tuple.
    raises:
        ValueError: if lis contains no vertices.
    """
    points = [(float(p[0]), float(p[1])) for p in lis]
    if not points:
        raise ValueError("get_centerpoint() needs at least one vertex")

    # Work relative to the first vertex. With absolute coordinates the shoelace
    # terms are products of large numbers that almost cancel, which costs many
    # significant digits for small outlines; the centroid is translation
    # invariant, so shifting the origin keeps the result and avoids that loss.
    origin_lng, origin_lat = points[0]
    offsets = [(lng - origin_lng, lat - origin_lat) for lng, lat in points]

    area = 0.0
    sum_lng = 0.0
    sum_lat = 0.0
    # The first edge runs from the last vertex back to the first one.
    prev_lng, prev_lat = offsets[-1]
    for lng, lat in offsets:
        cross = (lat * prev_lng - lng * prev_lat) / 2.0
        area += cross
        sum_lng += cross * (lng + prev_lng) / 3.0
        sum_lat += cross * (lat + prev_lat) / 3.0
        prev_lng, prev_lat = lng, lat

    if area == 0.0:
        # Degenerate outline (single point, line segment, collinear vertices):
        # the area-weighted centroid is undefined, so use the plain vertex
        # mean, ignoring a closing vertex that merely repeats the first one.
        ring = points[:-1] if len(points) > 1 and points[0] == points[-1] else points
        mean_lng = sum(p[0] for p in ring) / len(ring)
        mean_lat = sum(p[1] for p in ring) / len(ring)
        return mean_lng, mean_lat

    return origin_lng + sum_lng / area, origin_lat + sum_lat / area

In [3]:
def get_each_park_geo(shapes):
    """
    program: collect the centre point of every park shape
    para: shapes - sequence of objects exposing their vertices as .points
    return: list holding one get_centerpoint() result per shape, in input order
    """
    return [get_centerpoint(shape.points) for shape in shapes]
# Centre point of every park shape, in the same order as `shapes`.
park_c = get_each_park_geo(shapes)

In [4]:
def cal_distance(coordinates, client, pause=2.5, profile='driving-car'):
    """
    function: request the distance/duration matrix between coordinates

    params:
        coordinates: list of locations sent as `locations` to the matrix request.
        client: object exposing distance_matrix(...), e.g. the ors.Client.
        pause: seconds to sleep after a successful request so consecutive calls
               stay under the per-minute request limit (default 2.5, the value
               that used to be hard-coded). Values <= 0 skip the sleep.
        profile: routing profile passed to the request (default 'driving-car').
    return:
        (durations, distances) taken from the response.
        The unit of distance is m, the unit of durations is seconds.
    """
    matrix = client.distance_matrix(
        locations=coordinates,
        profile=profile,
        metrics=['distance', 'duration'],
        validate=False,
    )
    # Throttle only after the request went through; a failed request raises
    # before reaching this point, exactly as before.
    if pause > 0:
        time.sleep(pause)
    return matrix['durations'], matrix['distances']

In [78]:
def get_summary(rent, park_c, max_locations=58):
    """
    function: for each rental, list the rental location followed by the parks near it

    A park is a candidate when both its longitude and latitude lie within 0.05
    of the rental's. If a rental has more candidates than fit in one matrix
    request, only the candidates nearest to the rental (straight-line, in
    coordinate units) are kept, so the size limit always holds and the result
    does not depend on the order of park_c.

    paras:
        rent: table with "longitude" and "latitude" columns of equal length.
        park_c: list of (longitude, latitude) park centre points.
        max_locations: largest number of entries (rental + parks) allowed in one
                       list. The previous implementation started trimming as
                       soon as a list reached 59 entries, so 58 is the largest
                       size it let through untouched and is kept as default.
    return:
        summary(list): one list per rental, [[lng, lat], park, park, ...]. Park
        entries are the very objects from park_c; their original order is kept
        whenever no trimming was needed.
    """
    window = 0.05
    max_parks = max(max_locations - 1, 0)
    summary = []
    # Iterate by position so the result does not depend on the table's index labels.
    for r_lng, r_lat in zip(rent["longitude"], rent["latitude"]):
        nearby = [
            park for park in park_c
            if (park[0] - window) <= r_lng <= (park[0] + window)
            and (park[1] - window) <= r_lat <= (park[1] + window)
        ]
        if len(nearby) > max_parks:
            # Too many candidates for one request: keep the nearest ones.
            # sorted() is stable, so equally distant parks keep their input order.
            nearby = sorted(
                nearby,
                key=lambda park: (park[0] - r_lng) ** 2 + (park[1] - r_lat) ** 2,
            )[:max_parks]
        summary.append([[r_lng, r_lat]] + nearby)
    return summary

In [6]:
# Second field (position 1) of every shapefile record, in record order;
# presumably the park's name - confirm against the PARKRES field list.
park_name = [record[1] for record in records]

In [7]:
# Lookup table pairing each park's name with its centre point, used later to
# turn a matched coordinate back into a park name.
park_df = pd.DataFrame({'park_name': park_name, 'coordinates': park_c})

In [8]:
def get_coordinates(rent, park_c):
    """
    function: Find the nearest park to each rental

    Uses the notebook-level `client` for the matrix requests and the
    notebook-level `park_df` to translate a park coordinate back into its name.

    params:
        rent: table with 'listingSlug', 'longitude' and 'latitude' columns.
        park_c: list of park centre points handed to get_summary().
    return:
        rent_address, park_name, min_distance, min_duration - four parallel
        lists. Rentals without any candidate park come first (in input order)
        with "NULL" in the three park fields, followed by the remaining
        rentals in input order. A rental whose candidate parks are all
        unreachable by road also gets "NULL" values.
    """
    rent_address = []
    park_name = []
    min_distance = []
    min_duration = []

    summary = get_summary(rent, park_c)
    slugs = rent['listingSlug'].tolist()

    # Remember each rental's row position next to its location list. Looking
    # the list up again by value would return the first equal entry, so two
    # listings at the same coordinates would both receive the first one's slug.
    with_parks = []
    for position, locations in enumerate(summary):
        if len(locations) != 1:
            with_parks.append((position, locations))
        else:
            rent_address.append(slugs[position])
            park_name.append("NULL")
            min_distance.append("NULL")
            min_duration.append("NULL")

    for position, locations in with_parks:
        durations_matrix, distances_matrix = cal_distance(locations, client)
        # Row 0 is the rental itself; drop its distance to itself (column 0).
        distances = distances_matrix[0][1:]
        durations = durations_matrix[0][1:]
        rent_address.append(slugs[position])

        # Entries may be None when no route exists between two points;
        # those cannot be compared and must not be picked as the minimum.
        reachable = [j for j, dist in enumerate(distances) if dist is not None]
        if not reachable:
            park_name.append("NULL")
            min_distance.append("NULL")
            min_duration.append("NULL")
            continue

        # min() returns the first of equally near parks, like list.index(min(...)).
        index = min(reachable, key=distances.__getitem__)
        park_index = park_df[park_df['coordinates'] == locations[index + 1]].index.tolist()[0]
        park_name.append(park_df['park_name'][park_index])
        min_distance.append(distances[index])
        min_duration.append(durations[index])
    return rent_address, park_name, min_distance, min_duration

In [81]:
# -- coding: utf-8 --
def data_frame(rent, park):
    """
    function: put the required columns into a dataframe

    params:
        rent: rental table forwarded to get_coordinates().
        park: not used; the park centre points are taken from the
              notebook-level `park_c`. Kept so existing calls still work.
    return:
        dataframe with columns 'rent_address', 'stop_name' (the name of the
        nearest park), 'min_distance' and 'min_duration'.
    """
    # Run the lookup once and unpack it. Calling get_coordinates() separately
    # for every column repeats each matrix request (and its pause) four times.
    rent_address, nearest_park, min_distance, min_duration = get_coordinates(rent, park_c)
    data = {
        'rent_address': rent_address,
        'stop_name': nearest_park,
        'min_distance': min_distance,
        'min_duration': min_duration,
    }
    return DataFrame(data)

# Build the nearest-park table for the current slice of rentals and store it
# as batch file number 12 (without the row index).
batch_output_path = "/home/ads/generic-real-estate-consulting-project-group-55/data/raw/park distance and duration/park_distances_durations(12).csv"
df = data_frame(rent, park)
df.to_csv(batch_output_path, index=False)

In [82]:
# merge data
# Load the first two batch files; the first CSV column becomes the index.
old, new = (
    pd.read_csv(batch_path, index_col=0)
    for batch_path in (
        "/home/ads/generic-real-estate-consulting-project-group-55/data/raw/park distance and duration/park_distances_durations(0).csv",
        "/home/ads/generic-real-estate-consulting-project-group-55/data/raw/park distance and duration/park_distances_durations(1).csv",
    )
)

In [83]:
# Append batch files 2-12 to the first two batches (`old`, `new`) and stack
# everything in a single concat, in batch order. Only the merged table
# `renew12` is produced; the per-batch intermediates are no longer bound.
batch_dir = "/home/ads/generic-real-estate-consulting-project-group-55/data/raw/park distance and duration"
later_batches = [
    pd.read_csv("{}/park_distances_durations({}).csv".format(batch_dir, batch_no), index_col=0)
    for batch_no in range(2, 13)
]
# reset_index() turns the index read from the first CSV column back into a column.
renew12 = pd.concat([old, new] + later_batches).reset_index()

In [87]:
# Save the merged table; the row index is written as the first CSV column
# because no index option is passed.
final_output_path = "../data/curated/park_distances_durations(final).csv"
renew12.to_csv(final_output_path)