# The distance and duration between train station and rental house

In [1]:
import openrouteservice as ors
import folium
from pandas import DataFrame
import pandas as pd
import math
import time
import os

In [3]:
key = os.environ.get('key')
client = ors.Client(key= key)

#Please note that due to the daily limit of the website and the limit of the number of reads per minute, it is recommended to read only ten rental data at a time
rent = pd.read_csv("../rent_data/rent_cleaned.csv")
rent = rent[1200:].reset_index(drop=True)
train = pd.read_excel("../data/raw/train_station_20to21.xlsx")

In [250]:
def cal_distance(coordinates, client):
    """
    function: calculate coordinates' distance
    param datas : durations, distances
    """
    matrix = client.distance_matrix(
    locations=coordinates,
    profile='driving-car',
    metrics=['distance', 'duration'],
    validate=False,
    )
    # api cannot read more than forty times per minute
    time.sleep(2.5)
    return matrix['durations'], matrix['distances']
# the unit of distance is m, the unit of durations is seconds

In [251]:
def get_summary(rent, train):
    """
    function: Get a list of each rent and the trains closest to the rent
    paras: summary(list)
    """
    coordinates = []
    r_coor = []
    summary = []
    # train coordinates
    for j in range(len(train)):
        coordinates.append([train["Stop_long"][j], train["Stop_lat"][j]])
    # rental house coordinates
    for i in range(len(rent)):
        r_coor.append([rent["longitude"][i], rent["latitude"][i]])
    # the set that one rental house's coordinates and the coordinates of the train that differs from the latitude and longitude of the rental house's coordinates by no more than 0.3
    for k in range(len(r_coor)):
        coor = []
        coor.append(r_coor[k])
        for m in range(len(coordinates)):
            if ((coordinates[m][0]-0.3) <= r_coor[k][0] <= (coordinates[m][0] + 0.3)) & ((coordinates[m][1]-0.3) <= r_coor[k][1] <= (coordinates[m][1] + 0.3)):
                coor.append(coordinates[m])
        summary.append(coor)
    return summary

In [252]:
def get_coordinates(rent, train):
    """
    function: Find the nearest site to rent 
    param datas : rent_address, stop_name, min_distance, min_duration
    """
    rent_address = []
    stop_name = []
    min_distance = []
    min_duration = []
    summary = get_summary(rent, train)
    new_sum = []
    # Pick out rental houses that do not have a corresponding train
    for l in range(len(summary)):
        if len(summary[l]) != 1:
            new_sum.append(summary[l])
        else:
            rent_address.append(rent['listingSlug'][l])
            stop_name.append("NULL")
            min_distance.append("NULL")
            min_duration.append("NULL")
    for i in range(len(new_sum)):
        distances = []
        durations = []
        # calculate distances and durations
        matrix = cal_distance(new_sum[i], client)
        distances = distances + matrix[1][0][1:]
        durations = durations + matrix[0][0][1:]
        index = distances.index(min(distances))
        rent_address.append(rent['listingSlug'][summary.index(new_sum[i])])
        stop_index = train[train['Stop_long'] == new_sum[i][index+1][0]].index.tolist()[0]
        stop_name.append(train['Stop_name'][stop_index])
        min_distance.append(distances[index])
        min_duration.append(durations[index])
    return rent_address, stop_name, min_distance, min_duration

In [253]:
def data_frame(rent, train):
    """
    function: put the required columns into a dataframe
    param datas: dataframe
    """
    #get dataframe
    data = {'rent_address': get_coordinates(rent, train)[0], 'stop_name': get_coordinates(rent, train)[1], 'min_distance': get_coordinates(rent, train)[2], 'min_duration': get_coordinates(rent, train)[3]}
    df = DataFrame(data)
    return df

df = data_frame(rent, train)
df.to_csv("../data/raw/train distance and duration/distances_durations(12).csv", index=False)

#### we ran the code twice. We upload the data directly into the ../distances_and_durations, because it was hard to copy.
#### This is the presentation of data

In [4]:
# merge data
old = pd.read_csv("../distances_and_durations/train_new/distances_durations(0).csv", index_col=0)
new = pd.read_csv("../distances_and_durations/train_new/distances_durations(1).csv", index_col=0)

In [5]:
renew = pd.concat([old, new])
new2 = pd.read_csv("../distances_and_durations/train_new/distances_durations(2).csv", index_col=0)
renew2 = pd.concat([renew, new2])
new3 = pd.read_csv("../distances_and_durations/train_new/distances_durations(3).csv", index_col=0)
renew3 = pd.concat([renew2, new3])
new4 = pd.read_csv("../distances_and_durations/train_new/distances_durations(4).csv", index_col=0)
renew4 = pd.concat([renew3, new4])
new5 = pd.read_csv("../distances_and_durations/train_new/distances_durations(5).csv", index_col=0)
renew5 = pd.concat([renew4, new5])
new6 = pd.read_csv("../distances_and_durations/train_new/distances_durations(6).csv", index_col=0)
renew6 = pd.concat([renew5, new6])
new7 = pd.read_csv("../distances_and_durations/train_new/distances_durations(7).csv", index_col=0)
renew7 = pd.concat([renew6, new7])
new8 = pd.read_csv("../distances_and_durations/train_new/distances_durations(8).csv", index_col=0)
renew8 = pd.concat([renew7, new8]).reset_index()

In [7]:
# add rantal house's postcode and price
rent = pd.read_csv("../rent_data/rent_extra.csv", index_col=0)
new_data = renew8
postcode = []
for i in range(len(new_data)):
    postcode.append(new_data['rent_address'][i][-13:-9])
new_data['postcode'] = postcode
price =[]
for rent_a in new_data['rent_address']:
    price.append(rent[rent['listingSlug'] == rent_a]['price'].tolist()[0])
new_data['price'] = price
new_data = new_data.sort_values(by=["postcode"],ascending=True).reset_index(drop=True)
new_data

Unnamed: 0,rent_address,stop_name,min_distance,min_duration,postcode,price
0,20-davison-street-richmond-vic-3121-6782699,Southern Cross Railway Station (Melbourne City),6406.38,768.71,-312,550.0
1,3008-288-spencer-street-melbourne-vic-3000-160...,Southern Cross Railway Station (Melbourne City),1923.12,236.46,3000,410.0
2,7-562-little-bourke-street-melbourne-vic-3000-...,Southern Cross Railway Station (Melbourne City),1692.09,195.70,3000,380.0
3,816-101-therry-st-melbourne-vic-3000-16068338,Southern Cross Railway Station (Melbourne City),2623.76,326.55,3000,820.0
4,601-8-exploration-lane-melbourne-vic-3000-1608...,Southern Cross Railway Station (Melbourne City),3353.56,394.56,3000,530.0
...,...,...,...,...,...,...
914,9-oakley-street-mount-dandenong-vic-3767-16111799,Nar Nar Goon Railway Station (Nar Nar Goon),47388.73,3325.33,3767,500.0
915,30-beluga-street-mount-eliza-vic-3930-16104711,,,,3930,1900.0
916,71-balcombe-drive-mount-martha-vic-3934-16113110,,,,3934,950.0
917,4-cirrus-close-hampton-park-vic-3976-16112856,Nar Nar Goon Railway Station (Nar Nar Goon),30036.18,2073.70,3976,420.0


In [337]:
new_data.to_csv("../distances_and_durations/train_new/train_distance_duration.csv")