# Calculate distance to CBD and minimum distance and duration to train station

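This notebook calculates, for each rental property, the driving distance and duration to the Melbourne CBD, whose latitude and longitude coordinates are given as [-37.840935, 144.946457](https://www.latlong.net/place/melbourne-vic-australia-27235.html), as well as the minimum driving distance and duration to a train station. Note that the code below uses (-37.810272, 144.962646) as the CBD reference point (`MEL_CBD_COOR`), which differs from the coordinates quoted here.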

In [92]:
import math
import os
import time

import numpy as np
import openrouteservice as ors
import pandas as pd

In [93]:
# API quota: at most 40 calls per minute and 500 calls per day.
# Coordinates are always written as (lng, lat).
def cal_CBD_distance_duration(coordinates, client):
    """
    Query the driving distance and duration from every location to the Melbourne CBD.

    `coordinates` is a list of the form [(lng, lat), ..., (lng, lat)] whose first
    entry is the Melbourne CBD location; the remaining entries are the rental
    locations. `client` is the routing client whose `distance_matrix` method is
    called.

    Returns the pair (distances, durations) taken from the matrix response, in
    metres and seconds respectively. Only index 0 (the CBD) is requested as a
    destination.
    """
    request = dict(
        locations=coordinates,
        destinations=[0],
        profile='driving-car',
        metrics=['distance', 'duration'],
        validate=False,
    )
    response = client.distance_matrix(**request)
    cbd_distances = response['distances']
    cbd_durations = response['durations']
    return cbd_distances, cbd_durations

In [94]:
# Load the cleaned rental listings.
rent_df = pd.read_csv("../data/curated/rent_cleaned.csv")

# Melbourne CBD reference point as (lng, lat). It is placed first in `locations`
# because the CBD matrix request uses index 0 as its only destination.
MEL_CBD_COOR = [(144.962646, -37.810272)]
locations = [*MEL_CBD_COOR, *zip(rent_df['longitude'], rent_df['latitude'])]
print(len(locations))

2963


In [95]:
# The openrouteservice API key is read from the ORS_API_KEY environment variable
# so that no credential is stored in the notebook (raises KeyError if it is unset).
client = ors.Client(key=os.environ["ORS_API_KEY"])

# A single matrix request covers every rental (the recorded run took about 5 s).
# NOTE: the misspelled name `duractions` is kept because the following cell reads it.
distances, duractions = cal_CBD_distance_duration(locations, client)

In [96]:
# Every matrix row is indexed at position 0 to pull out its single CBD value,
# turning the list of lists into a flat list.
flattened_distances = [row[0] for row in distances]
flattened_durations = [row[0] for row in duractions]

# The first location is the CBD itself, so drop the CBD-to-CBD entry and keep
# one value per rental property.
dist_cbd = flattened_distances[1:]
dur_cbd = flattened_durations[1:]

# Build the result table on the same index as rent_df so rows line up with the rentals.
results_df = pd.DataFrame(
    dict(min_dist=dist_cbd, min_dur=dur_cbd),
    index=rent_df.index,
)

results_df.to_csv("../data/curated/dist_dur_to_cbd.csv")
results_df.head()


Unnamed: 0,min_dist,min_dur
0,77980.16,4052.63
1,18463.67,1724.63
2,38993.27,2457.46
3,50375.36,2703.94
4,15235.71,1499.86


## Train Station

In [56]:
def cal_distance_to_train(coordinates_list, client, n_destinations=10):
    """
    Request a driving distance/duration matrix whose destinations are the
    leading entries of `coordinates_list`.

    Parameters
    ----------
    coordinates_list : list of (lng, lat) tuples
        Every location sent in the request. The first `n_destinations` entries
        are used as destinations; the caller in this notebook places the rental
        properties first, followed by the train stations.
    client :
        Routing client whose `distance_matrix` method is called.
    n_destinations : int, default 10
        Number of leading locations to use as destinations. It is clamped to
        len(coordinates_list) so the request never references an index that
        does not exist.

    Returns
    -------
    tuple
        (distances, durations) taken from the matrix response. No sources are
        specified in the request; the caller treats each row as one entry of
        `coordinates_list` and each column as one destination.
    """
    destination_count = min(n_destinations, len(coordinates_list))
    matrix = client.distance_matrix(
        locations=coordinates_list,
        destinations=list(range(destination_count)),
        profile='driving-car',
        metrics=['distance', 'duration'],
        validate=False,
    )
    return matrix['distances'], matrix['durations']


Each request covering 10 rental properties takes about 2.5 s to run and uses 1 API call of the quota.

In [87]:
# The openrouteservice API key is read from the ORS_API_KEY environment variable
# so that no credential is stored in the notebook (raises KeyError if it is unset).
client = ors.Client(key=os.environ["ORS_API_KEY"])

train = pd.read_csv("../data/raw/external/train_station/metropolitan_train_location_2023.csv")
train_stations = list(zip(train['Stop_long'], train['Stop_lat']))

min_distances = []
min_durations = []

api_calls = 0
max_calls_per_batch = 40  # pause after this many requests (API limit: 40 calls per minute)
batch_size = 10           # rentals per request; cal_distance_to_train uses the first 10 locations as destinations

for i in range(0, len(rent_df), batch_size):

    rent_batch = rent_df[i:i + batch_size]
    # Rentals go first so that they occupy the leading rows/columns of the matrices.
    all_coordinates = list(zip(rent_batch['longitude'], rent_batch['latitude'])) + train_stations

    distances, durations = cal_distance_to_train(all_coordinates, client)

    dist_df = pd.DataFrame(distances)
    dur_df = pd.DataFrame(durations)

    # Keep only the train-station rows. The cut is made at the actual number of
    # rentals in this batch: slicing at a fixed 10 would drop the first train
    # stations from the search whenever the final batch holds fewer than 10 rentals.
    n_rentals = len(rent_batch)
    dist_df_filtered = dist_df.iloc[n_rentals:, :]
    dur_df_filtered = dur_df.iloc[n_rentals:, :]

    # Column-wise minimum = value for the closest train station per destination.
    # All columns are kept, so a final partial batch still appends 10 values; the
    # trailing non-rental entries are trimmed in the next cell.
    min_distances.extend(dist_df_filtered.min(axis=0))
    min_durations.extend(dur_df_filtered.min(axis=0))

    api_calls += 1

    if api_calls % max_calls_per_batch == 0:
        print("Pausing for 60 seconds to comply with API call limits.")
        time.sleep(60)

Pausing for 60 seconds to comply with API call limits.
Pausing for 60 seconds to comply with API call limits.
Pausing for 60 seconds to comply with API call limits.
Pausing for 60 seconds to comply with API call limits.
Pausing for 60 seconds to comply with API call limits.
Pausing for 60 seconds to comply with API call limits.
Pausing for 60 seconds to comply with API call limits.


In [88]:
# When the last request holds fewer than 10 rental properties, the remaining
# destination columns belong to train stations, so extra non-rental values are
# appended after the rental ones. Keep exactly one value per rental instead of
# trimming a hard-coded number of entries, which is only correct for one
# particular dataset size.
rent_min_distances = min_distances[:len(rent_df)]
rent_min_durations = min_durations[:len(rent_df)]
assert len(rent_min_distances) == len(rent_min_durations) == len(rent_df)
print(len(rent_min_distances))

2962


In [91]:
# Collect the nearest-train-station results in a DataFrame that shares rent_df's
# index, write it to disk, and preview the first rows.
results_df = pd.DataFrame(
    dict(min_dist=rent_min_distances, min_dur=rent_min_durations),
    index=rent_df.index,
)
results_df.to_csv("../data/curated/min_dist_dur_nearest_train.csv")
results_df.head()

Unnamed: 0,min_dist,min_dur
0,18581.67,1267.66
1,1974.77,216.9
2,4148.63,529.58
3,3284.04,480.19
4,2387.93,260.07
