# Calculating Proximity to Melb CBD

- need to change api_key to your api

In [7]:
import time
from openrouteservice import Client
import math
import pandas as pd

In [8]:
domain_df = pd.read_parquet('../data/raw/all_domain_properties.parquet')

# Display the DataFrame
domain_df.head()

Unnamed: 0,parking,property_type,date_available,weekly_cost,address,suburb,postcode,beds,baths,latitude,longitude
0,1,Unit,2024,895.0,,,,3,2,-37.9180621,145.0437866
1,1,Unit,2024,500.0,1/37 brougham street,box hill,3128.0,2,1,-37.8215748,145.1156929
2,1,Unit,2024,400.0,2/137 market road,werribee,3030.0,3,2,-37.8898187,144.6537343
3,2,House,2024,548.0,61 pinnacle drive,pakenham,3810.0,4,2,-38.0839375,145.451479
4,2,Unit,2024,1300.0,2703/368 st kilda road,melbourne,3004.0,2,2,-37.8318264,144.9710642


In [11]:
# Check lat and longs are valid
print(domain_df['latitude'].apply(type).value_counts())

# Check lat and longs are valid
print(domain_df['latitude'].apply(type).value_counts())

latitude
<class 'str'>    13072
Name: count, dtype: int64
latitude
<class 'str'>    13072
Name: count, dtype: int64


In [12]:
# Initialize OpenRouteService client
api_key ='5b3ce3597851110001cf62483425e548d4ed4c2ca4a6b5814be96094' # PLUG IN YOUR API KEY HERE
client = Client(key=api_key)

# Define Melbourne CBD coordinates
melb_cbd = [144.962646, -37.810272]

def calculate_distances_to_cbd(properties, batch_size):
    distances_to_cbd = []
    num_batches = math.ceil(len(properties) / batch_size)

    for i in range(num_batches):
        # Get current batch of properties
        batch = properties[i * batch_size: (i + 1) * batch_size]
        # Create matrix request with properties as sources and Melb CBD as the destination
        locations = batch + [melb_cbd]  # Batch properties + CBD point

        while True:
            try:
                # Perform matrix request
                response = client.distance_matrix(
                    locations=locations,
                    profile='driving-car',
                    metrics=['distance'],
                    sources=list(range(len(batch))),  # Sources: property indices
                    destinations=[len(batch)]  # Destination: index of Melb CBD
                )

                # Extract distances from response (km)
                distances = [response['distances'][j][0] / 1000 for j in range(len(batch))]
                distances_to_cbd.extend(distances)
                break  # Exit the loop if successful

            except Exception as e:
                print(f"Error with batch {i+1}: {e}")
                if "Rate limit exceeded" in str(e):
                    print("Rate limit exceeded. Waiting for 60 seconds...")
                    time.sleep(60)  # Wait for a minute before retrying
                else:
                    distances_to_cbd.extend([None] * len(batch))  # Append None for failed requests
                    break  # Exit the loop on other errors

        time.sleep(2)  # additional delay between batches

    return distances_to_cbd

## Running Function on Rental Properties

- change domain_df to your df

In [13]:
# Convert the coordinates column to a list of [longitude, latitude] pairs
property_coordinates = domain_df[['longitude', 'latitude']].values.tolist()

# Calculate distances
distances = calculate_distances_to_cbd(property_coordinates, batch_size=50)

# Add the distances as a new column 'dist_cbd' to the original DataFrame
domain_df['dist_cbd'] = distances

# Display the updated DataFrame with the new column
domain_df.head(50)

Unnamed: 0,parking,property_type,date_available,weekly_cost,address,suburb,postcode,beds,baths,latitude,longitude,dist_cbd
0,1,Unit,2024,895.0,,,,3,2,-37.9180621,145.0437866,18.64398
1,1,Unit,2024,500.0,1/37 brougham street,box hill,3128.0,2,1,-37.8215748,145.1156929,19.62262
2,1,Unit,2024,400.0,2/137 market road,werribee,3030.0,3,2,-37.8898187,144.6537343,33.79973
3,2,House,2024,548.0,61 pinnacle drive,pakenham,3810.0,4,2,-38.0839375,145.451479,58.34248
4,2,Unit,2024,1300.0,2703/368 st kilda road,melbourne,3004.0,2,2,-37.8318264,144.9710642,3.16452


In [14]:
# Display the updated DataFrame with the new column
domain_df.head(50)

Unnamed: 0,parking,property_type,date_available,weekly_cost,address,suburb,postcode,beds,baths,latitude,longitude,dist_cbd
0,1,Unit,2024,895.0,,,,3,2,-37.9180621,145.0437866,18.64398
1,1,Unit,2024,500.0,1/37 brougham street,box hill,3128.0,2,1,-37.8215748,145.1156929,19.62262
2,1,Unit,2024,400.0,2/137 market road,werribee,3030.0,3,2,-37.8898187,144.6537343,33.79973
3,2,House,2024,548.0,61 pinnacle drive,pakenham,3810.0,4,2,-38.0839375,145.451479,58.34248
4,2,Unit,2024,1300.0,2703/368 st kilda road,melbourne,3004.0,2,2,-37.8318264,144.9710642,3.16452
5,2,House,2024,670.0,4 chevalier crescent,mooroolbark,3138.0,3,2,-37.7677611,145.3230364,38.69936
6,4,House,2024,500.0,45 stanley street,toora,3962.0,1,1,-38.6647862,146.3241998,183.01992
7,1,Townhouse,2024,855.0,54 nelson street,st kilda,3182.0,3,2,-37.8563704,144.9864482,6.79802
8,1,Unit,2024,530.0,307/12 waterview walk,docklands,3008.0,1,1,-37.8194723,144.9478449,3.17894
9,0,Unit,2024,460.0,1804/618 lonsdale street,melbourne,3000.0,1,1,-37.81441450000001,144.9539107,1.8307
