# Use the OpenRouteService API to Calculate the Driving Distance to the Nearest Shopping Centre

In [2]:
import requests
import time
import pandas as pd
import re
import os
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
def get_driving_distance(api_key, lat1, lon1, lat2, lon2, timeout=30):

    """
    Calculate the driving distance between two sets of latitude and longitude coordinates using the OpenRouteService API.

    Parameters:
    - api_key: Your OpenRouteService API key, sent in the Authorization header.
    - lat1, lon1: Latitude and longitude of the starting location.
    - lat2, lon2: Latitude and longitude of the destination location.
    - timeout: Seconds to wait for the server before giving up (default 30).

    Returns:
    - float: The driving distance in kilometers between the two locations.

    Raises:
    - requests.HTTPError: If the API answers with an error status; the message
      carries the status code and the response body returned by the server.
    - requests.RequestException: On connection problems or when the timeout expires.
    """

    # Endpoint for driving distance calculation
    url = "https://api.openrouteservice.org/v2/directions/driving-car"

    # Notice that ORS uses (longitude, latitude) order in the query string
    query = f"?start={lon1},{lat1}&end={lon2},{lat2}"

    # A GET request has no body, so the media types are sent as "Accept"
    # (what we are willing to receive) rather than "Content-Type".
    headers = {
        "Authorization": api_key,
        "Accept": "application/json, application/geo+json, application/gpx+xml, img/png; charset=utf-8"
    }

    # Make the request; the timeout keeps a stalled connection from hanging a long batch run
    response = requests.get(url + query, headers=headers, timeout=timeout)

    # Fail with the server's own explanation instead of an opaque KeyError further down
    if not response.ok:
        raise requests.HTTPError(
            f"OpenRouteService request failed ({response.status_code}): {response.text}",
            response=response,
        )

    # Parse the JSON response
    data = response.json()

    # Extract the driving distance (in meters) of the first route segment
    distance_meters = data['features'][0]['properties']['segments'][0]['distance']
    return distance_meters / 1000  # Convert to kilometers

In [4]:
# Load the shopping-centre coordinates (the Latitude/Longitude columns are read by
# find_nearest_shop) and preview the first rows.
shopping = pd.read_csv('../data/landing/mall_coordinates.csv')
shopping.head()

Unnamed: 0,Mall Name,Longitude,Latitude
0,206 Bourke Street,144.966947,-37.812733
1,670 Chapel,144.996158,-37.837395
2,Acland Court Shopping Centre,144.980617,-37.868967
3,Altona Gate Shopping Centre,144.84627,-37.828989
4,Arena Shopping Centre,145.435175,-38.064493


In [12]:
# The OpenRouteService key is a credential and must not be committed to the notebook.
# Export it before starting Jupyter, e.g.  export ORS_API_KEY="<your key>"
API_KEY = os.environ.get("ORS_API_KEY", "")
if not API_KEY:
    print("Warning: ORS_API_KEY is not set; OpenRouteService requests will be rejected.")

In [5]:
def distance(lat1, lon1, lat2, lon2):

    """
    Approximate the distance between two coordinates by treating the area between them as flat.

    Parameters:
    - lat1, lon1: Latitude and longitude of the first location, in degrees.
    - lat2, lon2: Latitude and longitude of the second location, in degrees.

    Returns:
    - float: The approximate straight-line distance in kilometers
      (one degree is taken to be 111 km).
    """

    km_per_degree = 111

    # North-south separation in kilometers
    north_south_km = (lat2 - lat1) * km_per_degree

    # East-west separation in kilometers: scaled by the cosine of the mid latitude
    mid_lat_rad = np.radians((lat1 + lat2) / 2.0)
    east_west_km = (lon2 - lon1) * km_per_degree * np.cos(mid_lat_rad)

    # Combine the two legs with the Pythagorean theorem
    squared_sum = north_south_km**2 + east_west_km**2
    return np.sqrt(squared_sum)


In [6]:
def find_nearest_shop(house_lat, house_lon):

    """
    Return the driving distance from a house to its closest shopping centre.

    Every shop in the global `shopping` dataframe is first ranked by the
    approximate straight-line distance from `distance`. Only the three closest
    candidates are then sent to `get_driving_distance`, and the smallest
    driving distance among them is returned.

    Parameters:
    - house_lat, house_lon (float): Latitude and longitude of the house's location.

    Returns:
    - float: The minimum driving distance (in kilometers) to the nearest shop
      (infinity if `shopping` has no rows).
    """

    # (latitude, longitude, straight-line distance) for every shop
    candidates = [
        (mall_lat, mall_lon, distance(house_lat, house_lon, mall_lat, mall_lon))
        for mall_lat, mall_lon in zip(shopping['Latitude'], shopping['Longitude'])
    ]

    # Keep the three shops with the smallest straight-line distance
    candidates.sort(key=lambda entry: entry[2])
    shortlist = candidates[:3]

    # Ask the routing API for the driving distance to each shortlisted shop
    road_distances = [
        get_driving_distance(API_KEY, house_lat, house_lon, mall_lat, mall_lon)
        for mall_lat, mall_lon, _ in shortlist
    ]

    # Seed with infinity so an empty shortlist still yields a value
    min_driving_distance = min([float('inf')] + road_distances)

    # Print the minimum driving distance
    print(min_driving_distance)

    # Sleep for a second to avoid hitting API rate limits or overloading the system
    time.sleep(1)

    return min_driving_distance


In [7]:
# Load the house dataset (outliers already removed, per the file name)
df = pd.read_csv('../data/raw/domain_outliers_removed.csv')

In [8]:
# Create the folder for the closest driving distances to shopping malls.
relative_path = '../data/raw/shopping_distance/'
# exist_ok=True makes this a no-op when the folder is already there and avoids
# the race between checking for the folder and creating it.
os.makedirs(relative_path, exist_ok=True)

In [15]:
# Rows noted by the original author as failing:
#   2178-2179: no drivable route
#   5091-5092: coordinate is (0, 0)
#   5380-5381: coordinate is (0, 0)
#   6074-6075: coordinate is (0, 0)
#   8787-8788: no drivable route

input_path = '../data/raw/domain_outliers_removed.csv'
output_dir = '../data/raw/shopping_distance/'

# Because of the API key's request limit, the distances are calculated 20 rows
# per loop iteration; the small batches also make it possible to identify the
# problematic coordinates listed above.
batch_size = 20
total_rows = 8894

for batch_start in range(0, total_rows, batch_size):
    # Keep the header (row 0) and skip the data rows handled by earlier batches
    df = pd.read_csv(input_path, skiprows=range(1, batch_start + 1), nrows=batch_size)

    # total_rows is hard-coded; stop cleanly if the file has fewer rows than that
    if df.empty:
        break

    df['NearestShoppingDistance'] = df.apply(
        lambda row: find_nearest_shop(row['Latitude'], row['Longitude']),
        axis=1
    )

    # Derive the batch number from batch_size so the two cannot drift apart
    batch_number = (batch_start // batch_size) + 1
    output_path = f"{output_dir}{batch_number}_{batch_start + 1}-{batch_start + batch_size}.csv"

    df.to_csv(output_path, index=False)


0.7061000000000001
0.48660000000000003
0.48660000000000003
0.3063
0.6447999999999999
0.6886
0.5962999999999999
0.24730000000000002
0.9101
0.2746
0.1827
0.24730000000000002
0.6065
0.8486
0.3063
0.576
1.1300999999999999
1.9558
1.5155
1.9801
