# Calculate Straight-Line Distance to the Nearest Park

In [1]:
import requests
import pandas as pd
import re
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import time

In [2]:
# Load the park locations; the rest of the notebook reads the
# 'latitude' and 'longitude' columns of this frame.
PARKS_JSON_PATH = '../data/landing/parks.json'
park = pd.read_json(PARKS_JSON_PATH)
park.head()

Unnamed: 0,title,latitude,longitude
0,Yarra Ranges National Park,-37.682829,145.978727
1,Wyperfeld National Park,-35.585065,142.052018
2,Wilsons Promontory National Park,-39.027074,146.333818
3,Werribee Gorge State Park,-37.663112,144.363175
4,Warrandyte State Park,-37.735107,145.204488


In [3]:
def distance(lat1, lon1, lat2, lon2):
    """
    Approximate the straight-line distance between two coordinates.

    The degree differences are converted to kilometers on a flat plane
    (111 km per degree, with the east-west component scaled by the cosine
    of the mean latitude) and combined with the Pythagorean theorem.

    Parameters:
    - lat1, lon1: Latitude and longitude of the first location, in degrees.
    - lat2, lon2: Latitude and longitude of the second location, in degrees.

    Returns:
    - float: The approximate distance between the two locations in kilometers.
    """
    km_per_degree = 111

    # A degree of longitude shrinks away from the equator, so scale the
    # east-west component by cos(mean latitude), with the mean in radians.
    mean_lat_rad = np.radians((lat1 + lat2) / 2.0)

    north_south_km = (lat2 - lat1) * km_per_degree
    east_west_km = (lon2 - lon1) * km_per_degree * np.cos(mean_lat_rad)

    # Hypotenuse of the two perpendicular components.
    return np.sqrt(north_south_km**2 + east_west_km**2)

In [4]:
def find_nearest_park(house_lat, house_lon, park):

    """
    Find the distance from a house to the nearest park in a DataFrame.

    Parameters:
    - house_lat: Latitude of the house.
    - house_lon: Longitude of the house.
    - park: DataFrame containing park locations with 'latitude' and 'longitude' columns.

    Returns:
    - float: The distance to the nearest park, in the units returned by
      `distance` (kilometers). NaN if `park` has no rows.
    """

    # `distance` is made of element-wise NumPy arithmetic, so handing it the
    # whole coordinate columns yields the distance to every park in a single
    # call instead of invoking it once per row through DataFrame.apply.
    distances = distance(house_lat, house_lon, park['latitude'], park['longitude'])

    # Return the closest distance (Series.min skips NaN coordinates)
    return distances.min()

In [5]:
# Load the property listings; the 'Latitude' and 'Longitude' columns are
# used below to locate each house.
HOUSES_CSV_PATH = '../data/raw/domain_outliers_removed.csv'
df = pd.read_csv(HOUSES_CSV_PATH)

In [6]:
# Create another column which represents the distance to the nearest park 
df['NearestParkDistance'] = df.apply(
    lambda row: find_nearest_park(row['Latitude'], row['Longitude'], park), 
    axis=1
)

# Output the csv file
df.to_csv("../data/raw/merge_requirement/Direct_park.csv")