### This notebook calculates the driving distance from each individual property to the Melbourne CBD using an online routing service via API calls
Created by Yuecheng Wang, 12-09-2024

In [1]:
import pandas as pd 
from tqdm import tqdm

In [2]:
# Make the project's helper scripts importable so the routing function can be used here
import os
import sys

# Absolute path of the scripts folder, resolved against the current working directory
scripts_path = os.path.abspath('../../scripts')

# Register the folder only once, so re-running this cell does not add duplicate entries
if scripts_path not in sys.path:
    sys.path.append(scripts_path)

# Project helper, applied row by row further down with the CBD coordinate as `des_coords`
from api_function import calculate_distance_car

In [3]:
# Melbourne CBD reference point, stored as [latitude, longitude] in decimal degrees.
# Value taken from a Google search for the Melbourne CBD coordinates: 37.8136° S, 144.9631° E
melb_coord = [
    -37.8136,  # latitude (south of the equator, hence negative)
    144.9631,  # longitude (east, hence positive)
]

In [4]:
# Routes are calculated with OSRM, see https://project-osrm.org/
# NOTE(review): `api_base_url` is not referenced by any other cell visible in this notebook,
# and the value looks like the project website rather than a routing endpoint - confirm
# which URL `calculate_distance_car` actually calls.
api_base_url = "https://project-osrm.org"

# Input dataset: the preprocessed Domain property listings
file_path = "../../data/raw/domain/all_properties_preprocessed.csv"
domain_data = pd.read_csv(file_path)

In [5]:
# Summarise the columns, dtypes and non-null counts of the loaded listings
# (used to check that Latitude and Longitude have no missing values)
domain_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6283 entries, 0 to 6282
Data columns (total 15 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Unnamed: 0                    6283 non-null   int64  
 1   Address                       6283 non-null   object 
 2   Cost                          6283 non-null   float64
 3   Property Type                 6283 non-null   object 
 4   Bedrooms                      6283 non-null   int64  
 5   Bathrooms                     6283 non-null   int64  
 6   Latitude                      6283 non-null   float64
 7   Longitude                     6283 non-null   float64
 8   Closest Gov Secondary School  5468 non-null   object 
 9   Gov Secondary Distance        5468 non-null   object 
 10  Age under 20                  6210 non-null   object 
 11  Age 20-39                     6210 non-null   object 
 12  Age 40-59                     6210 non-null   object 
 13  Age

As all coordinates are present, we can use the full dataset. \
First, try the calculation with the first 5 rows.

In [6]:
# Work on an independent copy of the first five listings so the trial run
# never modifies `domain_data`
sample_properties = domain_data.iloc[:5].copy()

In [7]:
# Call the project helper once per row (axis=1) with the CBD coordinate as destination
# and keep its result in a new 'CBD distance(km)' column
sample_properties['CBD distance(km)'] = sample_properties.apply(
    calculate_distance_car,
    axis=1,
    des_coords=melb_coord,
)

In [8]:
# Display the five sampled listings to inspect the result of the trial run
sample_properties

Unnamed: 0.1,Unnamed: 0,Address,Cost,Property Type,Bedrooms,Bathrooms,Latitude,Longitude,Closest Gov Secondary School,Gov Secondary Distance,Age under 20,Age 20-39,Age 40-59,Age 60+,Postcode,CBD distance(km)
0,0,"8/90 Hambleton Street, Middle Park VIC 3206",410.0,Apartment,1,1,-37.847553,144.960477,Albert Park College,1.2 km away,36%,15%,30%,19%,3206,4.9721
1,1,"3/33 Bevan Street, Albert Park VIC 3206",550.0,Apartment,1,1,-37.839959,144.956373,Albert Park College,0.9 km away,28%,18%,28%,26%,3206,4.0862
2,3,"7/7-9 Faussett Street, Albert Park VIC 3206",490.0,Apartment,1,1,-37.84167,144.955332,Albert Park College,0.8 km away,20%,26%,29%,25%,3206,4.4456
3,4,Albert Park VIC 3206,1280.0,Apartment,1,1,-37.843861,144.951454,Albert Park College,0.4 km away,17%,20%,36%,27%,3206,4.7979
4,5,"214/363 Beaconsfield Parade, Middle Park VIC 3206",350.0,Apartment,1,1,-37.854035,144.961308,Albert Park College,1.6 km away,19%,24%,27%,29%,3206,5.8848


This works fine. Now run it on the full dataset to calculate the CBD distance for every property.

In [9]:
# Register `progress_apply` on pandas objects so the long run shows a progress bar
tqdm.pandas()

# The full run calls the routing helper once per listing and takes hours
# (the progress bar estimated more than two hours for 6283 rows). To keep
# "Restart & Run All" feasible, reuse the distances saved by a previous completed
# run when they match the current dataset. Delete the file to force a recalculation.
cbd_distance_cache = '../../data/raw/domain/cbd_distance.csv'
distance_column = 'CBD distance(km)'

CBD_distance_df = None
if os.path.exists(cbd_distance_cache):
    cached_distances = pd.read_csv(cbd_distance_cache)
    # Only trust the cache if it has the expected column and exactly one row per listing
    if distance_column in cached_distances.columns and len(cached_distances) == len(domain_data):
        # The cache is written without an index, so re-attach the dataset's index by position
        CBD_distance_df = pd.DataFrame(
            {distance_column: cached_distances[distance_column].to_numpy()},
            index=domain_data.index,
        )

if CBD_distance_df is None:
    # No usable cache: compute the distance for every listing (one helper call per row)
    CBD_distance = domain_data.progress_apply(calculate_distance_car, des_coords=melb_coord, axis=1)

    # Store distance as independent file for merge later
    CBD_distance_df = CBD_distance.to_frame(name=distance_column)
else:
    # Keep the Series variable defined for any code that still reads it
    CBD_distance = CBD_distance_df[distance_column]

  0%|▏                                                                              | 11/6283 [00:14<2:14:51,  1.29s/it]


KeyboardInterrupt: 

Once finished, store the distances in a new dataset.

In [None]:
# Persist the distances as a single-column CSV. The row index is not written,
# so rows can only be matched back to the listings by their order.
CBD_distance_df.to_csv(
    path_or_buf='../../data/raw/domain/cbd_distance.csv',
    index=False,
)