In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np

# Directories
data_dir = '../data/'
landing_dir = data_dir + 'landing/'
raw_dir = data_dir + 'raw/'
curated_dir = data_dir + 'curated/'

pd.set_option('display.max_columns', None)

foi_sf = gpd.read_file(f"{landing_dir}FOI/GEOMARK_POLYGON.shp")
rental_df = pd.read_csv(f"{raw_dir}rental_with_coordinates.csv")


In [2]:
# Filtering for only shopping precincts and shopping centres only
shopping_labels = ["shopping precinct", "shopping centre"]

foi_sf['geometry'] = foi_sf['geometry'].to_crs("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
shopping_sf = foi_sf[foi_sf['STATE'] == "VIC"]
shopping_sf = shopping_sf[shopping_sf['FTYPE'] == "commercial facility"]
shopping_sf = shopping_sf[shopping_sf['FEATSUBTYP'].isin(shopping_labels)]
shopping_sf = shopping_sf.reset_index(drop=True)

shopping_sf

Unnamed: 0,UFI,PFI,FEATURE_ID,PARENTFTID,FTYPE,FEATSUBTYP,FEATSTATUS,NAME,NAME_LABEL,PARENTNAME,CHILDEXIST,AUTHORGC,AUTHORGID,AUTHORGVER,VMADD_PFI,VICNAMESID,VICNMSTATC,THEME1,THEME2,STATE,CRDATE_PFI,SUPER_PFI,CRDATE_UFI,FEATURE_UF,FEATURE_CR,geometry
0,66115996,10449,10449,,commercial facility,shopping precinct,,,,,,,,0000/00/00,,-10379,11,,,VIC,2009-05-20,,2023-01-25,66115996,2023-01-25,"POLYGON ((145.15946 -37.78846, 145.15953 -37.7..."
1,69951988,1001570,1001570,,commercial facility,shopping precinct,,,,,,,,0000/00/00,,-1393296,11,,,VIC,2014-06-06,,2024-01-12,69951988,2024-01-12,"POLYGON ((144.99742 -37.82785, 144.9971 -37.82..."
2,66913459,15146,15146,,commercial facility,shopping precinct,,,,,,,,0000/00/00,51658679,-15076,11,,,VIC,2009-05-20,,2023-05-30,66913459,2023-05-30,"POLYGON ((145.16294 -37.99111, 145.16303 -37.9..."
3,68220163,1001597,1001597,,commercial facility,shopping precinct,,,,,,,,0000/00/00,50102927,-1393362,11,,,VIC,2014-06-06,,2023-07-04,68220163,2023-07-04,"POLYGON ((145.00757 -37.82634, 145.00719 -37.8..."
4,65742584,997184,997184,,commercial facility,shopping precinct,,,,,,,,0000/00/00,50246800,-1383305,11,,,VIC,2013-12-19,,2022-11-30,65742584,2022-11-30,"POLYGON ((145.00031 -37.92516, 145.00036 -37.9..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1798,70203279,12689,12689,,commercial facility,shopping precinct,,,,,Y,,,0000/00/00,206411135,-1396275,11,,,VIC,2009-05-20,,2024-02-02,70203279,2024-02-02,"POLYGON ((144.95769 -37.83244, 144.95756 -37.8..."
1799,70203320,999090,999090,,commercial facility,shopping precinct,,,,,Y,,,0000/00/00,50111075,-1388030,11,,,VIC,2014-04-08,,2024-02-02,70203320,2024-02-02,"POLYGON ((145.03327 -37.80706, 145.03317 -37.8..."
1800,70480889,999041,999041,,commercial facility,shopping precinct,,,,,,,,0000/00/00,,-1388096,11,,,VIC,2014-04-08,,2024-04-11,70480889,2024-04-11,"POLYGON ((145.04315 -37.82412, 145.04306 -37.8..."
1801,71229080,996191,996191,,commercial facility,shopping precinct,,,,,,,,0000/00/00,,-1381254,11,,,VIC,2013-10-29,,2024-08-22,71229080,2024-08-22,"POLYGON ((145.1682 -38.14301, 145.1679 -38.142..."


In [3]:
# Referenced: https://stackoverflow.com/questions/69854674/python-generate-lat-long-points-from-address
rental_df = rental_df.dropna(subset=['latitude', 'longitude'])
rental_df = rental_df.reset_index(drop=True)

In [4]:
from sklearn.metrics.pairwise import haversine_distances

shopping_sf['centroid'] = shopping_sf.centroid
# shopping_sf['centroid'] = shopping_sf['centroid'].to_crs("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
shopping_centroid_list = shopping_sf['centroid'].apply(lambda geom: (geom.y, geom.x))
shopping_centroid_list = list(shopping_centroid_list)
shopping_centroid_array = np.array(shopping_centroid_list)

rental_centroid_array = rental_df[['latitude', 'longitude']].to_numpy()

shopping_centroid_radians = np.radians(shopping_centroid_array)
rental_centroid_radians = np.radians(rental_centroid_array)

distances_radians = haversine_distances(shopping_centroid_radians, rental_centroid_radians)
distances_km = distances_radians * 6371
nearest_point_id = np.argmin(distances_km, axis=0)
nearest_distance = np.min(distances_km, axis=0)

rental_df['nearest_shopping_id'] = shopping_sf.index[nearest_point_id]
rental_df['distance_to_nearest_shopping'] = nearest_distance



  shopping_sf['centroid'] = shopping_sf.centroid


In [5]:
rental_df.to_csv(f"{curated_dir}rental_with_shopping_features.csv")