In [3]:
import pandas as pd
from shapely.geometry import Point
from pyproj import Transformer
import geopandas as gpd

In [5]:
# Read data of property
property_data = pd.read_csv('../../data/raw/co_location.csv')

# Read data of stations (tram, train, bus)
tram_stops = gpd.read_file('../../data/raw/external/Transportation/PTV_METRO_TRAM_STOP.shp')
train_stops = gpd.read_file('../../data/raw/external/Transportation/PTV_REGIONAL_TRAIN_STATION.shp')
bus_stops = gpd.read_file('../../data/raw/external/Transportation/PTV_REGIONAL_BUS_ROUTE.shp')

# Read data of school and encoding to 'ISO-8859-1'
school_data = pd.read_csv('../../data/raw/external/school/schoollocations2023.csv', encoding='ISO-8859-1')

In [6]:
# Initialize the transformer to convert WGS84 (longitude and latitude) to EPSG:3111 (Vicgrid)
transformer = Transformer.from_crs("EPSG:4326", "EPSG:3111", always_xy=True)

# Convert property longitude and latitude to EPSG:3111
property_data['geometry'] = property_data.apply(
    lambda row: Point(transformer.transform(row['longitude'], row['latitude'])), axis=1
)

# Convert property data to a GeoDataFrame
property_gdf = gpd.GeoDataFrame(property_data, geometry='geometry', crs='EPSG:3111')

In [7]:
# Ensure valid geometry for tram, train, and bus stops
tram_stops['geometry'] = tram_stops['geometry'].apply(lambda geom: geom if geom.is_valid else geom.buffer(0))
train_stops['geometry'] = train_stops['geometry'].apply(lambda geom: geom if geom.is_valid else geom.buffer(0))
bus_stops['geometry'] = bus_stops['geometry'].apply(lambda geom: geom if geom.is_valid else geom.buffer(0))

In [8]:
# Convert school longitude and latitude to EPSG:3111 and create a GeoDataFrame
school_data['geometry'] = school_data.apply(
    lambda row: Point(transformer.transform(row['X'], row['Y'])), axis=1
)
school_gdf = gpd.GeoDataFrame(school_data, geometry='geometry', crs='EPSG:3111')

In [9]:
# Calculate the nearest tram stop and its distance for each property
def get_nearest_tram_stop(geom):
    if geom is None or geom.is_empty:
        return None, None
    tram_distances = tram_stops.distance(geom)
    nearest_tram_idx = tram_distances.idxmin()
    nearest_tram_stop_name = tram_stops.loc[nearest_tram_idx, 'STOP_NAME']
    nearest_tram_distance_km = tram_distances.min() / 1000 
    return nearest_tram_stop_name, nearest_tram_distance_km

# Apply the nearest tram stop and distance calculation
property_gdf['nearest_tram_stop'], property_gdf['nearest_tram_stop_distance_km'] = zip(
    *property_gdf.geometry.apply(lambda geom: get_nearest_tram_stop(geom) if geom.is_valid else (None, None))
)

In [10]:
# Calculate the nearest train stop and its distance for each property
def get_nearest_train_stop(geom):
    if geom is None or geom.is_empty:
        return None, None
    train_distances = train_stops.distance(geom)
    nearest_train_idx = train_distances.idxmin()
    nearest_train_stop_name = tram_stops.loc[nearest_train_idx, 'STOP_NAME']
    nearest_train_distance_km = train_distances.min() / 1000  
    return nearest_train_stop_name, nearest_train_distance_km

# Apply the nearest train stop and distance calculation
property_gdf['nearest_train_stop'], property_gdf['nearest_train_stop_distance_km'] = zip(
    *property_gdf.geometry.apply(lambda geom: get_nearest_train_stop(geom) if geom.is_valid else (None, None))
)

In [12]:
# Calculate the nearest bus stop and its distance for each property
def get_nearest_bus_stop(geom):
    if geom is None or geom.is_empty:
        return None, None
    bus_distances = bus_stops.distance(geom)
    nearest_bus_idx = bus_distances.idxmin()
    nearest_bus_stop_name = bus_stops.loc[nearest_bus_idx, 'ROUTELONGN']
    nearest_bus_distance_km = bus_distances.min() / 1000 
    return nearest_bus_stop_name, nearest_bus_distance_km

# Apply the nearest bus stop and distance calculation
property_gdf['nearest_bus_stop'], property_gdf['nearest_bus_stop_distance_km'] = zip(
    *property_gdf.geometry.apply(lambda geom: get_nearest_bus_stop(geom) if geom.is_valid else (None, None))
)

In [13]:
# Calculate the nearest school and its distance for each property
def get_nearest_school(geom):
    if geom is None or geom.is_empty:
        return None, None
    distances = school_gdf.distance(geom)
    nearest_idx = distances.idxmin()
    nearest_school_name = school_gdf.loc[nearest_idx, 'School_Name']  
    nearest_school_distance_km = distances.min() / 1000  
    return nearest_school_name, nearest_school_distance_km

# Apply the nearest school and distance calculation
property_gdf['nearest_school'], property_gdf['nearest_school_distance_km'] = zip(
    *property_gdf.geometry.apply(lambda geom: get_nearest_school(geom) if geom.is_valid else (None, None))
)

In [14]:
# Check
property_gdf.head(3)

Unnamed: 0,name,Bed,weekly_rent,latitude,longitude,geometry,nearest_tram_stop,nearest_tram_stop_distance_km,nearest_train_stop,nearest_train_stop_distance_km,nearest_bus_stop,nearest_bus_stop_distance_km,nearest_school,nearest_school_distance_km
0,"1208/50 Albert Street, South Melbourne VIC 3205",1,520.0,-37.834344,144.955904,POINT (2496118.379 2407409.798),127-South Melbourne Station/Light Rail (South ...,0.136816,51-Narrak Rd/Whitehorse Rd (Mont Albert),1.801668,Caroline Springs Station - Deer Park Station,17.245706,Galilee Regional Catholic Primary School,0.223329
1,"64 Mills Street, Albert Park VIC 3206",3,1495.0,-37.846426,144.958009,POINT (2496304.265 2406068.919),135-Richardson St/Mills St (Middle Park),0.090635,51-Narrak Rd/Whitehorse Rd (Mont Albert),3.155055,Caroline Springs Station - Deer Park Station,17.94835,Middle Park Primary School,0.158652
2,"11 Barnato St, Weir Views VIC 3338",4,460.0,-37.718775,144.554187,POINT (2460697.114 2420145.173),49-Central Park Ave/Cordite Ave (Maribyrnong),29.072242,44-Deepdene Park/Whitehorse Rd (Balwyn),2.335418,Bacchus Marsh - Hillview Estate Via Bacchus Ma...,10.881234,Al Iman College,1.632357


In [15]:
#Save to csv
property_gdf.to_csv('../../data/landing/nearest_distance.csv', index=False)