In [None]:
import pandas as pd
import geopandas as gpd
import openrouteservice as ors

from shapely.geometry import Point
from pyproj import Transformer


import os

# Initialize the ORS client. The API key is read from the ORS_API_KEY
# environment variable so the credential is never stored in the notebook.
# (A key that has already been committed should be revoked and regenerated.)
ors_api_key = os.environ.get('ORS_API_KEY')
if not ors_api_key:
    raise RuntimeError(
        "Set the ORS_API_KEY environment variable to your OpenRouteService API key."
    )
client = ors.Client(key=ors_api_key)

# Read data of property
property_data = pd.read_csv('../../data/raw/co_location.csv')

# Read data of stations (tram, train, bus)
tram_stops = gpd.read_file('../../data/raw/external/Transportation/PTV_METRO_TRAM_STOP.shp')
train_stops = gpd.read_file('../../data/raw/external/Transportation/PTV_REGIONAL_TRAIN_STATION.shp')
# NOTE(review): the file name says BUS_ROUTE, so this presumably holds route
# lines rather than stop points -- confirm this is the intended dataset.
bus_stops = gpd.read_file('../../data/raw/external/Transportation/PTV_REGIONAL_BUS_ROUTE.shp')

# Prepare property coordinates for ORS as [longitude, latitude] pairs.
# Built column-wise instead of with a row-wise DataFrame.apply.
property_data['coordinates'] = [
    [lon, lat]
    for lon, lat in zip(property_data['longitude'], property_data['latitude'])
]

# Function to get a representative point from a geometry
# (Point, LineString or MultiLineString)
def get_point_from_geometry(geom):
    """Return a representative ``[x, y]`` pair for a geometry.

    Args:
        geom: A shapely-style geometry exposing ``geom_type`` and ``is_empty``
            (or ``None`` for a missing geometry).

    Returns:
        ``[x, y]`` in the geometry's own coordinate system:
        the point itself for a Point, the midpoint along the line for a
        LineString, and the average of the per-part midpoints for a
        MultiLineString. ``None`` for missing, empty or unsupported
        geometries.
    """
    # Missing/empty geometries have no coordinates to report.
    if geom is None or geom.is_empty:
        return None

    kind = geom.geom_type
    if kind == 'Point':
        return [geom.x, geom.y]

    if kind == 'LineString':
        midpoint = geom.interpolate(0.5, normalized=True)  # halfway along the line
        return [midpoint.x, midpoint.y]

    if kind == 'MultiLineString':
        # Multi-part geometries must be walked through ``.geoms``; iterating
        # the geometry object itself is not supported by Shapely 2.x.
        midpoints = [
            part.interpolate(0.5, normalized=True)
            for part in geom.geoms
            if not part.is_empty
        ]
        if not midpoints:
            return None
        count = len(midpoints)
        return [
            sum(pt.x for pt in midpoints) / count,
            sum(pt.y for pt in midpoints) / count,
        ]

    return None  # Other geometry types are not handled

# Convert the geometries of tram, train, and bus stops to coordinates.
# NOTE(review): coordinates are taken as stored in the shapefiles; the routing
# call expects [longitude, latitude] -- confirm tram_stops.crs is geographic.
tram_coords = [[geom.x, geom.y] for geom in tram_stops.geometry if geom.geom_type == 'Point']
train_coords = [[geom.x, geom.y] for geom in train_stops.geometry if geom.geom_type == 'Point']
# Evaluate the helper once per geometry, then drop unsupported ones.
bus_coords = [
    coords
    for coords in map(get_point_from_geometry, bus_stops.geometry)
    if coords is not None
]

# Read data of school locations and encoding to 'ISO-8859-1'
school_data = pd.read_csv('../../data/raw/external/school/schoolscount2023.csv', encoding='ISO-8859-1')

# Initialize the transformer to convert WGS84 (longitude and latitude) to EPSG:3111 (Vicgrid)
transformer = Transformer.from_crs("EPSG:4326", "EPSG:3111", always_xy=True)

# Convert school longitude and latitude to EPSG:3111 and create the geometry column
school_data['geometry'] = school_data.apply(
    lambda row: Point(transformer.transform(row['X'], row['Y'])), axis=1
)

# Convert the school data to a GeoDataFrame
school_gdf = gpd.GeoDataFrame(school_data, geometry='geometry', crs='EPSG:3111')

# Create a list of school coordinates.
# NOTE(review): these are EPSG:3111 values, whereas the property coordinates
# are [longitude, latitude]; convert back before sending them to a routing API.
school_coords = [[geom.x, geom.y] for geom in school_gdf.geometry if geom.is_valid and not geom.is_empty]

# Function to get nearest locations using ORS distance matrix
def get_nearest_location_batch(property_coords, stop_coords, stop_names):
    """Find the nearest stop (by walking distance) for each property.

    Sends one ORS distance-matrix request with the properties as sources and
    the stops as destinations, using the module-level ``client``.

    Args:
        property_coords: List of ``[longitude, latitude]`` pairs for the properties.
        stop_coords: List of coordinate pairs for the candidate stops.
        stop_names: Stop names, index-aligned with ``stop_coords``.

    Returns:
        A list with one ``(stop_name, distance_km)`` tuple per property, in
        input order. The tuple is ``(None, None)`` when there are no stops,
        when no stop is routable for that property, or (for every property in
        the batch) when the ORS request fails.

    NOTE(review): ORS limits the size of a matrix request; confirm that
    ``len(property_coords) * len(stop_coords)`` fits the plan in use.
    """
    n_properties = len(property_coords)
    if n_properties == 0:
        return []
    if not stop_coords:
        # Nothing to compare against; skip the request entirely.
        return [(None, None)] * n_properties

    try:
        response = client.distance_matrix(
            locations=property_coords + stop_coords,
            profile='foot-walking',
            metrics=['distance'],
            sources=list(range(n_properties)),
            destinations=list(range(n_properties, n_properties + len(stop_coords))),
        )
        distance_rows = response['distances']
    except Exception as e:
        # Best-effort: a failed request blanks this batch instead of aborting the run.
        print(f"Error during ORS request: {e}")
        return [(None, None)] * n_properties

    nearest_locations = []
    for distances in distance_rows:
        # Unroutable pairs may come back as None; ignore them rather than
        # letting min() fail and discard the whole batch.
        reachable = [
            (distance, idx)
            for idx, distance in enumerate(distances)
            if distance is not None
        ]
        if not reachable:
            nearest_locations.append((None, None))
            continue
        # Tuple ordering breaks ties on the lowest stop index.
        min_distance, nearest_idx = min(reachable)
        nearest_locations.append((stop_names[nearest_idx], min_distance / 1000))  # Distance in km

    return nearest_locations

# Batch process the properties to find the nearest tram stops
batch_size = 50  # Adjust this size based on performance and API limits
property_coords_list = property_data['coordinates'].tolist()
# tram_coords only contains Point geometries, so apply the same filter to the
# names; otherwise a skipped geometry would shift every following name.
tram_stop_names = tram_stops.loc[
    tram_stops.geometry.geom_type == 'Point', 'STOP_NAME'
].tolist()

results = []
for i in range(0, len(property_coords_list), batch_size):
    batch_coords = property_coords_list[i:i + batch_size]
    nearest_tram_locations = get_nearest_location_batch(batch_coords, tram_coords, tram_stop_names)
    results.extend(nearest_tram_locations)

# Assign the results to the DataFrame (zip(*[]) cannot be unpacked, so an
# empty result set is handled explicitly)
if results:
    nearest_names, nearest_distances = zip(*results)
else:
    nearest_names, nearest_distances = (), ()
property_data['nearest_tram_stop'] = list(nearest_names)
property_data['nearest_tram_stop_distance_km'] = list(nearest_distances)

# Preview the updated property data (nothing is written to disk here)
property_data.head(10)

In [None]:
import pandas as pd
import googlemaps
from shapely.geometry import Point
from pyproj import Transformer
import geopandas as gpd

import os

# Create the Google Maps client. The API key is read from the
# GOOGLE_MAPS_API_KEY environment variable so the credential is never stored
# in the notebook. (A key that has already been committed should be revoked.)
google_maps_api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
if not google_maps_api_key:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable to your Google Cloud API key."
    )
gmaps = googlemaps.Client(key=google_maps_api_key)

# Read data of property
property_data = pd.read_csv('../../data/raw/co_location.csv')

# Read data of stations (tram, train, bus)
tram_stops = gpd.read_file('../../data/raw/external/Transportation/PTV_METRO_TRAM_STOP.shp')
train_stops = gpd.read_file('../../data/raw/external/Transportation/PTV_REGIONAL_TRAIN_STATION.shp')
# NOTE(review): the file name says BUS_ROUTE, so this presumably holds route
# lines rather than stop points -- confirm this is the intended dataset.
bus_stops = gpd.read_file('../../data/raw/external/Transportation/PTV_REGIONAL_BUS_ROUTE.shp')

# Prepare property coordinates as [longitude, latitude] pairs.
# Built column-wise instead of with a row-wise DataFrame.apply.
property_data['coordinates'] = [
    [lon, lat]
    for lon, lat in zip(property_data['longitude'], property_data['latitude'])
]

# Function to get a representative point from a geometry
# (Point, LineString or MultiLineString)
def get_point_from_geometry(geom):
    """Return a representative ``[x, y]`` pair for a geometry.

    Args:
        geom: A shapely-style geometry exposing ``geom_type`` and ``is_empty``
            (or ``None`` for a missing geometry).

    Returns:
        ``[x, y]`` in the geometry's own coordinate system:
        the point itself for a Point, the midpoint along the line for a
        LineString, and the average of the per-part midpoints for a
        MultiLineString. ``None`` for missing, empty or unsupported
        geometries.
    """
    # Missing/empty geometries have no coordinates to report.
    if geom is None or geom.is_empty:
        return None

    geometry_type = geom.geom_type
    if geometry_type == 'Point':
        return [geom.x, geom.y]

    if geometry_type == 'LineString':
        halfway = geom.interpolate(0.5, normalized=True)  # halfway along the line
        return [halfway.x, halfway.y]

    if geometry_type == 'MultiLineString':
        # Walk the parts through ``.geoms``: iterating the multi-part geometry
        # itself is not supported by Shapely 2.x.
        midpoints = [
            line.interpolate(0.5, normalized=True)
            for line in geom.geoms
            if not line.is_empty
        ]
        if not midpoints:
            return None
        # Average the part midpoints; no intermediate Point object is needed.
        return [
            sum(point.x for point in midpoints) / len(midpoints),
            sum(point.y for point in midpoints) / len(midpoints),
        ]

    return None  # Other geometry types are not handled

# Convert the geometries of tram, train, and bus stops to coordinates.
# NOTE(review): coordinates are taken as stored in the shapefiles; the routing
# call treats them as (longitude, latitude) -- confirm tram_stops.crs is geographic.
tram_coords = [[geom.x, geom.y] for geom in tram_stops.geometry if geom.geom_type == 'Point']
train_coords = [[geom.x, geom.y] for geom in train_stops.geometry if geom.geom_type == 'Point']
# Evaluate the helper once per geometry, then drop unsupported ones.
bus_coords = [
    coords
    for coords in map(get_point_from_geometry, bus_stops.geometry)
    if coords is not None
]

# Read data of school locations and encoding to 'ISO-8859-1'
school_data = pd.read_csv('../../data/raw/external/school/schoolscount2023.csv', encoding='ISO-8859-1')

# Initialize the transformer to convert WGS84 (longitude and latitude) to EPSG:3111 (Vicgrid)
transformer = Transformer.from_crs("EPSG:4326", "EPSG:3111", always_xy=True)

# Convert school longitude and latitude to EPSG:3111 and create the geometry column
school_data['geometry'] = school_data.apply(
    lambda row: Point(transformer.transform(row['X'], row['Y'])), axis=1
)

# Convert the school data to a GeoDataFrame
school_gdf = gpd.GeoDataFrame(school_data, geometry='geometry', crs='EPSG:3111')

# Create a list of school coordinates.
# NOTE(review): these are EPSG:3111 values, whereas the property coordinates
# are [longitude, latitude]; convert back before sending them to a routing API.
school_coords = [[geom.x, geom.y] for geom in school_gdf.geometry if geom.is_valid and not geom.is_empty]

# Walking (navigation) distance lookup via the Google Directions API
def get_nearest_location_navigation(property_coords, stop_coords, stop_names):
    """Find the nearest stop (by walking route length) for each property.

    Issues one Google Directions request per property/stop pair through the
    module-level ``gmaps`` client, so the number of requests is
    ``len(property_coords) * len(stop_coords)``.

    Args:
        property_coords: List of ``[longitude, latitude]`` pairs for the properties.
        stop_coords: List of ``[longitude, latitude]`` pairs for the candidate stops.
        stop_names: Stop names, index-aligned with ``stop_coords``.

    Returns:
        A list with one ``(stop_name, distance_km)`` tuple per property, in
        input order. The tuple is ``(None, None)`` when no stop could be
        routed for that property (previously the distance was ``inf``), which
        matches the sentinel used by the ORS-based lookup.
    """
    nearest_locations = []
    for prop in property_coords:
        min_distance = None  # stays None until a routable stop is found
        nearest_stop = None
        for i, stop in enumerate(stop_coords):
            try:
                # Ask the Google Directions API for the walking route
                directions = gmaps.directions(
                    origin=(prop[1], prop[0]),   # Google expects (latitude, longitude)
                    destination=(stop[1], stop[0]),
                    mode='walking',
                    units='metric'
                )

                if directions:
                    # First route, first leg; 'value' is in metres -> convert to km
                    distance = directions[0]['legs'][0]['distance']['value'] / 1000
                    if min_distance is None or distance < min_distance:
                        min_distance = distance
                        nearest_stop = stop_names[i]
            except Exception as e:
                # Best-effort: a failed pair is reported and skipped
                print(f"Error during Google Directions API request: {e}")
        nearest_locations.append((nearest_stop, min_distance))
    return nearest_locations

# Batch process the properties to find the nearest tram stops
batch_size = 50  # Adjust this size based on performance and API limits
property_coords_list = property_data['coordinates'].tolist()
# tram_coords only contains Point geometries, so apply the same filter to the
# names; otherwise a skipped geometry would shift every following name.
tram_stop_names = tram_stops.loc[
    tram_stops.geometry.geom_type == 'Point', 'STOP_NAME'
].tolist()

results = []
for i in range(0, len(property_coords_list), batch_size):
    batch_coords = property_coords_list[i:i + batch_size]
    nearest_tram_locations = get_nearest_location_navigation(batch_coords, tram_coords, tram_stop_names)
    results.extend(nearest_tram_locations)

# Assign the results to the DataFrame (zip(*[]) cannot be unpacked, so an
# empty result set is handled explicitly)
if results:
    nearest_names, nearest_distances = zip(*results)
else:
    nearest_names, nearest_distances = (), ()
property_data['nearest_tram_stop'] = list(nearest_names)
property_data['nearest_tram_stop_distance_km'] = list(nearest_distances)

# Preview the updated property data (nothing is written to disk here)
property_data.head(10)
