In [16]:
from math import radians, sin, cos, sqrt, atan2

import numpy as np
import pandas as pd

# Load the two curated inputs: the property listings and the combined stop table.
# NOTE(review): the property file name ends in '.csv.csv' — confirm that this is
# the real name on disk and not a typo.
property_df = pd.read_csv(
    filepath_or_buffer='../data/curated/None_longi_lati_remove_domain.csv.csv'
)
stations_data = pd.read_csv(
    filepath_or_buffer='../data/curated/combined_stops_with_postcodes.csv'
)
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance between two points via the haversine formula.

    All four arguments are coordinates in decimal degrees (they are converted
    with ``radians`` internally). The result is expressed in the same unit as
    the sphere radius used here, 6371.0 (kilometres).
    """
    earth_radius = 6371.0

    # Differences are taken in degrees first and converted to radians afterwards.
    delta_lat = radians(lat2 - lat1)
    delta_lon = radians(lon2 - lon1)

    lat_term = sin(delta_lat / 2)**2
    lon_term = cos(radians(lat1)) * cos(radians(lat2)) * sin(delta_lon / 2)**2
    a = lat_term + lon_term

    # Central angle between the two points, in radians.
    central_angle = 2 * atan2(sqrt(a), sqrt(1 - a))
    return earth_radius * central_angle


def filter_stations_by_distance(property_lat, property_lng, stations_data, max_distance_km=3):
    """Return the rows of ``stations_data`` within ``max_distance_km`` of a property.

    The distance is the straight-line (haversine) distance on a sphere of
    radius 6371.0 km, computed for all stations at once with NumPy instead of
    calling a Python function once per row.

    Parameters
    ----------
    property_lat, property_lng : float
        Property coordinates in decimal degrees.
    stations_data : pandas.DataFrame
        Must contain numeric ``stop_lat`` and ``stop_lon`` columns (decimal degrees).
    max_distance_km : float, default 3
        Inclusive radius; a station exactly at this distance is kept.

    Returns
    -------
    pandas.DataFrame
        The matching rows, with their original index and columns. Rows whose
        coordinates are NaN never match. An empty input yields an empty frame.
    """
    earth_radius_km = 6371.0

    stop_lat = stations_data['stop_lat'].to_numpy(dtype=float)
    stop_lon = stations_data['stop_lon'].to_numpy(dtype=float)

    dlat = np.radians(stop_lat - property_lat)
    dlon = np.radians(stop_lon - property_lng)
    a = (
        np.sin(dlat / 2) ** 2
        + np.cos(np.radians(property_lat)) * np.cos(np.radians(stop_lat)) * np.sin(dlon / 2) ** 2
    )
    # Rounding can push `a` a hair outside [0, 1] for near-antipodal points,
    # which would make sqrt(1 - a) invalid; clamp before taking roots.
    a = np.clip(a, 0.0, 1.0)
    distance_km = earth_radius_km * 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    # Boolean mask is positional, so the original index labels are preserved.
    return stations_data[distance_km <= max_distance_km]

# Straight-line pre-filter: for every property keep only the stops that lie
# within CANDIDATE_RADIUS_KM, so the later routing step has fewer stops to query.
CANDIDATE_RADIUS_KM = 5

# Build the whole column in one pass instead of writing cell-by-cell with .at
# inside iterrows(). Each entry is a list of station records (one dict per stop).
# Wrapping in a Series with the frame's own index keeps the values aligned
# row-for-row and stores the lists as plain Python objects.
property_df['candidate_stations'] = pd.Series(
    [
        filter_stations_by_distance(
            lat, lng, stations_data, max_distance_km=CANDIDATE_RADIUS_KM
        ).to_dict('records')
        for lat, lng in zip(property_df['addressLat'], property_df['addressLng'])
    ],
    index=property_df.index,
    dtype=object,
)

# Save alongside the other curated data: the walking-distance step reads this
# pickle from '../data/curated/', not from the working directory, so writing it
# anywhere else leaves that step with a missing or stale file.
property_df.to_pickle('../data/curated/property_with_filtered_stations.pkl')

print("already")


already


In [32]:
import googlemaps
import pandas as pd
import time

import os

# Read the Google Maps API key from the environment instead of committing it to
# the notebook. When GOOGLE_MAPS_API_KEY is unset this falls back to '', which
# is exactly what the previous hard-coded placeholder passed.
gmaps = googlemaps.Client(key=os.environ.get('GOOGLE_MAPS_API_KEY', ''))

# NOTE(review): unpickling can execute arbitrary code — only load a pickle that
# was produced by this project's own candidate-station filtering step.
property_df = pd.read_pickle('../data/curated/property_with_filtered_stations.pkl')

def get_walking_distance(property_lat, property_lng, station_lat, station_lng):
    """Look up the walking distance from a property to a station.

    Issues one ``distance_matrix`` request (mode ``'walking'``) through the
    module-level ``gmaps`` client, with a single origin and a single destination.

    Returns the ``distance.value`` of the only result element when its status
    is ``'OK'`` (presumably metres — callers divide by 1000 to get kilometres),
    and ``float('inf')`` for any other element status so that an unroutable
    station can never win a minimum search.
    """
    response = gmaps.distance_matrix(
        (property_lat, property_lng),
        (station_lat, station_lng),
        mode='walking',
    )

    # One origin x one destination: only the first element of the first row exists.
    element = response['rows'][0]['elements'][0]
    if element['status'] != 'OK':
        return float('inf')
    return element['distance']['value']

# Result columns start empty and are filled one property at a time.
property_df['closest_station'] = None
property_df['distance_to_closest_station'] = None

for idx, prop in property_df.iterrows():
    lat, lng = prop['addressLat'], prop['addressLng']
    stations = prop['candidate_stations']

    if len(stations) == 1:
        # A single candidate is accepted as-is, even if no walking route was found
        # (its distance is then infinite but its name is still recorded).
        only = stations[0]
        only_lat, only_lng = only['stop_lat'], only['stop_lon']
        best_metres = get_walking_distance(lat, lng, only_lat, only_lng)
        best_name = only['stop_name']
    else:
        # Zero or several candidates: keep the strictly smallest walking distance.
        # With no candidates (or none routable) the name stays None and the
        # distance stays infinite.
        best_name, best_metres = None, float('inf')
        for candidate in stations:
            cand_lat, cand_lng = candidate['stop_lat'], candidate['stop_lon']
            metres = get_walking_distance(lat, lng, cand_lat, cand_lng)
            if metres < best_metres:
                best_name, best_metres = candidate['stop_name'], metres

    property_df.at[idx, 'closest_station'] = best_name
    property_df.at[idx, 'distance_to_closest_station'] = best_metres / 1000  # convert to kilometres

    # Pause between properties to pace the API requests.
    time.sleep(1)

property_df.to_csv('property_with_walking_distance.csv', index=False)

print("already")


already


In [38]:
# Write a second copy of the results, this time with the encoding spelled out.
property_df.to_csv(
    'property_with_walking_distance1.csv',
    encoding='utf-8',
    index=False,
)
