In [1]:
import pandas as pd
import geojson
from geojson import Feature, Point, FeatureCollection

# Load the property listings and the Melbourne CBD coordinates.
# Both paths are relative to the notebook's working directory.
houses_df = pd.read_csv('../data/curated/property/property 2.csv')
cbd_df = pd.read_csv('../data/raw/external/melbourne_cbd_coordinates.csv')
# Rename the CBD coordinate columns so both frames expose lowercase
# 'longitude' / 'latitude'.
cbd_df = cbd_df.rename(columns={'Longitude': 'longitude', 'Latitude': 'latitude'})

# Rows lacking either coordinate cannot be turned into a GeoJSON point.
houses_df = houses_df.dropna(subset=['longitude', 'latitude'])
cbd_df = cbd_df.dropna(subset=['longitude', 'latitude'])


# Build a GeoJSON Feature from one row of a coordinates table.
def create_geojson_feature(row, feature_type='house'):
    """Convert one table row into a GeoJSON point ``Feature``.

    Args:
        row: Mapping-like row (for example a pandas Series) that provides
            'longitude' and 'latitude' and, optionally, 'address'.
        feature_type: Label stored under the feature's 'type' property.

    Returns:
        A ``Feature`` whose geometry is a ``Point`` built from
        (longitude, latitude) and whose properties hold 'address' and 'type'.
    """
    address = row.get('address', 'unknown')
    # `.get`'s default only applies when the key is absent. An empty CSV cell
    # is present but NaN, and NaN is not valid JSON, so treat it as unknown too.
    if pd.isna(address):
        address = 'unknown'

    point = Point((row['longitude'], row['latitude']))
    properties = {
        'address': address,
        'type': feature_type
    }
    return Feature(geometry=point, properties=properties)

# Destination files for the house and CBD GeoJSON collections.
houses_geojson_output_path = '../data/raw/external/API/2/property_geojson.geojson'
cbd_geojson_output_path = '../data/raw/external/API/2/vic_cbd_geojson.geojson'

# One Feature per house row, bundled into a FeatureCollection.
# `apply` forwards the extra keyword argument to create_geojson_feature.
house_features = houses_df.apply(
    create_geojson_feature, axis=1, feature_type='house'
).tolist()
house_feature_collection = FeatureCollection(house_features)

# Same conversion for the CBD rows, tagged with type 'cbd'.
cbd_features = cbd_df.apply(
    create_geojson_feature, axis=1, feature_type='cbd'
).tolist()
cbd_feature_collection = FeatureCollection(cbd_features)

# Write the house and CBD collections to their own GeoJSON files.
with open(houses_geojson_output_path, 'w') as f:
    geojson.dump(house_feature_collection, f)

with open(cbd_geojson_output_path, 'w') as f:
    geojson.dump(cbd_feature_collection, f)


print("Created")

FileNotFoundError: [Errno 2] No such file or directory: '../data/curated/property/property_addresses_to_lat_lng_2_final.csv'

In [4]:
import os
import time

import geojson
import openrouteservice
import pandas as pd

# OpenRouteService API client.
# The key is read from the ORS_API_KEY environment variable so that no
# credential is stored in the notebook. Any key that was previously committed
# here should be considered exposed and be rotated.
api_key = os.environ.get('ORS_API_KEY')
if not api_key:
    raise RuntimeError(
        "OpenRouteService API key not found: set the ORS_API_KEY environment variable."
    )
client = openrouteservice.Client(key=api_key)

# Load the house (rental property) feature collection.
with open('../data/raw/external/API/2/property_geojson.geojson', 'r') as f:
    rent_geojson = geojson.load(f)

# Load the CBD feature collection.
with open('../data/raw/external/API/2/vic_cbd_geojson.geojson', 'r') as f:
    cbd_geojson = geojson.load(f)

# Flatten a feature collection into (lat, lon, address) records.
def extract_coordinates(features):
    """Return one ``(lat, lon, address)`` tuple per feature.

    Args:
        features: Mapping with a 'features' list; each item carries
            ``['geometry']['coordinates']`` as a two-element (lon, lat) pair
            and a ``['properties']`` mapping.

    Returns:
        List of ``(lat, lon, address)`` tuples in input order. The address
        falls back to 'unknown' when the properties have no 'address' key.
    """
    def _to_record(item):
        # Stored order is (lon, lat); the record flips it to (lat, lon).
        longitude, latitude = item['geometry']['coordinates']
        label = item['properties'].get('address', 'unknown')
        return (latitude, longitude, label)

    return [_to_record(item) for item in features['features']]

# (lat, lon, address) records for every house and every CBD point.
rent_coordinates = extract_coordinates(rent_geojson)
cbd_coordinates = extract_coordinates(cbd_geojson)

# Drop the address and flip each record into a (lon, lat) pair, the order in
# which the locations are handed to the distance-matrix request below.
house_coords = [(lng, lat) for lat, lng, _addr in rent_coordinates]
cbd_coords = [(lng, lat) for lat, lng, _addr in cbd_coordinates]

# Query the OpenRouteService distance matrix batch by batch and keep, for each
# house, the closest CBD point.
def calculate_distance_matrix_in_batches(house_coords, cbd_coords, batch_size=100):
    """Find the shortest driving distance from each house to any CBD point.

    Houses are sent in slices of ``batch_size`` together with all CBD points;
    houses are the matrix sources and CBD points the destinations. Distances
    are requested with ``units='km'`` on the 'driving-car' profile.

    Relies on module-level names: ``client`` (the API client) and
    ``rent_coordinates`` / ``cbd_coordinates`` (for the address labels, indexed
    in the same order as ``house_coords`` / ``cbd_coords``).

    Args:
        house_coords: List of (lon, lat) pairs for the houses.
        cbd_coords: List of (lon, lat) pairs for the CBD points.
        batch_size: Number of houses per API request.

    Returns:
        List of dicts with keys 'rent_address', 'closest_cbd' and
        'minimum_distance_cbd'. Houses with no non-None distance, and batches
        whose request or processing raised, are left out.
    """
    results = []

    for start in range(0, len(house_coords), batch_size):
        chunk = house_coords[start:start + batch_size]
        n_sources = len(chunk)
        try:
            response = client.distance_matrix(
                locations=chunk + cbd_coords,
                profile='driving-car',
                sources=list(range(n_sources)),
                destinations=list(range(n_sources, n_sources + len(cbd_coords))),
                metrics=['distance'],
                units='km'
            )

            # One row per house in this chunk, one column per CBD point.
            for offset, row in enumerate(response['distances'][:n_sources]):
                reachable = [value for value in row if value is not None]
                if not reachable:
                    # No usable distance for this house; skip it.
                    continue
                shortest = min(reachable)
                nearest_idx = row.index(shortest)
                results.append({
                    'rent_address': rent_coordinates[start + offset][2],
                    'closest_cbd': cbd_coordinates[nearest_idx][2],
                    'minimum_distance_cbd': shortest
                })
        except Exception as e:
            # Best effort: report the failure and carry on with the next batch.
            print(f"Error calculating distance matrix: {e}")
        time.sleep(1)  # Pause between requests to stay under rate limits

    return results

# Fix the output location up front and make sure its folder exists, so the
# slow, rate-limited API run below cannot be lost to a missing directory when
# the CSV is written.
output_path = '../data/curated/external/API/2/closest_cbd_distance_final_2.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Batch processing: shortest house-to-CBD distance for every house.
shortest_distances = calculate_distance_matrix_in_batches(house_coords, cbd_coords, batch_size=100)

# Save the shortest distance results as a CSV file (one row per house).
shortest_distances_df = pd.DataFrame(shortest_distances)
shortest_distances_df.to_csv(output_path, index=False)

print(f"Results have been saved to {output_path}")

结果已保存到 /Users/fenglingyi/Downloads/groupwork_final/closest_cbd_distance_final_2.csv
