In [None]:
import pandas as pd
import geojson

In [None]:
from geojson import Feature, Point, FeatureCollection

# Property table: keep only the rows that have both coordinates.
houses_df = (
    pd.read_csv('../data/curated/property/property_4_lat_lon.csv')
    .dropna(subset=['longitude', 'latitude'])
)

# School table: the raw file is read as latin1 and names its coordinate columns X / Y;
# rename them to the column names used by the property table, then drop incomplete rows.
school = pd.read_csv('../data/raw/external/school locations.csv', encoding='latin1')
school_df = (
    school
    .rename(columns={'X': 'longitude', 'Y': 'latitude'})
    .dropna(subset=['longitude', 'latitude'])
)


### create GeoJSON Feature 
def create_geojson_feature(row, feature_type='house'):
    """Build a GeoJSON point Feature from one row of a location table.

    Args:
        row: Mapping-like record (e.g. a pandas Series) providing 'longitude'
            and 'latitude', and optionally 'address'.
        feature_type: Label stored in the feature's 'type' property.

    Returns:
        A Feature whose geometry is Point((longitude, latitude)) and whose
        properties are {'address': ..., 'type': feature_type}. The address is
        'unknown' when the row has no 'address' entry or the entry is NaN.
    """
    address = row.get('address', 'unknown')
    # A blank CSV cell is read as float NaN (NaN != NaN). NaN is not valid JSON,
    # so treat it the same way as a missing address.
    if isinstance(address, float) and address != address:
        address = 'unknown'

    point = Point((row['longitude'], row['latitude']))
    return Feature(
        geometry=point,
        properties={'address': address, 'type': feature_type},
    )

### turn every table row into a GeoJSON feature, one list per table
house_features = houses_df.apply(create_geojson_feature, axis=1, args=('house',)).tolist()
school_features = school_df.apply(create_geojson_feature, axis=1, args=('school',)).tolist()

### wrap each list in a FeatureCollection
house_feature_collection = FeatureCollection(house_features)
school_feature_collection = FeatureCollection(school_features)


### write both collections to disk
houses_geojson_output_path = '../data/raw/external/API/4/property_4_geojson.geojson'
school_geojson_output_path = '../data/raw/external/API/4/school locations_geojson.geojson'

with open(houses_geojson_output_path, 'w') as geojson_file:
    geojson.dump(house_feature_collection, geojson_file)

with open(school_geojson_output_path, 'w') as geojson_file:
    geojson.dump(school_feature_collection, geojson_file)


print("Geojson created")


In [None]:
import geopandas as gpd

### split the property GeoJSON into smaller files so each one can be sent to the API separately
### (the cells that follow run the same code, once per part)
# NOTE: despite its name, gdf_school holds the features read from the property file.
gdf_school = gpd.read_file('../data/raw/external/API/4/property_4_geojson.geojson')
chunk_size = 500
# Ceiling division: one extra chunk when the row count is not a multiple of chunk_size.
num_chunks = -(-len(gdf_school) // chunk_size)

# consecutive slices of at most chunk_size rows
gdfs = [
    gdf_school.iloc[start:start + chunk_size]
    for start in range(0, len(gdf_school), chunk_size)
]

### write each slice to its own file, numbered from 1
for i, part in enumerate(gdfs):
    part.to_file(f'../data/raw/external/API/4/property_4_geojson_part_{i+1}.geojson', driver='GeoJSON')

print(f"divided into {num_chunks}.")


In [None]:
import geojson
import openrouteservice
import pandas as pd
import time

### part 5
import os

# ORS_API_KEY from the environment takes precedence; the literal is kept only as a
# fallback so this cell still runs unchanged where the variable is not set.
# NOTE(review): this key is committed in source -- rotate it and remove the fallback.
api_key = os.environ.get('ORS_API_KEY', '5b3ce3597851110001cf6248ef157f3775f94378ac76799c0f696a53')
client = openrouteservice.Client(key=api_key)

### read the property chunk and the school locations
# GeoJSON text is UTF-8; state it explicitly so decoding does not depend on the platform locale.
with open('../data/raw/external/API/4/property_4_geojson_part_5.geojson', 'r', encoding='utf-8') as f:
    rent_geojson = geojson.load(f)

with open('../data/raw/external/API/4/school locations_geojson.geojson', 'r', encoding='utf-8') as f:
    school_geojson = geojson.load(f)

### convert coordinates
def extract_coordinates(features):
    """Flatten a GeoJSON FeatureCollection of points into (lat, lon, address) tuples.

    Args:
        features: Mapping with a 'features' list; each feature has
            feature['geometry']['coordinates'] starting with longitude, latitude.

    Returns:
        A list of (lat, lon, address) tuples in feature order. The address is
        'unknown' when the feature has no properties or no 'address' entry.
    """
    coordinates = []
    for feature in features['features']:
        # A position may carry an optional third element (altitude); only the first two are used.
        lon, lat = feature['geometry']['coordinates'][:2]
        # 'properties' may be null or absent in valid GeoJSON.
        properties = feature.get('properties') or {}
        address = properties.get('address', 'unknown')
        coordinates.append((lat, lon, address))
    return coordinates

# (lat, lon, address) records for the property chunk and for the schools
rent_coordinates = extract_coordinates(rent_geojson)
school_coordinates = extract_coordinates(school_geojson)

# same points re-ordered to (lon, lat) pairs, with the address dropped
house_coords = [(record[1], record[0]) for record in rent_coordinates]
school_coords = [(record[1], record[0]) for record in school_coordinates]

### using api to calculate the driving distance
def calculate_distance_matrix_in_batches(house_coords, school_coords, batch_size=1):
    """Find the closest school for each house from the API's distance matrix.

    Uses the notebook-level names ``client`` (API client) and
    ``rent_coordinates`` / ``school_coordinates`` (address lookups, indexed in
    the same order as ``house_coords`` / ``school_coords``).

    Args:
        house_coords: List of (lon, lat) pairs used as matrix sources.
        school_coords: List of (lon, lat) pairs used as matrix destinations.
        batch_size: Number of houses sent per API request.

    Returns:
        A list of dicts with keys 'rent_address', 'closest_school' and
        'minimum_distance_school' (distance requested with units='km').
        A batch whose processing raises is reported with print and skipped;
        a house whose distances are all None is left out.
    """
    results = []

    for start in range(0, len(house_coords), batch_size):
        chunk = house_coords[start:start + batch_size]
        try:
            chunk_len = len(chunk)
            # Houses come first in `locations`, schools after them, so the
            # source / destination indices are two consecutive ranges.
            response = client.distance_matrix(
                locations=chunk + school_coords,
                profile='driving-car',
                sources=list(range(chunk_len)),
                destinations=list(range(chunk_len, chunk_len + len(school_coords))),
                metrics=['distance'],
                units='km'
            )
            rows = response['distances']

            for offset, row in enumerate(rows[:chunk_len]):
                # Pair each usable distance with its school index; min() then
                # picks the smallest distance, earliest school on ties.
                reachable = [(dist, idx) for idx, dist in enumerate(row) if dist is not None]
                if not reachable:
                    continue
                best_distance, best_idx = min(reachable)
                results.append({
                    'rent_address': rent_coordinates[start + offset][2],
                    'closest_school': school_coordinates[best_idx][2],
                    'minimum_distance_school': best_distance
                })
        except Exception as e:
            print(f"Error calculating distance matrix: {e}")
        # pause after every request, successful or not
        time.sleep(1)

    return results

### query the API one house per request
shortest_distances = calculate_distance_matrix_in_batches(house_coords, school_coords, batch_size=1)

### save data
# Fix the columns explicitly: if no distance could be computed the list is empty,
# and the CSV would otherwise be written without a header row, which the later
# combine step cannot read back.
shortest_distances_df = pd.DataFrame(
    shortest_distances,
    columns=['rent_address', 'closest_school', 'minimum_distance_school'],
)
output_path = '../data/curated/external/API/4/closest_school_4_part5.csv'
shortest_distances_df.to_csv(output_path, index=False)

# the message reads "results saved to <output_path>"
print(f"结果已保存到 {output_path}")


In [None]:
import geojson
import openrouteservice
import pandas as pd
import time

### part 1
import os

# ORS_API_KEY from the environment takes precedence; the literal is kept only as a
# fallback so this cell still runs unchanged where the variable is not set.
# NOTE(review): this key is committed in source -- rotate it and remove the fallback.
api_key = os.environ.get('ORS_API_KEY', '5b3ce3597851110001cf6248c2b2602de2dc43afa9f186cdfc39caca')
client = openrouteservice.Client(key=api_key)


### read the property chunk and the school locations
# GeoJSON text is UTF-8; state it explicitly so decoding does not depend on the platform locale.
with open('../data/raw/external/API/4/property_4_geojson_part_1.geojson', 'r', encoding='utf-8') as f:
    rent_geojson = geojson.load(f)

with open('../data/raw/external/API/4/school locations_geojson.geojson', 'r', encoding='utf-8') as f:
    school_geojson = geojson.load(f)

### convert coordinates
def extract_coordinates(features):
    """Flatten a GeoJSON FeatureCollection of points into (lat, lon, address) tuples.

    Args:
        features: Mapping with a 'features' list; each feature has
            feature['geometry']['coordinates'] starting with longitude, latitude.

    Returns:
        A list of (lat, lon, address) tuples in feature order. The address is
        'unknown' when the feature has no properties or no 'address' entry.
    """
    coordinates = []
    for feature in features['features']:
        # A position may carry an optional third element (altitude); only the first two are used.
        lon, lat = feature['geometry']['coordinates'][:2]
        # 'properties' may be null or absent in valid GeoJSON.
        properties = feature.get('properties') or {}
        address = properties.get('address', 'unknown')
        coordinates.append((lat, lon, address))
    return coordinates

# (lat, lon, address) records for the property chunk and for the schools
rent_coordinates = extract_coordinates(rent_geojson)
school_coordinates = extract_coordinates(school_geojson)


# same points re-ordered to (lon, lat) pairs, with the address dropped
house_coords = [(record[1], record[0]) for record in rent_coordinates]
school_coords = [(record[1], record[0]) for record in school_coordinates]

### api calculate distance
def calculate_distance_matrix_in_batches(house_coords, school_coords, batch_size=1):
    """Find the closest school for each house from the API's distance matrix.

    Uses the notebook-level names ``client`` (API client) and
    ``rent_coordinates`` / ``school_coordinates`` (address lookups, indexed in
    the same order as ``house_coords`` / ``school_coords``).

    Args:
        house_coords: List of (lon, lat) pairs used as matrix sources.
        school_coords: List of (lon, lat) pairs used as matrix destinations.
        batch_size: Number of houses sent per API request.

    Returns:
        A list of dicts with keys 'rent_address', 'closest_school' and
        'minimum_distance_school' (distance requested with units='km').
        A batch whose processing raises is reported with print and skipped;
        a house whose distances are all None is left out.
    """
    results = []

    for start in range(0, len(house_coords), batch_size):
        chunk = house_coords[start:start + batch_size]
        try:
            chunk_len = len(chunk)
            # Houses come first in `locations`, schools after them, so the
            # source / destination indices are two consecutive ranges.
            response = client.distance_matrix(
                locations=chunk + school_coords,
                profile='driving-car',
                sources=list(range(chunk_len)),
                destinations=list(range(chunk_len, chunk_len + len(school_coords))),
                metrics=['distance'],
                units='km'
            )
            rows = response['distances']

            for offset, row in enumerate(rows[:chunk_len]):
                # Pair each usable distance with its school index; min() then
                # picks the smallest distance, earliest school on ties.
                reachable = [(dist, idx) for idx, dist in enumerate(row) if dist is not None]
                if not reachable:
                    continue
                best_distance, best_idx = min(reachable)
                results.append({
                    'rent_address': rent_coordinates[start + offset][2],
                    'closest_school': school_coordinates[best_idx][2],
                    'minimum_distance_school': best_distance
                })
        except Exception as e:
            print(f"Error calculating distance matrix: {e}")
        # pause after every request, successful or not
        time.sleep(1)

    return results

### query the API one house per request
shortest_distances = calculate_distance_matrix_in_batches(house_coords, school_coords, batch_size=1)

### save
# Fix the columns explicitly: if no distance could be computed the list is empty,
# and the CSV would otherwise be written without a header row, which the later
# combine step cannot read back.
shortest_distances_df = pd.DataFrame(
    shortest_distances,
    columns=['rent_address', 'closest_school', 'minimum_distance_school'],
)
output_path = '../data/curated/external/API/4/closest_school_4_part1.csv'
shortest_distances_df.to_csv(output_path, index=False)

print(f"saved into {output_path}")


In [None]:
import geojson
import openrouteservice
import pandas as pd
import time

### part 2
import os

# ORS_API_KEY from the environment takes precedence; the literal is kept only as a
# fallback so this cell still runs unchanged where the variable is not set.
# NOTE(review): this key is committed in source -- rotate it and remove the fallback.
api_key = os.environ.get('ORS_API_KEY', '5b3ce3597851110001cf62487673c3a3ad924f9c822fd4f24b7e8ecc')
client = openrouteservice.Client(key=api_key)


### read the property chunk and the school locations
# GeoJSON text is UTF-8; state it explicitly so decoding does not depend on the platform locale.
with open('../data/raw/external/API/4/property_4_geojson_part_2.geojson', 'r', encoding='utf-8') as f:
    rent_geojson = geojson.load(f)

with open('../data/raw/external/API/4/school locations_geojson.geojson', 'r', encoding='utf-8') as f:
    school_geojson = geojson.load(f)

### convert the coordinates
def extract_coordinates(features):
    """Flatten a GeoJSON FeatureCollection of points into (lat, lon, address) tuples.

    Args:
        features: Mapping with a 'features' list; each feature has
            feature['geometry']['coordinates'] starting with longitude, latitude.

    Returns:
        A list of (lat, lon, address) tuples in feature order. The address is
        'unknown' when the feature has no properties or no 'address' entry.
    """
    coordinates = []
    for feature in features['features']:
        # A position may carry an optional third element (altitude); only the first two are used.
        lon, lat = feature['geometry']['coordinates'][:2]
        # 'properties' may be null or absent in valid GeoJSON.
        properties = feature.get('properties') or {}
        address = properties.get('address', 'unknown')
        coordinates.append((lat, lon, address))
    return coordinates

# (lat, lon, address) records for the property chunk and for the schools
rent_coordinates = extract_coordinates(rent_geojson)
school_coordinates = extract_coordinates(school_geojson)

# same points re-ordered to (lon, lat) pairs, with the address dropped
house_coords = [(record[1], record[0]) for record in rent_coordinates]
school_coords = [(record[1], record[0]) for record in school_coordinates]

### use the API to calculate the driving distance
def calculate_distance_matrix_in_batches(house_coords, school_coords, batch_size=1):
    """Find the closest school for each house from the API's distance matrix.

    Uses the notebook-level names ``client`` (API client) and
    ``rent_coordinates`` / ``school_coordinates`` (address lookups, indexed in
    the same order as ``house_coords`` / ``school_coords``).

    Args:
        house_coords: List of (lon, lat) pairs used as matrix sources.
        school_coords: List of (lon, lat) pairs used as matrix destinations.
        batch_size: Number of houses sent per API request.

    Returns:
        A list of dicts with keys 'rent_address', 'closest_school' and
        'minimum_distance_school' (distance requested with units='km').
        A batch whose processing raises is reported with print and skipped;
        a house whose distances are all None is left out.
    """
    results = []

    for start in range(0, len(house_coords), batch_size):
        chunk = house_coords[start:start + batch_size]
        try:
            chunk_len = len(chunk)
            # Houses come first in `locations`, schools after them, so the
            # source / destination indices are two consecutive ranges.
            response = client.distance_matrix(
                locations=chunk + school_coords,
                profile='driving-car',
                sources=list(range(chunk_len)),
                destinations=list(range(chunk_len, chunk_len + len(school_coords))),
                metrics=['distance'],
                units='km'
            )
            rows = response['distances']

            for offset, row in enumerate(rows[:chunk_len]):
                # Pair each usable distance with its school index; min() then
                # picks the smallest distance, earliest school on ties.
                reachable = [(dist, idx) for idx, dist in enumerate(row) if dist is not None]
                if not reachable:
                    continue
                best_distance, best_idx = min(reachable)
                results.append({
                    'rent_address': rent_coordinates[start + offset][2],
                    'closest_school': school_coordinates[best_idx][2],
                    'minimum_distance_school': best_distance
                })
        except Exception as e:
            print(f"Error calculating distance matrix: {e}")
        # pause after every request, successful or not
        time.sleep(1)

    return results

### query the API one house per request
shortest_distances = calculate_distance_matrix_in_batches(house_coords, school_coords, batch_size=1)

### save
# Fix the columns explicitly: if no distance could be computed the list is empty,
# and the CSV would otherwise be written without a header row, which the later
# combine step cannot read back.
shortest_distances_df = pd.DataFrame(
    shortest_distances,
    columns=['rent_address', 'closest_school', 'minimum_distance_school'],
)
output_path = '../data/curated/external/API/4/closest_school_4_part2.csv'
shortest_distances_df.to_csv(output_path, index=False)

print(f"saved into {output_path}")

In [None]:
import geojson
import openrouteservice
import pandas as pd
import time

### part 3
import os

# ORS_API_KEY from the environment takes precedence; the literal is kept only as a
# fallback so this cell still runs unchanged where the variable is not set.
# NOTE(review): this key is committed in source -- rotate it and remove the fallback.
api_key = os.environ.get('ORS_API_KEY', '5b3ce3597851110001cf6248ef157f3775f94378ac76799c0f696a53')
client = openrouteservice.Client(key=api_key)


### read the property chunk and the school locations
# GeoJSON text is UTF-8; state it explicitly so decoding does not depend on the platform locale.
with open('../data/raw/external/API/4/property_4_geojson_part_3.geojson', 'r', encoding='utf-8') as f:
    rent_geojson = geojson.load(f)

with open('../data/raw/external/API/4/school locations_geojson.geojson', 'r', encoding='utf-8') as f:
    school_geojson = geojson.load(f)

### extract the coordinates
def extract_coordinates(features):
    """Flatten a GeoJSON FeatureCollection of points into (lat, lon, address) tuples.

    Args:
        features: Mapping with a 'features' list; each feature has
            feature['geometry']['coordinates'] starting with longitude, latitude.

    Returns:
        A list of (lat, lon, address) tuples in feature order. The address is
        'unknown' when the feature has no properties or no 'address' entry.
    """
    coordinates = []
    for feature in features['features']:
        # A position may carry an optional third element (altitude); only the first two are used.
        lon, lat = feature['geometry']['coordinates'][:2]
        # 'properties' may be null or absent in valid GeoJSON.
        properties = feature.get('properties') or {}
        address = properties.get('address', 'unknown')
        coordinates.append((lat, lon, address))
    return coordinates

# (lat, lon, address) records for the property chunk and for the schools
rent_coordinates = extract_coordinates(rent_geojson)
school_coordinates = extract_coordinates(school_geojson)

# same points re-ordered to (lon, lat) pairs, with the address dropped
house_coords = [(record[1], record[0]) for record in rent_coordinates]
school_coords = [(record[1], record[0]) for record in school_coordinates]

### using api to calculate
def calculate_distance_matrix_in_batches(house_coords, school_coords, batch_size=1):
    """Find the closest school for each house from the API's distance matrix.

    Uses the notebook-level names ``client`` (API client) and
    ``rent_coordinates`` / ``school_coordinates`` (address lookups, indexed in
    the same order as ``house_coords`` / ``school_coords``).

    Args:
        house_coords: List of (lon, lat) pairs used as matrix sources.
        school_coords: List of (lon, lat) pairs used as matrix destinations.
        batch_size: Number of houses sent per API request.

    Returns:
        A list of dicts with keys 'rent_address', 'closest_school' and
        'minimum_distance_school' (distance requested with units='km').
        A batch whose processing raises is reported with print and skipped;
        a house whose distances are all None is left out.
    """
    results = []

    for start in range(0, len(house_coords), batch_size):
        chunk = house_coords[start:start + batch_size]
        try:
            chunk_len = len(chunk)
            # Houses come first in `locations`, schools after them, so the
            # source / destination indices are two consecutive ranges.
            response = client.distance_matrix(
                locations=chunk + school_coords,
                profile='driving-car',
                sources=list(range(chunk_len)),
                destinations=list(range(chunk_len, chunk_len + len(school_coords))),
                metrics=['distance'],
                units='km'
            )
            rows = response['distances']

            for offset, row in enumerate(rows[:chunk_len]):
                # Pair each usable distance with its school index; min() then
                # picks the smallest distance, earliest school on ties.
                reachable = [(dist, idx) for idx, dist in enumerate(row) if dist is not None]
                if not reachable:
                    continue
                best_distance, best_idx = min(reachable)
                results.append({
                    'rent_address': rent_coordinates[start + offset][2],
                    'closest_school': school_coordinates[best_idx][2],
                    'minimum_distance_school': best_distance
                })
        except Exception as e:
            print(f"Error calculating distance matrix: {e}")
        # pause after every request, successful or not
        time.sleep(1)

    return results


# query the API one house per request
shortest_distances = calculate_distance_matrix_in_batches(house_coords, school_coords, batch_size=1)

# Save the result. Fix the columns explicitly: if no distance could be computed
# the list is empty, and the CSV would otherwise be written without a header row,
# which the later combine step cannot read back.
shortest_distances_df = pd.DataFrame(
    shortest_distances,
    columns=['rent_address', 'closest_school', 'minimum_distance_school'],
)
output_path = '../data/curated/external/API/4/closest_school_4_part3.csv'
shortest_distances_df.to_csv(output_path, index=False)

print(f"saved into {output_path}")

In [None]:
import geojson
import openrouteservice
import pandas as pd
import time

### part 4
import os

# ORS_API_KEY from the environment takes precedence; the literal is kept only as a
# fallback so this cell still runs unchanged where the variable is not set.
# NOTE(review): this key is committed in source -- rotate it and remove the fallback.
api_key = os.environ.get('ORS_API_KEY', '5b3ce3597851110001cf6248ef157f3775f94378ac76799c0f696a53')
client = openrouteservice.Client(key=api_key)


### read the property chunk and the school locations
# GeoJSON text is UTF-8; state it explicitly so decoding does not depend on the platform locale.
with open('../data/raw/external/API/4/property_4_geojson_part_4.geojson', 'r', encoding='utf-8') as f:
    rent_geojson = geojson.load(f)

with open('../data/raw/external/API/4/school locations_geojson.geojson', 'r', encoding='utf-8') as f:
    school_geojson = geojson.load(f)

### find lat and long
def extract_coordinates(features):
    """Flatten a GeoJSON FeatureCollection of points into (lat, lon, address) tuples.

    Args:
        features: Mapping with a 'features' list; each feature has
            feature['geometry']['coordinates'] starting with longitude, latitude.

    Returns:
        A list of (lat, lon, address) tuples in feature order. The address is
        'unknown' when the feature has no properties or no 'address' entry.
    """
    coordinates = []
    for feature in features['features']:
        # A position may carry an optional third element (altitude); only the first two are used.
        lon, lat = feature['geometry']['coordinates'][:2]
        # 'properties' may be null or absent in valid GeoJSON.
        properties = feature.get('properties') or {}
        address = properties.get('address', 'unknown')
        coordinates.append((lat, lon, address))
    return coordinates

# (lat, lon, address) records for the property chunk and for the schools
rent_coordinates = extract_coordinates(rent_geojson)
school_coordinates = extract_coordinates(school_geojson)

# same points re-ordered to (lon, lat) pairs, with the address dropped
house_coords = [(record[1], record[0]) for record in rent_coordinates]
school_coords = [(record[1], record[0]) for record in school_coordinates]

### using api
def calculate_distance_matrix_in_batches(house_coords, school_coords, batch_size=1):
    """Find the closest school for each house from the API's distance matrix.

    Uses the notebook-level names ``client`` (API client) and
    ``rent_coordinates`` / ``school_coordinates`` (address lookups, indexed in
    the same order as ``house_coords`` / ``school_coords``).

    Args:
        house_coords: List of (lon, lat) pairs used as matrix sources.
        school_coords: List of (lon, lat) pairs used as matrix destinations.
        batch_size: Number of houses sent per API request.

    Returns:
        A list of dicts with keys 'rent_address', 'closest_school' and
        'minimum_distance_school' (distance requested with units='km').
        A batch whose processing raises is reported with print and skipped;
        a house whose distances are all None is left out.
    """
    results = []

    for start in range(0, len(house_coords), batch_size):
        chunk = house_coords[start:start + batch_size]
        try:
            chunk_len = len(chunk)
            # Houses come first in `locations`, schools after them, so the
            # source / destination indices are two consecutive ranges.
            response = client.distance_matrix(
                locations=chunk + school_coords,
                profile='driving-car',
                sources=list(range(chunk_len)),
                destinations=list(range(chunk_len, chunk_len + len(school_coords))),
                metrics=['distance'],
                units='km'
            )
            rows = response['distances']

            for offset, row in enumerate(rows[:chunk_len]):
                # Pair each usable distance with its school index; min() then
                # picks the smallest distance, earliest school on ties.
                reachable = [(dist, idx) for idx, dist in enumerate(row) if dist is not None]
                if not reachable:
                    continue
                best_distance, best_idx = min(reachable)
                results.append({
                    'rent_address': rent_coordinates[start + offset][2],
                    'closest_school': school_coordinates[best_idx][2],
                    'minimum_distance_school': best_distance
                })
        except Exception as e:
            print(f"Error calculating distance matrix: {e}")
        # pause after every request, successful or not
        time.sleep(1)

    return results


# query the API one house per request
shortest_distances = calculate_distance_matrix_in_batches(house_coords, school_coords, batch_size=1)

# Save the result. Fix the columns explicitly: if no distance could be computed
# the list is empty, and the CSV would otherwise be written without a header row,
# which the later combine step cannot read back.
shortest_distances_df = pd.DataFrame(
    shortest_distances,
    columns=['rent_address', 'closest_school', 'minimum_distance_school'],
)
output_path = '../data/curated/external/API/4/closest_school_4_part4.csv'
shortest_distances_df.to_csv(output_path, index=False)

print(f"save into {output_path}")

In [None]:
### the per-part result files, in part order
school_csv = [
    '../data/curated/external/API/4/closest_school_4_part1.csv',
    '../data/curated/external/API/4/closest_school_4_part2.csv',
    '../data/curated/external/API/4/closest_school_4_part3.csv',
    '../data/curated/external/API/4/closest_school_4_part4.csv',
    '../data/curated/external/API/4/closest_school_4_part5.csv',
]

### read every part and stack them into one table with a fresh 0..n-1 index
school_list = [pd.read_csv(part_path) for part_path in school_csv]
combined_df = pd.concat(school_list, ignore_index=True)

### save
combined_df.to_csv('../data/curated/external/API/4/closest_school_distance_final_4.csv', index=False)

print("CSV saved")
