In [None]:
import math
import os

import numpy as np
import openrouteservice as ors

def cal_distance_duration_batch_large(coordinates_list, client, batch_size=50):
    '''
    Query driving distances and durations for a list of coordinates, one batch at a time.

    The coordinates are split into consecutive batches of at most `batch_size`
    points. For every batch a single matrix request is made with the batch as
    `locations` and every index of that batch as `destinations`, so the request
    covers batch_size x batch_size pairs at most (50 x 50 = 2500 by default).

    NOTE(review): pairs whose two points fall in different batches are never
    requested. If the intent is "every rental against every train station",
    the result only covers that when both points share a batch - confirm
    against the caller.

    Parameters
    ----------
    coordinates_list : sequence
        Coordinates passed through unchanged to `client.distance_matrix`.
    client : object
        Any object exposing `distance_matrix(...)` that returns a mapping with
        'distances' and 'durations' entries (e.g. an openrouteservice client).
    batch_size : int, optional
        Maximum number of points per request. Defaults to 50.

    Returns
    -------
    tuple of (list, list)
        The 'distances' entries and the 'durations' entries of all responses,
        concatenated in batch order.

    Raises
    ------
    ValueError
        If `batch_size` is not positive.
    '''
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    all_distances = []
    all_durations = []

    # Stepping through the list by batch_size yields the same consecutive
    # slices as computing a batch count up front; the last slice may be shorter.
    for batch_start in range(0, len(coordinates_list), batch_size):
        batch_coordinates = coordinates_list[batch_start:batch_start + batch_size]

        matrix = client.distance_matrix(
            locations=batch_coordinates,
            # Every point of the batch is a destination.
            destinations=list(range(len(batch_coordinates))),
            profile='driving-car',
            metrics=['distance', 'duration'],
            validate=False,
        )

        all_distances.extend(matrix['distances'])
        all_durations.extend(matrix['durations'])

    return all_distances, all_durations

def cal_distance_duration_batch_small(coordinates_list, client, max_locations=25):
    '''
    Query driving distances and durations for a list of coordinates in small batches.

    The coordinates are split into consecutive batches of at most
    `max_locations` points (25 by default). For every batch a single matrix
    request is made with the batch as `locations` and every index of that batch
    as `destinations`.

    NOTE(review): pairs whose two points fall in different batches are never
    requested. If the intent is "every rental against every train station",
    the result only covers that when both points share a batch - confirm
    against the caller.

    Parameters
    ----------
    coordinates_list : sequence
        Coordinates passed through unchanged to `client.distance_matrix`.
    client : object
        Any object exposing `distance_matrix(...)` that returns a mapping with
        'distances' and 'durations' entries (e.g. an openrouteservice client).
    max_locations : int, optional
        Maximum number of points per request. Defaults to 25.

    Returns
    -------
    tuple of (list, list)
        The 'distances' entries and the 'durations' entries of all responses,
        concatenated in batch order.

    Raises
    ------
    ValueError
        If `max_locations` is not positive.
    '''
    if max_locations <= 0:
        raise ValueError("max_locations must be a positive integer")

    all_distances = []
    all_durations = []

    # A batch never holds more than max_locations points, so one request per
    # batch is enough; no further sub-batching is needed.
    for batch_start in range(0, len(coordinates_list), max_locations):
        batch_coordinates = coordinates_list[batch_start:batch_start + max_locations]

        matrix = client.distance_matrix(
            locations=batch_coordinates,
            # Every point of the batch is a destination.
            destinations=list(range(len(batch_coordinates))),
            profile='driving-car',
            metrics=['distance', 'duration'],
            validate=False,
        )

        all_distances.extend(matrix['distances'])
        all_durations.extend(matrix['durations'])

    return all_distances, all_durations


# The openrouteservice API key is read from the ORS_API_KEY environment variable
# so that it never lives in the notebook (falls back to an empty key when unset).
# SECURITY: a literal key was previously committed in a comment on this line;
# treat that key as leaked and revoke it.
client = ors.Client(key=os.environ.get('ORS_API_KEY', ''))
train = pd.read_csv("../data/raw/external/train_station/metropolitan_train_location_2023.csv")

# Prepare the data
# Quota/runtime note from earlier runs: 100 records consume 7 quota units,
# 500 rentals take about 20 s.
reduced_df = df.iloc[:1, :].copy()  # currently limited to the first record
train_stations = list(zip(train['Stop_long'], train['Stop_lat']))
# Rentals first, then stations, as (longitude, latitude) pairs; later cells rely
# on the rentals occupying the first len(reduced_df) positions.
rental_coordinates = list(zip(reduced_df['longtitude'], reduced_df['latitude']))
all_coordinates = rental_coordinates + train_stations

# Calculate distances and durations
# NOTE(review): this threshold is applied to the number of points, not to a
# sources x destinations product - confirm that this is the intended rule.
if len(all_coordinates) <= 3500:
    distances, durations = cal_distance_duration_batch_large(all_coordinates, client)
else:
    distances, durations = cal_distance_duration_batch_small(all_coordinates, client)


In [None]:
# Keep only the leading rows, one per rental property: the coordinate list was
# built with the rentals ahead of the train stations.
num_rentals = len(reduced_df)
rental_distances, rental_durations = distances[:num_rentals], durations[:num_rentals]

def exclude_zero_and_find_min(values):
    """Return the smallest usable entry of `values`, or None if there is none.

    Entries equal to zero are skipped, and so are `None` entries (a JSON `null`
    in a matrix response decodes to `None`, and `min()` cannot compare it with
    numbers).

    Parameters
    ----------
    values : iterable
        Numbers, possibly mixed with `None`.

    Returns
    -------
    The minimum of the remaining entries, or None when every entry was
    skipped or `values` is empty.
    """
    usable_values = [val for val in values if val is not None and val != 0]
    return min(usable_values) if usable_values else None

# For every rental row, keep the smallest non-zero distance and duration.
min_distances = list(map(exclude_zero_and_find_min, rental_distances))
min_durations = list(map(exclude_zero_and_find_min, rental_durations))

# Store both results as new columns on the rental frame.
reduced_df['min_distance'] = min_distances
reduced_df['min_duration'] = min_durations

In [None]:
# NOTE: slow - with inputs of size 2000 & 3000 this cell runs for ages (about 15 minutes).
def calculate_distance(coord1, coord2):
    """Return the distance between two coordinates in kilometres.

    Delegates to `distance`, which must be in scope when this is called
    (presumably geopy's `distance` - TODO confirm the import).
    """
    separation = distance(coord1, coord2)
    return separation.kilometers

def count_schools_within_radius(rent_df, schools_df, radius=5, distance_fn=None):
    """Count, for every rental, the schools that lie within `radius` of it.

    The counts are written to the column 'schools_within_5km' of `rent_df`
    itself (the frame is modified in place and also returned). The column name
    is fixed and does not change with `radius`.

    Parameters
    ----------
    rent_df : pandas.DataFrame
        Rentals; read columns are 'latitude' and 'longtitude' (sic).
    schools_df : pandas.DataFrame
        Schools; read columns are 'latitude' and 'longitude'.
    radius : number, optional
        Inclusive threshold compared against the value returned by
        `distance_fn`. Defaults to 5.
    distance_fn : callable, optional
        Called as `distance_fn((lat, lon), (lat, lon))`. Defaults to the
        module-level `calculate_distance`.

    Returns
    -------
    pandas.DataFrame
        The same `rent_df` object, with the count column added or overwritten.
    """
    if distance_fn is None:
        # Looked up at call time so the default helper is only needed when used.
        distance_fn = calculate_distance

    # Loop-invariant: build the school coordinates once rather than iterating
    # over the school rows again for every rental.
    school_locations = list(zip(schools_df['latitude'], schools_df['longitude']))

    counts = [
        sum(
            1
            for school_location in school_locations
            if distance_fn(rent_location, school_location) <= radius
        )
        for rent_location in zip(rent_df['latitude'], rent_df['longtitude'])
    ]

    # Positional assignment keeps one count per row even when index labels
    # repeat; the explicit dtype keeps an integer column for an empty frame.
    rent_df['schools_within_5km'] = np.asarray(counts, dtype='int64')

    return rent_df

# The helper writes its count column into `df` itself and returns that same
# frame, so `new_df` refers to the same object as `df`.
new_df = count_schools_within_radius(rent_df=df, schools_df=schools)