In [1]:
import pandas as pd

In [None]:
import pandas as pd

def calculate_total_destinations(input_csv_path, column_names=None, output_csv_path=None):
    """
    Count, for every origin (from_id) in a travel time matrix CSV, how many destinations (to_id) it has.

    Parameters:
    input_csv_path (str): Path to the travel time matrix CSV. It must contain the columns 'from_id' and 'to_id'.
    column_names (dict, optional): Mapping used to rename the result columns ('from_id' and 'total_to_id'). Skipped when None or empty.
    output_csv_path (str, optional): If given, the result is also written to this path as CSV without the index. Default is None.

    Returns:
    pd.DataFrame: One row per distinct from_id together with its count of non-missing to_id values.
    """
    ttm = pd.read_csv(input_csv_path)

    # count() ignores missing to_id values; reset_index(name=...) produces the
    # two result columns 'from_id' and 'total_to_id' in a single step.
    counts = ttm.groupby('from_id')['to_id'].count().reset_index(name='total_to_id')

    # Optional caller-supplied renaming of the result columns
    if column_names:
        counts = counts.rename(columns=column_names)

    # Optional export of the result
    if output_csv_path:
        counts.to_csv(output_csv_path, index=False)

    return counts

In [34]:
# Example usage: build the per-CTUID table of destination counts from the
# 30-minute-threshold travel time matrix.
input_csv_path = "../results/TTM_CT_hospitals_threshold_30.csv"
# output_csv_path = "../results/total_destinations.csv"
column_names = {'from_id': 'CTUID', 'total_to_id': 'total_hospitals_within_30_mins'}

total_destinations_df = calculate_total_destinations(input_csv_path, column_names=column_names)
# NOTE(review): every count is raised by one here; the reason is not stated
# in this notebook — confirm the intent before reusing these numbers.
total_destinations_df["total_hospitals_within_30_mins"] += 1
print(total_destinations_df["total_hospitals_within_30_mins"])

0      3
1      4
2      4
3      4
4      4
      ..
342    2
343    2
344    2
345    2
346    2
Name: total_hospitals_within_30_mins, Length: 347, dtype: int64


In [33]:
import geopandas as gpd
import pandas as pd

def merge_gdf_with_df(gdf, df, key, new_column_names=None, save_path=None):
    """
    Left-join a DataFrame onto a GeoDataFrame on a shared key column.

    The key column is cast to float on both sides before joining so that keys
    stored with different dtypes still match. The cast is performed on copies:
    the caller's `gdf` and `df` are not modified by this function.

    Parameters:
    gdf (GeoDataFrame): The original GeoDataFrame (left side of the join; all of its rows are kept).
    df (DataFrame): The DataFrame to merge with the GeoDataFrame.
    key (str): The column name to join on. It must exist in both inputs and be convertible to float.
    new_column_names (dict, optional): A dictionary to rename the columns in the DataFrame before merging. Keys are the original column names and values are the new column names. Note that the join still uses `key`, so `key` should not be renamed to something else.
    save_path (str, optional): Path to save the merged GeoDataFrame as GeoJSON. Default is None.

    Returns:
    GeoDataFrame: The merged GeoDataFrame with the (renamed) columns of `df` added and missing values replaced by 0.

    Raises:
    KeyError: If `key` is not a column of `gdf` or `df`.
    """
    # Work on copies so the dtype normalisation of the key does not leak back
    # into the caller's objects.
    left = gdf.copy()
    left[key] = left[key].astype(float)
    right = df.copy()
    right[key] = right[key].astype(float)

    # Rename columns in the DataFrame if new_column_names is provided
    if new_column_names:
        right = right.rename(columns=new_column_names)

    # Left join: rows of gdf without a match in df get NaN in the added columns
    merged_gdf = left.merge(right, on=key, how='left')

    # NOTE(review): fillna(0) is applied to every column of the merged result,
    # not only to the columns that came from df; kept as-is for compatibility.
    merged_gdf = merged_gdf.fillna(0)

    # Save the updated GeoDataFrame to a file if save_path is provided
    if save_path:
        merged_gdf.to_file(save_path, driver='GeoJSON')

    return merged_gdf

# Example usage: attach the per-CTUID counts to the boundary layer and store
# them under the "_before" column name.
geojson_path = '../../../data/census_tract_data/boundaries_centroid_combined_data.geojson'
output_geojson_path = '../results/boundaries_centroid_combined_data_with_hospital.geojson'

gdf = gpd.read_file(geojson_path)
pdf = total_destinations_df

# Column renaming applied to the counts table before the merge
new_column_names = {
    'CTUID': 'CTUID',
    'total_hospitals_within_30_mins': 'total_hospitals_within_30_mins_before'
}

# Merge on CTUID and write the result to output_geojson_path
combined_boundaries_with_hospitals = merge_gdf_with_df(
    gdf,
    pdf,
    'CTUID',
    new_column_names=new_column_names,
    save_path=output_geojson_path,
)

# Show the first 20 keys and their merged counts
print(combined_boundaries_with_hospitals["CTUID"].head(20))
print(combined_boundaries_with_hospitals["total_hospitals_within_30_mins_before"].head(20))

0     5350128.04
1     5350363.06
2     5350363.07
3     5350378.23
4     5350378.24
5     5350010.01
6     5350010.02
7     5350001.00
8     5350002.00
9     5350003.00
10    5350004.00
11    5350005.00
12    5350006.00
13    5350009.00
14    5350014.00
15    5350015.00
16    5350016.00
17    5350018.00
18    5350019.00
19    5350020.00
Name: CTUID, dtype: float64
0      3.0
1      2.0
2      2.0
3      0.0
4      0.0
5      4.0
6      7.0
7      0.0
8      0.0
9      2.0
10     3.0
11     3.0
12     3.0
13     4.0
14    11.0
15    11.0
16     5.0
17     1.0
18     1.0
19     0.0
Name: total_hospitals_within_30_mins_before, dtype: float64


In [35]:
# Second merge: start from the GeoJSON that already carries the "_before"
# column and add the current counts under the "_after" column name.
geojson_path = '../results/boundaries_centroid_combined_data_with_hospital.geojson'
output_geojson_path = '../results/boundaries_centroid_combined_data_with_hospital_before_after.geojson'

gdf = gpd.read_file(geojson_path)
pdf = total_destinations_df

# Column renaming applied to the counts table before the merge
new_column_names = {
    'CTUID': 'CTUID',
    'total_hospitals_within_30_mins': 'total_hospitals_within_30_mins_after'
}

# Merge on CTUID and write the result to output_geojson_path
combined_boundaries_with_hospitals = merge_gdf_with_df(
    gdf,
    pdf,
    'CTUID',
    new_column_names=new_column_names,
    save_path=output_geojson_path,
)

# Show the first 5 keys with their "before" and "after" counts
print(combined_boundaries_with_hospitals["CTUID"].head(5))
print(combined_boundaries_with_hospitals["total_hospitals_within_30_mins_before"].head(5))
print(combined_boundaries_with_hospitals["total_hospitals_within_30_mins_after"].head(5))

0    5350128.04
1    5350363.06
2    5350363.07
3    5350378.23
4    5350378.24
Name: CTUID, dtype: float64
0    3.0
1    2.0
2    2.0
3    0.0
4    0.0
Name: total_hospitals_within_30_mins_before, dtype: float64
0    4.0
1    3.0
2    3.0
3    0.0
4    0.0
Name: total_hospitals_within_30_mins_after, dtype: float64
