In [37]:
import pandas as pd

In [40]:
import pandas as pd

def calculate_total_destinations(input_csv_path, column_names=None, output_csv_path=None):
    """
    Count, for every origin in a travel time matrix, how many destination rows it has.

    The CSV is expected to provide a 'from_id' (origin) and a 'to_id' (destination)
    column. Rows whose 'to_id' is missing are not counted.

    Parameters:
    input_csv_path (str): Path to the travel time matrix CSV file.
    column_names (dict, optional): Mapping used to rename the result's columns ('from_id' and 'total_to_id'). Keys are the current names, values the replacements. Default is None.
    output_csv_path (str, optional): Where to write the result as CSV (without the index). Nothing is written when omitted. Default is None.

    Returns:
    pd.DataFrame: One row per origin, holding the origin id and its destination count.
    """
    # Read the matrix from disk
    matrix = pd.read_csv(input_csv_path)

    # Per-origin count of non-missing destinations, flattened back into two columns
    counts = (
        matrix.groupby('from_id')['to_id']
        .count()
        .rename('total_to_id')
        .reset_index()
    )

    # Optional caller-supplied column labels
    if column_names:
        counts = counts.rename(columns=column_names)

    # Optional export
    if output_csv_path:
        counts.to_csv(output_csv_path, index=False)

    return counts

In [68]:
# Example usage: per-origin destination counts from the 30-minute hospital travel time matrix
input_csv_path = "../results/TTM_CT_hospitals_threshold_30.csv"
# Pass output_csv_path (e.g. "../results/total_destinations.csv") to also save the counts to disk.
column_names = {
    'from_id': 'CTUID',
    'total_to_id': 'total_hospitals_within_30_mins',
}

total_destinations_df = calculate_total_destinations(input_csv_path, column_names=column_names)
print(total_destinations_df["total_hospitals_within_30_mins"])

0      2
1      3
2      3
3      3
4      3
      ..
342    1
343    1
344    1
345    1
346    1
Name: total_hospitals_within_30_mins, Length: 347, dtype: int64


In [69]:
import geopandas as gpd
import pandas as pd

def merge_gdf_with_df(gdf, df, key, new_column_names=None, save_path=None):
    """
    Left-join a DataFrame onto a GeoDataFrame and zero-fill the columns the join adds.

    The key column of both inputs is compared as float. The inputs themselves are
    not modified: the cast and the rename are applied to copies.

    Parameters:
    gdf (GeoDataFrame): The original GeoDataFrame; every one of its rows is kept.
    df (DataFrame): The DataFrame to merge with the GeoDataFrame.
    key (str): The column name to join on. Must exist in both inputs and be castable to float.
    new_column_names (dict, optional): A dictionary to rename the columns in the DataFrame before merging. Keys are the original column names and values are the new column names.
    save_path (str, optional): Path to save the merged GeoDataFrame as GeoJSON. Default is None.

    Returns:
    GeoDataFrame: The merged GeoDataFrame. Rows without a match in `df` hold 0 in the
    columns contributed by `df`; missing values already present in `gdf` are left as they are.
    """
    # Work on copies so the callers' frames keep their original key dtype.
    left = gdf.copy()
    right = df.copy()

    # Convert the key columns in both frames to the same type
    left[key] = left[key].astype(float)
    right[key] = right[key].astype(float)

    # Rename columns in the DataFrame if new_column_names is provided
    if new_column_names:
        right = right.rename(columns=new_column_names)

    # Left join: keep every row of the GeoDataFrame
    merged_gdf = left.merge(right, on=key, how='left')

    # Zero-fill only what the join brought in. A blanket fillna(0) would also
    # overwrite missing values in the GeoDataFrame's own attribute columns.
    added_columns = [col for col in merged_gdf.columns if col not in left.columns]
    if added_columns:
        merged_gdf = merged_gdf.fillna({col: 0 for col in added_columns})

    # Save the updated GeoDataFrame to a file if save_path is provided
    if save_path:
        merged_gdf.to_file(save_path, driver='GeoJSON')

    return merged_gdf

# Example usage: attach the current hospital counts to the boundary file as a "_before" column
geojson_path = '../../../data/census_tract_data/boundaries_centroid_combined_data.geojson'
output_geojson_path = '../results/boundaries_centroid_combined_data_with_hospital.geojson'

gdf = gpd.read_file(geojson_path)
pdf = total_destinations_df

# The join key keeps its name; the count column gets the "_before" suffix
new_column_names = {
    'CTUID': 'CTUID',
    'total_hospitals_within_30_mins': 'total_hospitals_within_30_mins_before'
}

# Merge, rename and write the result to output_geojson_path in one call
combined_boundaries_with_hospitals = merge_gdf_with_df(
    gdf,
    pdf,
    'CTUID',
    new_column_names,
    save_path=output_geojson_path,
)

# Preview the first five rows of the key and of the new column
print(combined_boundaries_with_hospitals["CTUID"].head(5))
print(combined_boundaries_with_hospitals["total_hospitals_within_30_mins_before"].head(5))

0    5350128.04
1    5350363.06
2    5350363.07
3    5350378.23
4    5350378.24
Name: CTUID, dtype: float64
0    3.0
1    2.0
2    2.0
3    0.0
4    0.0
Name: total_hospitals_within_30_mins_before, dtype: float64


In [70]:
import random
geojson_path = '../results/boundaries_centroid_combined_data_with_hospital.geojson'
gdf = gpd.read_file(geojson_path)

# NOTE(review): the "after" counts are placeholder data — each origin's current count
# plus a random 1-3 — not values derived from a second travel time matrix.
# A dedicated generator with a fixed seed makes this cell (and the GeoJSON it writes)
# produce the same numbers on every run instead of changing with each execution.
RANDOM_SEED = 42
rng = random.Random(RANDOM_SEED)
random_increments = [rng.randint(1, 3) for _ in range(len(total_destinations_df))]

after_total_destinations = total_destinations_df.copy()
after_total_destinations["total_hospitals_within_30_mins"] = (
    total_destinations_df["total_hospitals_within_30_mins"] + random_increments
)
pdf = after_total_destinations
# Define the new column names
new_column_names = {
    'CTUID': 'CTUID',
    'total_hospitals_within_30_mins': 'total_hospitals_within_30_mins_after'
}

# Merge the GeoDataFrame with the DataFrame and rename columns
output_geojson_path = '../results/boundaries_centroid_combined_data_with_hospital_before_after_diff.geojson'
combined_boundaries_with_hospitals = merge_gdf_with_df(gdf, pdf, 'CTUID', new_column_names, save_path=output_geojson_path)

# Print the updated GeoDataFrame
print(combined_boundaries_with_hospitals.head(5)["CTUID"])
print(combined_boundaries_with_hospitals.head(5)["total_hospitals_within_30_mins_before"])
print(combined_boundaries_with_hospitals.head(5)["total_hospitals_within_30_mins_after"])

0    5350128.04
1    5350363.06
2    5350363.07
3    5350378.23
4    5350378.24
Name: CTUID, dtype: float64
0    3.0
1    2.0
2    2.0
3    0.0
4    0.0
Name: total_hospitals_within_30_mins_before, dtype: float64
0    5.0
1    4.0
2    3.0
3    0.0
4    0.0
Name: total_hospitals_within_30_mins_after, dtype: float64


In [71]:
import random
geojson_path = '../results/boundaries_centroid_combined_data_with_hospital_before_after_diff.geojson'
# This cell writes its result back to the file it reads. Dropping a diff column left
# by an earlier run keeps the merge from producing "_x"/"_y" suffixed duplicates,
# so the cell can be re-run on its own.
gdf = gpd.read_file(geojson_path).drop(
    columns=['total_hospitals_within_30_mins_diff'], errors='ignore'
)

# Per-origin change: "after" count minus the original count
diff_total_destinations = after_total_destinations.copy()
diff_total_destinations["total_hospitals_within_30_mins"] = (
    after_total_destinations["total_hospitals_within_30_mins"]
    - total_destinations_df["total_hospitals_within_30_mins"]
)
pdf = diff_total_destinations
# Define the new column names
new_column_names = {
    'CTUID': 'CTUID',
    'total_hospitals_within_30_mins': 'total_hospitals_within_30_mins_diff'
}

# Merge the GeoDataFrame with the DataFrame and rename columns
output_geojson_path = '../results/boundaries_centroid_combined_data_with_hospital_before_after_diff.geojson'
combined_boundaries_with_hospitals = merge_gdf_with_df(gdf, pdf, 'CTUID', new_column_names, save_path=output_geojson_path)

# Print the updated GeoDataFrame
print(combined_boundaries_with_hospitals.head(5)["CTUID"])
print(combined_boundaries_with_hospitals.head(5)["total_hospitals_within_30_mins_before"])
print(combined_boundaries_with_hospitals.head(5)["total_hospitals_within_30_mins_diff"])

0    5350128.04
1    5350363.06
2    5350363.07
3    5350378.23
4    5350378.24
Name: CTUID, dtype: float64
0    3.0
1    2.0
2    2.0
3    0.0
4    0.0
Name: total_hospitals_within_30_mins_before, dtype: float64
0    2.0
1    2.0
2    1.0
3    0.0
4    0.0
Name: total_hospitals_within_30_mins_diff, dtype: float64


In [72]:
# Export the merged GeoDataFrame as a flat CSV table (row index omitted)
csv_output_path = '../results/boundaries_centroid_combined_data_with_hospital_before_after_diff.csv'
combined_boundaries_with_hospitals.to_csv(path_or_buf=csv_output_path, index=False)

# Report where the file was written
print(f"GeoJSON data has been successfully converted to CSV and saved to {csv_output_path}")

GeoJSON data has been successfully converted to CSV and saved to ../results/boundaries_centroid_combined_data_with_hospital_before_after_diff.csv
