### This notebook calculates the route distance from each property to its nearest hospital

Created by Yuecheng Wang 13-09-2024

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
# import scripts for api calculation
import sys
import os

# Resolve the shared scripts directory (two levels up from this notebook)
scripts_path = os.path.abspath('../../scripts')

# Register it on the import path only once, so re-running this cell
# does not pile up duplicate entries
if sys.path.count(scripts_path) == 0:
    sys.path.append(scripts_path)

from api_function import parse_coordinate, calculate_distance_car, find_closest_hospital

In [3]:
# Load the raw hospital table; later cells read its NHSD_LAT / NHSD_LONG
# columns to locate each hospital.
hospital = pd.read_csv('../../data/raw/Hospital/Hospital_info.csv')

In [4]:
# Preview the first five rows to inspect the available hospital columns
hospital.head(5)

Unnamed: 0,OBJECTID,NHSD_OPERATIONALSTATUS,NHSD_ORGANISATION_NAME,NHSD_ADDRESS,NHSD_SUBURB,NHSD_STATE,NHSD_POSTCODE,NHSD_LONG,NHSD_LAT,NHSD_SERVICE_ID,NHSD_SERVICE_TYPE,GNAF_ADDRESS_DETAIL_PID,GA_CLASS,GA_SOURCE_DATE
0,7456,active,Monash Children's PICU,246 Clayton Road Clayton VIC 3168,Clayton,VIC,3168,145.123962,-37.920376,e2ef3edd-503e-f5d9-54cd-e25d76b5f045,Hospital service,,HOSPITAL,20240305
1,7459,active,Heidelberg Endoscopy and Day Surgery Centre,179 Northern Road Heidelberg Heights VIC 3081,Heidelberg Heights,VIC,3081,145.060425,-37.730438,0508a3e3-cb0b-9150-1402-bc47d113876a,Private same-day hospital service,GAVIC421557623,HOSPITAL,20240305
2,7462,active,Mallee Track Health & Community Service - Ouyen,28 Britt Street Ouyen VIC 3490,Ouyen,VIC,3490,142.312759,-35.074005,65c17552-fd3c-82a6-10ae-02fa4af79c14,Hospital service,GAVIC412068132,HOSPITAL,20240305
3,7465,active,West Wimmera Health Service - Rainbow Campus,2 Swinbourne Avenue Rainbow VIC 3424,Rainbow,VIC,3424,141.994843,-35.903839,cdb3c041-8206-b397-663d-ae35f16371e9,Emergency department service,,HOSPITAL,20240305
4,7468,active,Springvale Endoscopy Centre/Day Hospital,20 Balmoral Avenue Springvale VIC 3171,Springvale,VIC,3171,145.149612,-37.949612,71e93fcd-4459-7354-7b9c-e5a17e179987,Private same-day hospital service,,HOSPITAL,20240305


In [5]:
# Read in the Domain property data (all postcodes); later cells parse each
# row's 'Coordinates' column to locate the property.
file_path = "../../data/raw/domain/all_postcodes.csv"
domain_data = pd.read_csv(file_path)

In [6]:
# Take the first five properties as a small sample for a trial run
# before processing the full dataset
sample_domain = domain_data.head(5)

In [7]:
# Trial run on the sample: for each property, find the nearest hospital and
# compute the driving (route) distance to it. `route_distances` ends up with
# exactly one entry per sampled property (-1 marks a property that was skipped).
route_distances = []
for index, row in sample_domain.iterrows():
    property_coords = parse_coordinate(row['Coordinates'])

    # Pass a fresh copy on every call so the shared `hospital` table is never
    # modified (the printed result carries an extra 'distance' field that is
    # presumably added by the helper -- TODO confirm in api_function).
    closest_hospital = find_closest_hospital(property_coords, hospital.copy())

    if isinstance(closest_hospital, int):
        # An int return value is treated as the "invalid coordinates" signal
        print(f"Skipping property at index {index} due to invalid coordinates.")
        route_distances.append(-1)  # Store a placeholder value
        continue

    # Get closest hospital coordinates as (latitude, longitude)
    hospital_coords = (closest_hospital['NHSD_LAT'], closest_hospital['NHSD_LONG'])

    # Calculate route distance using OSRM API
    route_distance = calculate_distance_car(row, hospital_coords)

    # Record the result; previously the value was only printed, so the list
    # held nothing but the -1 placeholders of skipped rows.
    route_distances.append(route_distance)

    print(closest_hospital)
    print(route_distance)
    print("------")

OBJECTID                                                   7718
NHSD_OPERATIONALSTATUS                                   active
NHSD_ORGANISATION_NAME                    Skin Health Institute
NHSD_ADDRESS                80 Drummond Street Carlton VIC 3053
NHSD_SUBURB                                             Carlton
NHSD_STATE                                                  VIC
NHSD_POSTCODE                                              3053
NHSD_LONG                                            144.968613
NHSD_LAT                                             -37.804886
NHSD_SERVICE_ID            757d4a3a-b2de-e792-d2d3-a0aabc370ba3
NHSD_SERVICE_TYPE             Private same-day hospital service
GNAF_ADDRESS_DETAIL_PID                          GAVIC423629334
GA_CLASS                                               HOSPITAL
GA_SOURCE_DATE                                         20240305
distance                                            1212.502636
Name: 52, dtype: object
1.6536
------
OB

Full run: compute the route distance to the nearest hospital for every property

In [10]:
# Split the dataset into `num_parts` chunks. Each finished chunk is saved to
# its own CSV so an interrupted run can resume from the last completed chunk
# instead of repeating every routing request.
num_parts = 100

# Split by row position instead of handing the DataFrame to np.array_split:
# that route goes through the deprecated DataFrame.swapaxes and emits a
# FutureWarning on recent pandas. The resulting chunk sizes are identical.
row_positions = np.array_split(np.arange(len(domain_data)), num_parts)
split_data = [domain_data.iloc[positions] for positions in row_positions]


def _cached_part_is_complete(path, expected_rows):
    """Return True if `path` is a readable CSV holding exactly `expected_rows` rows.

    Guards the resume logic against truncated files and against files left
    over from a run that used a different `num_parts`.
    """
    if not os.path.exists(path):
        return False
    try:
        return len(pd.read_csv(path)) == expected_rows
    except (pd.errors.EmptyDataError, pd.errors.ParserError):
        return False


# List to store paths of temporary files
temp_files = []

# Process each part separately
for i, part in enumerate(split_data):
    temp_file = f'../../data/raw/domain/hospital_distance_part_{i+1}.csv'

    # Reuse an existing checkpoint only when it matches this part's row count
    if os.path.exists(temp_file):
        if _cached_part_is_complete(temp_file, len(part)):
            print(f"File for Part {i+1} already exists. Skipping processing for this part.")
            temp_files.append(temp_file)
            continue
        print(f"File for Part {i+1} is incomplete or stale. Reprocessing this part.")

    route_distances = []

    # Iterate through the part and calculate distances
    for index, row in tqdm(part.iterrows(), total=len(part), desc=f"Processing Part {i+1}"):
        property_coords = parse_coordinate(row['Coordinates'])
        # A fresh copy per call keeps the shared `hospital` table unmodified
        closest_hospital = find_closest_hospital(property_coords, hospital.copy())

        if isinstance(closest_hospital, int):
            # An int return value is treated as the "invalid coordinates" signal
            print(f"Skipping property at index {index} due to invalid coordinates.")
            route_distances.append(-1)
            continue

        hospital_coords = (closest_hospital['NHSD_LAT'], closest_hospital['NHSD_LONG'])
        route_distance = calculate_distance_car(row, hospital_coords)
        route_distances.append(route_distance)

    # Create a DataFrame for the current part's distances
    route_distance_df = pd.DataFrame(route_distances, columns=['route_distance_km'])

    # Write to a scratch name first and rename afterwards, so an interrupt
    # during the write can never leave a half-written file under the final name
    partial_file = temp_file + '.partial'
    route_distance_df.to_csv(partial_file, index=False)
    os.replace(partial_file, temp_file)
    temp_files.append(temp_file)

    # Print completion message for each part
    print(f"Completed processing Part {i+1}/{num_parts}.")

# Combine all parts into one large file, in the original row order
combined_df = pd.concat([pd.read_csv(file) for file in temp_files], ignore_index=True)

# Refuse to save (and keep the checkpoints) if the result would not line up
# row-for-row with the property table
if len(combined_df) != len(domain_data):
    raise ValueError(
        f"Combined distances have {len(combined_df)} rows but domain_data has {len(domain_data)}."
    )

# Save the combined DataFrame
combined_file = '../../data/raw/domain/hospital_distance.csv'
combined_df.to_csv(combined_file, index=False)

# Delete the temporary files
for file in temp_files:
    os.remove(file)

print(f"Combined file saved at {combined_file}. Temporary files deleted.")

  return bound(*args, **kwds)


File for Part 1 already exists. Skipping processing for this part.


Processing Part 2:   4%|██▍                                                              | 3/81 [00:04<01:55,  1.48s/it]


KeyboardInterrupt: 