### This notebook calculates the route distance from each property to its nearest electricity infrastructure

Created by Yuecheng Wang 13-09-2024

In [10]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [11]:
# Make the project's helper module (api_function) importable from this notebook
import os
import sys

# Absolute location of the scripts folder, two levels above the working directory
scripts_path = os.path.abspath(os.path.join('..', '..', 'scripts'))

# Register the folder once only, so re-running this cell does not add duplicates
already_registered = scripts_path in sys.path
if not already_registered:
    sys.path.append(scripts_path)

from api_function import parse_coordinate, calculate_distance_car, find_closest_elec, parse_elec_coord

In [12]:
# Load the raw Foundation Electricity Infrastructure table
elec = pd.read_csv(
    '../../data/raw/Foundation_Electricity_Infrastructure/foundation_electricity_infrastructure.csv'
)

In [13]:
# Preview the first rows of the raw table (head() defaults to 5 rows)
elec.head()

Unnamed: 0,feature_type,name,operational_status,feature_date,feature_source,attribute_date,attribute_source,custodian_agency,custodian_licensing,loading_date,class,voltage,address,geometry,owner
0,Substation,Oaklands Hill Wind Farm,Operational,1392077000000.0,Esri World Imagery,1360800000000,AEMO-Australian Energy Market Operators 2013,Geoscience Australia,This material is released under the Creative C...,1602806400000,Switchyard,66.0,Glenthompson Victoria,POINT (142.55225991500004 -37.68147181699993),
1,Substation,Mortons Lane Wind Farm,Operational,1329955000000.0,Esri World Imagery,1360800000000,AEMO-Australian Energy Market Operators 2013,Geoscience Australia,This material is released under the Creative C...,1602806400000,Switchyard,66.0,Woodhouse Victoria,POINT (142.4662795470001 -37.83515724799997),
2,Substation,Ballarat North,Operational,1320624000000.0,Esri World Imagery,1360800000000,AEMO-Australian Energy Market Operators 2013,Geoscience Australia,This material is released under the Creative C...,1602806400000,Substation,66.0,Wendouree Victoria,POINT (143.84689522400004 -37.534611850999966),
3,Substation,Challicum Hills,Operational,1347322000000.0,Esri World Imagery,1360800000000,AEMO-Australian Energy Market Operators 2013,Geoscience Australia,This material is released under the Creative C...,1602806400000,Substation,66.0,Buangor Victoria,POINT (143.15238623800008 -37.35935152299993),
4,Substation,Ararat,Operational,1325462000000.0,Esri World Imagery,1360800000000,AEMO-Australian Energy Market Operators 2013,Geoscience Australia,This material is released under the Creative C...,1602806400000,Substation,66.0,Ararat Victoria,POINT (142.92417038000008 -37.26872753199996),


In [14]:
# Turn the 'geometry' strings (e.g. "POINT (142.55 -37.68)") into numeric
# 'latitude' / 'longitude' columns via parse_elec_coord.
# The guard makes this cell safe to re-run: after the first run `elec` no longer
# has a 'geometry' column, and parsing it again would raise a KeyError.
if 'geometry' in elec.columns:
    elec[['latitude', 'longitude']] = elec['geometry'].apply(parse_elec_coord)

# Keep only the columns needed for the nearest-infrastructure lookup
elec = elec[['name', 'latitude', 'longitude']]

print(elec.head())

                      name   latitude   longitude
0  Oaklands Hill Wind Farm -37.681472  142.552260
1   Mortons Lane Wind Farm -37.835157  142.466280
2           Ballarat North -37.534612  143.846895
3          Challicum Hills -37.359352  143.152386
4                   Ararat -37.268728  142.924170


In [15]:
# Read the Domain property data (all-postcodes file) into a DataFrame
file_path = "../../data/raw/domain/all_postcodes.csv"
domain_data = pd.read_csv(filepath_or_buffer=file_path)

In [16]:
# Small sample for a trial run: the first five rows (head() defaults to 5)
sample_domain = domain_data.head()

In [17]:
# Trial run on the small sample: for each property, look up the closest
# electricity facility and request the driving distance to it.
route_distances = []
for index, row in sample_domain.iterrows():
    property_coords = parse_coordinate(row['Coordinates'])

    # A copy of `elec` is passed on every call.
    # NOTE(review): presumably find_closest_elec adds a 'distance' column to the
    # frame it receives (the printed result carries one) — confirm before removing the copy.
    closest_elec = find_closest_elec(property_coords, elec.copy())

    if isinstance(closest_elec, int):
        # An int return value signals invalid coordinates
        print(f"Skipping property at index {index} due to invalid coordinates.")
        route_distances.append(-1)  # Store a placeholder value
        continue

    # Get closest elec coordinates
    elec_coords = (closest_elec['latitude'], closest_elec['longitude'])

    # Calculate route distance using OSRM API
    route_distance = calculate_distance_car(row, elec_coords)
    # Record the result so the list ends up with one entry per sampled row,
    # matching what the full-run loop does (previously only placeholders were stored).
    route_distances.append(route_distance)

    print(closest_elec)
    print(route_distance)
    print("------")

name         Queen Victoria Market Solar Array
latitude                            -37.806944
longitude                           144.956944
distance                             423.03686
Name: 140, dtype: object
0.8477
------
name         Queen Victoria Market Solar Array
latitude                            -37.806944
longitude                           144.956944
distance                            771.146572
Name: 140, dtype: object
1.3123
------
name            Carlton
latitude     -37.806118
longitude     144.96065
distance     254.622277
Name: 129, dtype: object
0.8981
------
name            Carlton
latitude     -37.806118
longitude     144.96065
distance     321.980821
Name: 129, dtype: object
0.8288
------
name           Carlton
latitude    -37.806118
longitude    144.96065
distance      351.1298
Name: 129, dtype: object
0.8287
------


In [9]:
# Full run: compute the route distance for every property, processed in
# `num_parts` chunks so an interrupted run can resume from the per-part files.
num_parts = 20

# Make sure the output folder exists BEFORE any per-part file is written into it
output_relative_dir = '../../data/raw/domain/'
os.makedirs(output_relative_dir, exist_ok=True)

# Split row positions rather than the DataFrame itself: np.array_split on a
# DataFrame goes through the deprecated DataFrame.swapaxes and emits a
# FutureWarning. Splitting positions yields the same contiguous, near-equal chunks.
split_positions = np.array_split(np.arange(len(domain_data)), num_parts)
split_data = [domain_data.iloc[positions] for positions in split_positions]

# List to store paths of temporary files
temp_files = []

# Process each part separately
for i, part in enumerate(split_data):
    temp_file = f'../../data/raw/domain/elec_distance_part_{i+1}.csv'

    # Reuse a cached part only when it holds exactly one row per property in this
    # chunk. A file left over from a run with a different `num_parts`, or a
    # truncated file, would otherwise be combined silently and misalign the output.
    if os.path.exists(temp_file):
        try:
            cached_rows = len(pd.read_csv(temp_file))
        except pd.errors.EmptyDataError:
            cached_rows = None  # zero-byte file: treat as unusable
        if cached_rows == len(part):
            print(f"File for Part {i+1} already exists. Skipping processing for this part.")
            temp_files.append(temp_file)
            continue
        print(f"File for Part {i+1} does not match the expected {len(part)} rows. Recomputing this part.")

    route_distances = []

    # Iterate through the part and calculate distances
    for index, row in tqdm(part.iterrows(), total=len(part), desc=f"Processing Part {i+1}"):
        property_coords = parse_coordinate(row['Coordinates'])

        # NOTE(review): a copy is passed presumably because find_closest_elec
        # modifies the frame it receives — confirm before removing the copy.
        closest_elec = find_closest_elec(property_coords, elec.copy())

        if isinstance(closest_elec, int):
            # An int return value signals invalid coordinates
            print(f"Skipping property at index {index} due to invalid coordinates.")
            route_distances.append(-1)  # Store a placeholder value
            continue

        # Get closest elec coordinates
        elec_coords = (closest_elec['latitude'], closest_elec['longitude'])

        # Calculate route distance using OSRM API
        route_distance = calculate_distance_car(row, elec_coords)
        route_distances.append(route_distance)

    # Create a DataFrame for the current part's distances
    route_distance_df = pd.DataFrame(route_distances, columns=['route_distance_km'])

    # Write under a scratch name and rename afterwards, so an interruption during
    # the write cannot leave a half-written file under the final part name.
    partial_file = temp_file + '.tmp'
    route_distance_df.to_csv(partial_file, index=False)
    os.replace(partial_file, temp_file)
    temp_files.append(temp_file)

    # Print completion message for each part
    print(f"Completed processing Part {i+1}/{num_parts}.")

# Combine all parts into one large file (parts are appended in chunk order)
combined_df = pd.concat([pd.read_csv(file) for file in temp_files], ignore_index=True)

# The combined result must line up row-for-row with the input properties
assert len(combined_df) == len(domain_data), "combined distances do not match the number of properties"

# Save the combined DataFrame
combined_file = '../../data/raw/domain/elec_distance.csv'
combined_df.to_csv(combined_file, index=False)

# Delete the temporary files only after the combined file has been written
for file in temp_files:
    os.remove(file)

print(f"Combined file saved at {combined_file}. Temporary files deleted.")

  return bound(*args, **kwds)
Processing Part 1: 100%|██████████████████████████████████████████████████████████████████| 3/3 [00:03<00:00,  1.22s/it]


Completed processing Part 1/4000.


Processing Part 2:   0%|                                                                          | 0/3 [00:00<?, ?it/s]


KeyboardInterrupt: 