In [7]:
import pandas as pd
import json
import geopandas as gpd
from shapely.geometry import Point
import requests
from collections import defaultdict

In [8]:
# read into shape file
sa2_sf = gpd.read_file("../data/shapefile/SA2/SA2_2021_AUST_GDA2020.shp")
sa2_sf['geometry'] = sa2_sf['geometry'].to_crs("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
victoria_sa2 = sa2_sf[sa2_sf['STE_NAME21'] == 'Victoria']

In [9]:
lga_sf = gpd.read_file("../data/shapefile/LGA/LGA_2021_AUST_GDA2020.shp")
lga_sf['geometry'] = lga_sf['geometry'].to_crs("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
victoria_lga = lga_sf[lga_sf['STE_NAME21'] == 'Victoria']
victoria_lga.to_file('../data/raw/victoria_lga.geojson', driver='GeoJSON')

In [10]:
mapping = []
victoria_lga = victoria_lga.reset_index(drop=True)
victoria_sa2 = victoria_sa2.reset_index(drop=True)

# iterate over each sa2 district:
for index, sa2_row in victoria_sa2.iterrows():
    # Check if the geometry is not None
    if sa2_row['geometry'] is not None:
        # Find the intersection area between the current sa2 district and all lga districts
        intersections = victoria_lga.intersection(sa2_row['geometry'])
        
        # Calculate the area of each intersection
        areas = intersections.area
        
        # Find the index of the lga district with the maximum intersection area
        max_area_index = areas.idxmax()
        
        # Check if max_area_index is not NaN and the maximum intersection area is greater than zero
        if pd.notna(max_area_index) and areas[max_area_index] > 0:
            # Append the mapping to the list
            mapping.append({
                'SA2_CODE21': sa2_row['SA2_CODE21'],
                'SA2_NAME21': sa2_row['SA2_NAME21'],  # Extracting the sa2 name
                'LGA_CODE21': victoria_lga.iloc[max_area_index]['LGA_CODE21'],
                'LGA_NAME21': victoria_lga.iloc[max_area_index]['LGA_NAME21']  # Extracting the lga name
            })
        else:
            # Handle the case where there's no valid intersection (e.g., log a warning or skip)
            print(f"Warning: No valid intersection found for SA2_CODE21: {sa2_row['SA2_CODE21']}")
    else:
        print(f"Warning: Missing geometry for SA2_CODE21: {sa2_row['SA2_CODE21']}")

# 3. Convert the Mapping List to a DataFrame
mapping_df = pd.DataFrame(mapping)




  areas = intersections.area




In [11]:
# Save the mapping csv
mapping_df.to_csv("../data/raw/sa2_to_lga.csv")