In [48]:
import json
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

In [49]:
# Load the EPA records from the JSON file.
# The encoding is stated explicitly: JSON text is UTF-8, while open() would
# otherwise fall back to the platform's locale encoding.
with open('../cleaned/epa_data.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Load the LGA layer that the sites are spatially joined against further down.
lga_geojson_path = '../cleaned/LGA_useful_geo/lga_for_join.json'
lga_gdf = gpd.read_file(lga_geojson_path)

In [50]:
# Reduce every record to its site name and coordinate pair.
# The first coordinate element is taken as latitude, the second as longitude.
site_data = [
    {
        'siteName': rec['siteName'],
        'latitude': rec['geometry']['coordinates'][0],
        'longitude': rec['geometry']['coordinates'][1],
    }
    for rec in data['records']
]

# One row per site: keep the first occurrence of each site name
df_sites = pd.DataFrame(site_data)
df_sites = df_sites.drop_duplicates(subset='siteName')

# Display the resulting table
df_sites


Unnamed: 0,siteName,latitude,longitude
0,Box Hill,-37.828728,145.132400
1,Alphington,-37.778408,145.030600
2,Churchill,-38.304314,146.414932
3,Traralgon South,-38.295850,146.539200
4,Traralgon,-38.194282,146.531464
...,...,...,...
88,Mooroolbark,-37.775120,145.328400
89,Footscray,-37.802660,144.877800
90,Brooklyn,-37.822098,144.847100
91,Bendigo,-36.778410,144.300064


In [51]:
# Convert the site table to a GeoDataFrame of points.
# - points_from_xy builds all geometries in one vectorised call; x is longitude,
#   y is latitude.
# - assign() returns a new frame, so df_sites itself is left unmodified.
# - The sites are tagged with the LGA layer's CRS (no reprojection happens), so this
#   assumes the site coordinates are already expressed in that CRS -- TODO confirm.
sites_gdf = gpd.GeoDataFrame(
    df_sites.assign(
        geometry=gpd.points_from_xy(df_sites['longitude'], df_sites['latitude'])
    ),
    geometry='geometry',
    crs=lga_gdf.crs,
)

In [52]:
# Spatial join: attach to each site the attributes of the LGA geometry it intersects.
# `predicate` replaces the `op` keyword, which geopandas deprecated and later removed.
# how="inner" keeps only the sites that match at least one LGA.
joined_gdf = gpd.sjoin(sites_gdf, lga_gdf, how="inner", predicate='intersects')

# Keep only the LGA identifiers and the site attributes
final_gdf = joined_gdf[['ABSLGACODE', 'LGA_NAME', 'siteName', 'latitude', 'longitude']]

# Display the joined table
final_gdf

  if await self.run_code(code, result, async_=asy):


Unnamed: 0,ABSLGACODE,LGA_NAME,siteName,latitude,longitude
0,26980,whitehorse,Box Hill,-37.828728,145.132400
1,21890,darebin,Alphington,-37.778408,145.030600
2,23810,latrobe,Churchill,-38.304314,146.414932
3,23810,latrobe,Traralgon South,-38.295850,146.539200
4,23810,latrobe,Traralgon,-38.194282,146.531464
...,...,...,...,...,...
88,27450,yarra ranges,Mooroolbark,-37.775120,145.328400
89,24330,maribyrnong,Footscray,-37.802660,144.877800
90,23110,hobsons bay,Brooklyn,-37.822098,144.847100
91,22620,greater bendigo,Bendigo,-36.778410,144.300064


In [53]:
# Key the joined table by site name and convert it to a plain dict of the form
# {siteName: {column: value, ...}}.
# set_index returns a new frame here instead of mutating final_gdf in place, so the
# cell can be re-run safely (after an in-place set_index, 'siteName' is no longer a
# column and a second run raises KeyError).
# Note: to_dict(orient='index') requires the site names to be unique.
data_dict = final_gdf.set_index('siteName').to_dict(orient='index')

print(json.dumps(data_dict, indent=4))

{
    "Box Hill": {
        "ABSLGACODE": "26980",
        "LGA_NAME": "whitehorse",
        "latitude": -37.8287277,
        "longitude": 145.1324
    },
    "Alphington": {
        "ABSLGACODE": "21890",
        "LGA_NAME": "darebin",
        "latitude": -37.7784081,
        "longitude": 145.0306
    },
    "Churchill": {
        "ABSLGACODE": "23810",
        "LGA_NAME": "latrobe",
        "latitude": -38.3043137,
        "longitude": 146.414932
    },
    "Traralgon South": {
        "ABSLGACODE": "23810",
        "LGA_NAME": "latrobe",
        "latitude": -38.29585,
        "longitude": 146.5392
    },
    "Traralgon": {
        "ABSLGACODE": "23810",
        "LGA_NAME": "latrobe",
        "latitude": -38.1942825,
        "longitude": 146.531464
    },
    "Tyers North": {
        "ABSLGACODE": "23810",
        "LGA_NAME": "latrobe",
        "latitude": -38.12967,
        "longitude": 146.4828
    },
    "Dandenong": {
        "ABSLGACODE": "22670",
        "LGA_NAME": "greater da

In [54]:
# Persist the site -> LGA mapping as JSON, relative to the working directory.
with open('matched_sites_with_lgas.json', 'w') as out_fh:
    out_fh.write(json.dumps(data_dict))