In [1]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
from shapely.geometry import Point
import geopandas as gpd
import matplotlib.pyplot as plt

In [2]:
# --- Station points (curated shapefile) ---
all_points_postcovid = gpd.read_file(
    '../data/curated/stations/all_points_postcovid_(SA2 without stations).shp'
)

# --- Non-residential facilities (raw tables with Longitude/Latitude columns) ---
hospitals = pd.read_csv(
    '../data/raw/features/non-residential_facilities/hospitals.csv'
)
schools = pd.read_csv(
    '../data/raw/features/non-residential_facilities/schools.csv'
)
shopping_centres = pd.read_csv(
    '../data/raw/features/non-residential_facilities/shopping_centre.csv'
)
# Sport facilities come from two separate files (CSV and Excel); both are used later.
sport_facilities = pd.read_csv(
    '../data/raw/features/non-residential_facilities/sport_facilities.csv'
)
sport_facilities2 = pd.read_excel(
    '../data/raw/features/non-residential_facilities/sport_facilities.xlsx'
)

# --- Census tables: 2021 medians/averages per SA2 and the 2023 population layer ---
census = pd.read_csv(
    '../data/raw/features/SA2-T02_Selected_Medians_and_Averages.csv/abs_2021census_t02_aust_sa2-8898358042715570011.csv'
)
census2 = gpd.read_file(
    '../data/raw/features/POPULATION_2023_SA2_GDA2020/32180_ERP_2023_SA2_GDA2020.gpkg'
)

In [3]:
# Keep only the Victorian SA2 rows of the 2023 population layer.
census2 = census2[census2['State_name_2021'] == 'Victoria']

# Estimated population per SA2 = area (km^2) * population density (people per km^2).
# This is a plain column product, so it is computed in one vectorised step on a fresh
# frame instead of looping over groupby slices (which shadowed the builtin `id` and
# assigned into slices of census2).
census_key_cols = ['SA2_name_2021', 'SA2_code_2021']
census_df = (
    census2[census_key_cols + ['Area_km2', 'Pop_density_2023_people_per_km2']]
    # groupby() silently dropped rows with a missing key and emitted groups in key
    # order; reproduce both so the resulting rows and their order are unchanged.
    .dropna(subset=census_key_cols)
    .sort_values(census_key_cols, kind='stable')
    .assign(
        total_population=lambda df: df['Area_km2'] * df['Pop_density_2023_people_per_km2']
    )
)

# Join the 2021 census medians/averages onto the population table by SA2 code.
# NOTE(review): the census CSV column name carries a leading space (' sa2_code_2021');
# both key columns must share a dtype for this inner merge to match rows - confirm.
census_df = pd.merge(census_df, census, left_on='SA2_code_2021', right_on=' sa2_code_2021')

In [4]:
# Greater Capital City Statistical Areas, reprojected to EPSG:7855 (the CRS used for
# every other layer in this notebook).
GCCSA_2021 = gpd.read_file('../data/raw/map polygons/GCCSA_2021_AUST_SHP_GDA2020/GCCSA_2021_AUST_GDA2020.shp')
GCCSA_2021 = GCCSA_2021.to_crs('EPSG:7855')

# Pick the Greater Melbourne polygon by name rather than by row position, so a change in
# row order cannot silently select a different region.
# NOTE(review): 'GCC_NAME21' is the name column assumed for this ABS shapefile (it follows
# the same pattern as 'SA2_NAME21' used below) - confirm. If the column or the value is
# missing, fall back to the original positional lookup (row 4).
if 'GCC_NAME21' in GCCSA_2021.columns and (GCCSA_2021['GCC_NAME21'] == 'Greater Melbourne').any():
    Greater_Melbourne_2021 = GCCSA_2021.loc[
        GCCSA_2021['GCC_NAME21'] == 'Greater Melbourne', 'geometry'
    ].iloc[0]
else:
    Greater_Melbourne_2021 = GCCSA_2021.iloc[4]['geometry']

# SA2 polygons in the same CRS, restricted to those lying within Greater Melbourne.
SA2_2021 = gpd.read_file('../data/raw/map polygons/SA2_2021_AUST_SHP_GDA2020/SA2_2021_AUST_GDA2020.shp')
SA2_2021 = SA2_2021.to_crs('EPSG:7855')
SA2_GreaterMelbourne_2021 = SA2_2021[SA2_2021.within(Greater_Melbourne_2021)]

In [5]:
def _to_projected_points(facilities, source_crs="EPSG:4326", target_crs="EPSG:7855"):
    """Turn a table with 'Longitude'/'Latitude' columns into a geometry-only GeoDataFrame.

    Parameters
    ----------
    facilities : pandas.DataFrame
        Table containing 'Longitude' and 'Latitude' columns.
    source_crs : str
        CRS the coordinates are declared in (default "EPSG:4326").
    target_crs : str
        CRS the points are reprojected to (default "EPSG:7855").

    Returns
    -------
    geopandas.GeoDataFrame
        One point per input row (same index), in ``target_crs``, with only the
        'geometry' column.
    """
    points = gpd.GeoDataFrame(
        geometry=gpd.points_from_xy(facilities['Longitude'], facilities['Latitude']),
        index=facilities.index,
        crs=source_crs,
    )
    return points.to_crs(target_crs)[['geometry']]


# The same conversion applies to every facility table, so run them all through one helper
# instead of repeating the point-building / CRS / column-selection steps per table.
hospitals = _to_projected_points(hospitals)
schools = _to_projected_points(schools)
shopping_centres = _to_projected_points(shopping_centres)
sport_facilities = _to_projected_points(sport_facilities)
sport_facilities2 = _to_projected_points(sport_facilities2)

---

In [6]:
# Buffer radii used for the station catchment circles below.
# NOTE(review): presumably ANN is an average-nearest-neighbour distance in the units of
# the projected CRS - confirm where 1420.443 was derived.
ANN = 1420.443
half_ANN = ANN / 2

In [67]:
# Build a circular buffer around every station point for each radius (ANN and ANN/2)
# and store it as an extra geometry column on all_points_postcovid.
for circle_column, radius in (('circle_ann', ANN), ('circle_half_ann', half_ANN)):
    all_points_postcovid[circle_column] = all_points_postcovid['geometry'].buffer(radius)

In [68]:
# Two independent copies of the station table, each using one of the circle columns as
# its active geometry.
all_points_postcovid_ann = all_points_postcovid.copy().set_geometry('circle_ann')
all_points_postcovid_half_ann = all_points_postcovid.copy().set_geometry('circle_half_ann')

# Area of each full-ANN circle (active geometry of this frame).
all_points_postcovid_ann['circle_area'] = all_points_postcovid_ann.geometry.area

In [41]:
# Find the facility points that intersect a half-ANN circle.
# `predicate=` replaces the deprecated `op=` keyword of gpd.sjoin (deprecated in
# geopandas 0.10, removed in 1.0), which also silences the warning raised per call.
schools_in_half_ann = gpd.sjoin(schools, all_points_postcovid_half_ann, predicate='intersects')
sport_facilities_in_half_ann = gpd.sjoin(sport_facilities, all_points_postcovid_half_ann, predicate='intersects')
sport_facilities2_in_half_ann = gpd.sjoin(sport_facilities2, all_points_postcovid_half_ann, predicate='intersects')
shopping_centres_in_half_ann = gpd.sjoin(shopping_centres, all_points_postcovid_half_ann, predicate='intersects')
hospitals_in_half_ann = gpd.sjoin(hospitals, all_points_postcovid_half_ann, predicate='intersects')

# Unique station names whose half-ANN circle contains at least one facility of each kind.
stations_with_schools = schools_in_half_ann['Point Name'].unique()
stations_with_sport_facilities = sport_facilities_in_half_ann['Point Name'].unique()
stations_with_sport_facilities2 = sport_facilities2_in_half_ann['Point Name'].unique()
# The two sport-facility sources are combined into a single set of station names.
stations_with_sport_facilities = set(stations_with_sport_facilities).union(set(stations_with_sport_facilities2))
stations_with_shopping_centres = shopping_centres_in_half_ann['Point Name'].unique()
stations_with_hospitals = hospitals_in_half_ann['Point Name'].unique()

# Add 0/1 indicator features onto all_points_postcovid. A vectorised isin() replaces the
# per-row `x in ...` lookup, which rescanned the whole name array for every station.
station_names = all_points_postcovid['Point Name']
all_points_postcovid['has_school'] = station_names.isin(stations_with_schools).astype(int)
all_points_postcovid['has_sport_facility'] = station_names.isin(stations_with_sport_facilities).astype(int)
all_points_postcovid['has_shopping_centre'] = station_names.isin(stations_with_shopping_centres).astype(int)
all_points_postcovid['has_hospital'] = station_names.isin(stations_with_hospitals).astype(int)

  if await self.run_code(code, result, async_=asy):
  if await self.run_code(code, result, async_=asy):
  if await self.run_code(code, result, async_=asy):
  if await self.run_code(code, result, async_=asy):
  if await self.run_code(code, result, async_=asy):


In [70]:
# NOTE: this cell rebinds `census`, `all_points_postcovid_ann` and `all_points_postcovid`
# (and drops the geometry columns at the end), so re-run the cells above before
# re-running it.

# Attach the Greater Melbourne SA2 polygons to the census table. how='right' keeps every
# census row, so SA2s without a matching polygon carry a missing geometry.
census = pd.merge(
    SA2_GreaterMelbourne_2021[['SA2_NAME21', 'geometry']],
    census_df,
    left_on='SA2_NAME21',
    right_on='SA2_name_2021',
    how='right',
)
census['area'] = census.geometry.area

# find overlapping area: one row per (station circle, SA2) intersection
all_points_postcovid_ann = gpd.overlay(all_points_postcovid_ann, census, how='intersection')
all_points_postcovid_ann['intersection_area'] = all_points_postcovid_ann.geometry.area
# share of the SA2 polygon that falls inside the circle
all_points_postcovid_ann['sa2_intersection_percentage'] = (
    all_points_postcovid_ann['intersection_area'] / all_points_postcovid_ann['area']
)
# share of the circle that is covered by this SA2 polygon
all_points_postcovid_ann['circle_intersection_percentage'] = (
    all_points_postcovid_ann['intersection_area'] / all_points_postcovid_ann['circle_area']
)

# Columns holding totals vs. columns holding averages/medians (names exactly as they
# appear in the source tables, including leading spaces).
total_cols = ['total_population']
average_cols = [
    'average_hh_size_c2021', ' med_tot_hh_inc_wee_c2021',
    ' avg_num_p_per_brm_c2021', ' med_age_persns_c2021',
    ' med_rent_weekly_c2021', ' med_mortg_rep_mon_c2021',
    ' med_person_inc_we_c2021', ' med_famly_inc_we_c2021',
]

# .copy() so the weighted values are written to independent frames rather than to
# slices of all_points_postcovid_ann (avoids SettingWithCopyWarning).
all_points_postcovid_ann1 = all_points_postcovid_ann[
    ['Point Name'] + total_cols + ['sa2_intersection_percentage']
].copy()
all_points_postcovid_ann2 = all_points_postcovid_ann[
    ['Point Name'] + average_cols + ['circle_intersection_percentage']
].copy()

# totals: take the share of each SA2 total that falls inside the circle
all_points_postcovid_ann1[total_cols] = all_points_postcovid_ann1[total_cols].mul(
    all_points_postcovid_ann1['sa2_intersection_percentage'], axis=0
)

# averages/medians: weight each SA2 value by the share of the circle it covers
all_points_postcovid_ann2[average_cols] = all_points_postcovid_ann2[average_cols].mul(
    all_points_postcovid_ann2['circle_intersection_percentage'], axis=0
)

# Group by 'Point Name' and sum up
all_points_postcovid_ann1_grouped = all_points_postcovid_ann1.groupby('Point Name').sum().reset_index()
all_points_postcovid_ann2_grouped = all_points_postcovid_ann2.groupby('Point Name').sum().reset_index()

# Turn the weighted sums into weighted averages by dividing by the total weight per
# station. Without this, a circle that is only partly covered by SA2 polygons (weights
# summing to less than 1) would get its averages/medians scaled down; for fully covered
# circles the total weight is 1 and the values are unchanged.
all_points_postcovid_ann2_grouped[average_cols] = all_points_postcovid_ann2_grouped[average_cols].div(
    all_points_postcovid_ann2_grouped['circle_intersection_percentage'], axis=0
)

all_points_postcovid_ann1_grouped = all_points_postcovid_ann1_grouped.drop(columns=['sa2_intersection_percentage'])
all_points_postcovid_ann2_grouped = all_points_postcovid_ann2_grouped.drop(columns=['circle_intersection_percentage'])

# Attach both feature groups to the station table; stations with no overlap keep NaN.
all_points_postcovid = pd.merge(all_points_postcovid, all_points_postcovid_ann1_grouped, how='left', on='Point Name')
all_points_postcovid = pd.merge(all_points_postcovid, all_points_postcovid_ann2_grouped, how='left', on='Point Name')

# Geometry columns are not needed in the exported feature table.
all_points_postcovid = all_points_postcovid.drop(['geometry', 'circle_ann', 'circle_half_ann'], axis=1)

In [83]:
# Write the station feature table (building flags + census features) to CSV.
# The frame's index is written as the first column (to_csv default).
ml_features_csv = '../data/curated/ML_features/census_and_buildings_postcovid.csv'
all_points_postcovid.to_csv(ml_features_csv)