In [1]:
import pandas as pd
import geopandas as gpd
import os
from shapely.geometry import Point

def map_schools_to_regions(input_csv, regions_geojson, count_csv):
    """Count the point records of a CSV that fall inside each region polygon.

    Despite the name, nothing here is school-specific: any CSV with ``lon``
    and ``lat`` columns can be passed.

    Parameters
    ----------
    input_csv : str
        Path to a CSV holding one point per row in ``lon``/``lat`` columns.
        The coordinates are assumed to be WGS84 degrees (EPSG:4326) --
        TODO confirm against the data source.
    regions_geojson : str
        Path of the region layer, read with ``geopandas.read_file``. It must
        provide an ``SA2_NAME21`` column.
    count_csv : str
        Destination CSV for the per-region counts. Its parent directory is
        created when it does not exist yet.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``region`` and ``<input file stem>_count``. Points that
        lie within no region have no region name and are therefore left out
        of the counts.

    Raises
    ------
    KeyError
        If ``lon``/``lat`` are missing from the CSV or ``SA2_NAME21`` is
        missing from the region layer.
    """
    schools_df = pd.read_csv(input_csv)
    regions_gdf = gpd.read_file(regions_geojson)

    missing_columns = [col for col in ('lon', 'lat') if col not in schools_df.columns]
    if missing_columns:
        raise KeyError(f"{input_csv} is missing required column(s): {missing_columns}")
    if 'SA2_NAME21' not in regions_gdf.columns:
        raise KeyError(f"{regions_geojson} has no 'SA2_NAME21' column")

    # Declare what the coordinates actually are and then *reproject* them.
    # set_crs() only relabels the CRS without moving any coordinates, so using
    # it to "match" the regions silently misplaces points whenever the region
    # layer is not in the same CRS as the lon/lat values.
    # When the region layer carries no CRS there is nothing to reproject to,
    # so the raw coordinates are compared as-is.
    regions_have_crs = regions_gdf.crs is not None
    schools_gdf = gpd.GeoDataFrame(
        schools_df,
        geometry=gpd.points_from_xy(schools_df['lon'], schools_df['lat']),
        crs='EPSG:4326' if regions_have_crs else None,
    )
    if regions_have_crs:
        schools_gdf = schools_gdf.to_crs(regions_gdf.crs)

    # Left join keeps every point; unmatched points get a missing region name.
    schools_in_regions = gpd.sjoin(schools_gdf, regions_gdf, how='left', predicate='within')

    # The count column is named after the input file so that several count
    # tables can later sit side by side without clashing.
    input_file_name = os.path.splitext(os.path.basename(input_csv))[0]
    region_school_counts = (
        schools_in_regions['SA2_NAME21']
        .value_counts()
        .rename_axis('region')
        .reset_index(name=f'{input_file_name}_count')
    )

    # Make the function usable on its own, without relying on a caller having
    # created the output folder beforehand.
    output_dir = os.path.dirname(count_csv)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    region_school_counts.to_csv(count_csv, index=False)

    return region_school_counts




# Mapping facilities to SA2

In [2]:
# Region layer and point file for the school1 run.
# NOTE(review): despite the variable name, the region path has no .geojson
# extension -- presumably an unzipped dataset folder; confirm.
regions_geojson = '../../data/landing/region_data/sa2_dataset/sa2_unzip'
input_csv = '../../data/raw/osm_data/school1.csv'

# The counts are written here; create the folder first so the write cannot
# fail on a missing directory.
count_csv = '../../data/raw/facility_count/school1_count.csv'
output_dir = os.path.dirname(count_csv)
os.makedirs(output_dir, exist_ok=True)

schools_mapped = map_schools_to_regions(
    input_csv=input_csv,
    regions_geojson=regions_geojson,
    count_csv=count_csv,
)

In [3]:

# Same region layer as before, this time counting the school2 points.
regions_geojson = '../../data/landing/region_data/sa2_dataset/sa2_unzip'
schools_csv = '../../data/raw/osm_data/school2.csv'
count_csv = '../../data/raw/facility_count/school2_count.csv'

schools_mapped = map_schools_to_regions(
    input_csv=schools_csv,
    regions_geojson=regions_geojson,
    count_csv=count_csv,
)


In [4]:

# Count entertainment points per region.
# (`schools_csv` is just the input path here; the name is kept as-is.)
regions_geojson = '../../data/landing/region_data/sa2_dataset/sa2_unzip'
schools_csv = '../../data/raw/osm_data/entertainments.csv'
count_csv = '../../data/raw/facility_count/entertainments_count.csv'

schools_mapped = map_schools_to_regions(
    input_csv=schools_csv,
    regions_geojson=regions_geojson,
    count_csv=count_csv,
)


In [5]:

# Count hospital points per region.
# (`schools_csv` is just the input path here; the name is kept as-is.)
regions_geojson = '../../data/landing/region_data/sa2_dataset/sa2_unzip'
schools_csv = '../../data/raw/osm_data/hospital.csv'
count_csv = '../../data/raw/facility_count/hospital_count.csv'

schools_mapped = map_schools_to_regions(
    input_csv=schools_csv,
    regions_geojson=regions_geojson,
    count_csv=count_csv,
)


In [6]:
# Count park points per region.
# (`schools_csv` is just the input path here; the name is kept as-is.)
regions_geojson = '../../data/landing/region_data/sa2_dataset/sa2_unzip'
schools_csv = '../../data/raw/osm_data/park.csv'
count_csv = '../../data/raw/facility_count/park_count.csv'

schools_mapped = map_schools_to_regions(
    input_csv=schools_csv,
    regions_geojson=regions_geojson,
    count_csv=count_csv,
)

In [7]:

# Count the points of psf.csv per region.
# (`schools_csv` is just the input path here; the name is kept as-is.)
regions_geojson = '../../data/landing/region_data/sa2_dataset/sa2_unzip'
schools_csv = '../../data/raw/osm_data/psf.csv'
count_csv = '../../data/raw/facility_count/psf_count.csv'

schools_mapped = map_schools_to_regions(
    input_csv=schools_csv,
    regions_geojson=regions_geojson,
    count_csv=count_csv,
)

In [8]:

# Count shop points per region.
# (`schools_csv` is just the input path here; the name is kept as-is.)
regions_geojson = '../../data/landing/region_data/sa2_dataset/sa2_unzip'
schools_csv = '../../data/raw/osm_data/shop.csv'
count_csv = '../../data/raw/facility_count/shop_count.csv'

schools_mapped = map_schools_to_regions(
    input_csv=schools_csv,
    regions_geojson=regions_geojson,
    count_csv=count_csv,
)

## Count stops in each SA2

In [9]:
import pandas as pd

# Input: stops table that already has an SA2_NAME21 column per row.
# Output: one row per SA2 name with the number of stops carrying that name.
file_path = '../../data/raw/stops_data/stops_datavic_mapped.csv'
output_path = '../../data/raw/facility_count/stops_datavic_count.csv'

df = pd.read_csv(file_path)

# Tally rows per SA2 name and label the two resulting columns in one chain.
unique_sa2_counts = (
    df['SA2_NAME21']
    .value_counts()
    .rename_axis('region')
    .reset_index(name='stop_count')
)

unique_sa2_counts.to_csv(output_path, index=False)

print(f"Unique SA2_NAME21 counts saved to {output_path}")

Unique SA2_NAME21 counts saved to ../../data/raw/facility_count/stops_datavic_count.csv


In [10]:
import pandas as pd
import os

# Count tables to combine. Each one is merged on its 'region' column, so every
# file must provide it.
csv_files = [
    '../../data/raw/facility_count/entertainments_count.csv',
    '../../data/raw/facility_count/hospital_count.csv',
    '../../data/raw/facility_count/park_count.csv',
    '../../data/raw/facility_count/psf_count.csv',
    '../../data/raw/facility_count/school2_count.csv',
    '../../data/raw/facility_count/school1_count.csv',
    '../../data/raw/facility_count/shop_count.csv',
    '../../data/raw/facility_count/stops_datavic_count.csv'
]

# Start from the first table, then outer-join each remaining table onto it by
# 'region'. The outer join keeps a region that appears in any file; counts it
# lacks in the other files come out as NaN.
first_file, *remaining_files = csv_files
first_df = pd.read_csv(first_file)

for file in remaining_files:
    df = pd.read_csv(file)
    first_df = first_df.merge(df, on='region', how='outer')

# Persist the combined table next to the individual count files.
output_csv = '../../data/raw/facility_count/facility_merged.csv'
first_df.to_csv(output_csv, index=False)

# Show the combined table.
print(first_df)




                   region  entertainments_count  hospital_count  park_count  \
0    Melbourne CBD - East                 317.0             NaN         2.0   
1    Melbourne CBD - West                 153.0             NaN         3.0   
2                 Fitzroy                 135.0             2.0        13.0   
3                 Carlton                  97.0             1.0        21.0   
4                 Geelong                  94.0             3.0        27.0   
..                    ...                   ...             ...         ...   
558            Deniliquin                   NaN             NaN         NaN   
559            Tumbarumba                   NaN             NaN         NaN   
560  Naracoorte Surrounds                   NaN             NaN         NaN   
561            Naracoorte                   NaN             NaN         NaN   
562                  Yass                   NaN             NaN         NaN   

     psf_count  school2_count  school1_count  shop_