# Preprocessing School zones
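- This notebook preprocesses the school zone data: it loads each school-zone GeoJSON layer, keeps the school name and zone geometry, and combines the layers into a single table.
- The combined dataset is exported to `../data/raw/school_zones/school_zones.csv` (path relative to this notebook).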

In [2]:
import sys, os
# Add the parent directory to the import path so the `scripts` package can be imported.
sys.path.append(os.path.abspath('../'))
from scripts.utils import create_dir, get_runtime
import time 
# Timestamp taken when the notebook starts running.
# NOTE(review): presumably paired with get_runtime to report elapsed time — no such call is visible here; confirm.
start_time = time.time()

import pandas as pd
import geopandas as gpd
# NOTE(review): Point is not referenced in any cell visible in this notebook.
from shapely.geometry import Point

## Primary School

In [3]:
# Load the 2024 primary school zone layer from the landing directory.
primary_zone_path = "../data/landing/school-zones/Primary_Integrated_2024.geojson"
school_primary = gpd.read_file(primary_zone_path)

# Keep only the school name and its zone geometry.
zone_columns = ['School_Name', 'geometry']
school_primary_filtered = school_primary[zone_columns]
school_primary_filtered

Unnamed: 0,School_Name,geometry
0,Lockwood Primary School,"POLYGON ((144.19743 -36.82513, 144.20366 -36.8..."
1,Lockwood South Primary School,"POLYGON ((143.99198 -36.80865, 143.99179 -36.8..."
2,Heathcote Primary School,"POLYGON ((144.6649 -37.02629, 144.88917 -36.91..."
3,Redesdale Mia Mia Primary School,"POLYGON ((144.55801 -36.89947, 144.51209 -36.9..."
4,Axedale Primary School,"POLYGON ((144.71365 -36.74947, 144.66726 -36.7..."
...,...,...
1250,Orbost Community College,"POLYGON ((148.51201 -37.74078, 148.51133 -37.7..."
1251,Laa Yulta Primary School,"POLYGON ((144.52002 -37.90403, 144.58914 -37.8..."
1252,Binap Primary School,"POLYGON ((144.48594 -37.70504, 144.49464 -37.6..."
1253,Wimba Primary School,"POLYGON ((144.60593 -37.81212, 144.61999 -37.7..."


## Secondary School

In [4]:
# Load the 2024 Year 7 secondary school zone layer from the landing directory.
secondary_zone_path = "../data/landing/school-zones/Secondary_Integrated_Year7_2024.geojson"
school_secondary = gpd.read_file(secondary_zone_path)

# Keep only the school name and its zone geometry.
zone_columns = ['School_Name', 'geometry']
school_secondary_filtered = school_secondary[zone_columns]
school_secondary_filtered

Unnamed: 0,School_Name,geometry
0,Ballarat High School,"POLYGON ((143.85475 -37.56184, 143.85634 -37.5..."
1,Phoenix P-12 Community College,"POLYGON ((143.53234 -37.65873, 143.49538 -37.7..."
2,Mount Rowan Secondary College,"POLYGON ((143.98962 -37.42296, 143.85442 -37.2..."
3,Woodmans Hill Secondary College,"POLYGON ((143.85634 -37.54563, 143.85475 -37.5..."
4,Weeroona College Bendigo,"POLYGON ((144.68929 -36.75938, 144.65997 -36.6..."
...,...,...
317,Mickleham Secondary College,"POLYGON ((144.8169 -37.55453, 144.82972 -37.49..."
318,Brinbeal Secondary College,"POLYGON ((144.56899 -37.79299, 144.61257 -37.7..."
319,Werribee Secondary College,"POLYGON ((144.63905 -38.00232, 144.63898 -38.0..."
320,Bemin Secondary College,"POLYGON ((144.66176 -37.77554, 144.69232 -37.8..."


## Standalone Junior Secondary

In [5]:
# Load the 2024 standalone junior secondary zone layer from the landing directory.
juniorsec_zone_path = "../data/landing/school-zones/Standalone_juniorsec_2024.geojson"
school_juniorsec = gpd.read_file(juniorsec_zone_path)

# Keep only the school name and its zone geometry.
zone_columns = ['School_Name', 'geometry']
school_juniorsec_filtered = school_juniorsec[zone_columns]
school_juniorsec_filtered

Unnamed: 0,School_Name,geometry
0,Timbarra P-9 College,"POLYGON ((145.31724 -38.04186, 145.29801 -38.0..."
1,Doveton College,"POLYGON ((145.20844 -38.02207, 145.23414 -37.9..."


## Standalone Senior Secondary

In [6]:
# Load the 2024 standalone senior secondary zone layer from the landing directory.
seniorsec_zone_path = "../data/landing/school-zones/Standalone_seniorsec_2024.geojson"
school_seniorsec = gpd.read_file(seniorsec_zone_path)

# Keep only the school name and its zone geometry.
zone_columns = ['School_Name', 'geometry']
school_seniorsec_filtered = school_seniorsec[zone_columns]
school_seniorsec_filtered

Unnamed: 0,School_Name,geometry
0,Swinburne Senior Secondary College,"POLYGON ((145.01812 -37.83546, 145.0274 -37.79..."
1,Box Hill Senior Secondary College,"POLYGON ((145.10939 -37.83706, 145.10692 -37.8..."
2,Northern College of the Arts and Technology,"POLYGON ((145.04937 -37.74843, 145.0169 -37.74..."


## Standalone Single-Sex

In [7]:
# Load the 2024 standalone single-sex school zone layer from the landing directory.
singlesex_zone_path = "../data/landing/school-zones/Standalone_singlesex_2024.geojson"
school_singlesex = gpd.read_file(singlesex_zone_path)

# Keep only the school name and its zone geometry.
zone_columns = ['School_Name', 'geometry']
school_singlesex_filtered = school_singlesex[zone_columns]
school_singlesex_filtered

Unnamed: 0,School_Name,geometry
0,Matthew Flinders Girls Secondary College,"POLYGON ((144.4229 -38.08573, 144.41875 -38.08..."
1,Canterbury Girls Secondary College,"POLYGON ((145.03764 -37.80765, 145.09942 -37.8..."
2,Mentone Girls Secondary College,"POLYGON ((145.07547 -38.00074, 145.0713 -37.99..."
3,Melbourne Girls College,"POLYGON ((145.02534 -37.79516, 145.04552 -37.8..."
4,Pascoe Vale Girls Secondary College,"POLYGON ((144.89236 -37.7176, 144.89282 -37.71..."


In [8]:
# Stack the five filtered zone layers (primary, secondary, junior secondary,
# senior secondary, single-sex) into one table, renumbering rows from zero.
zone_layers = [
    school_primary_filtered,
    school_secondary_filtered,
    school_juniorsec_filtered,
    school_seniorsec_filtered,
    school_singlesex_filtered,
]
combined_df = pd.concat(zone_layers, ignore_index=True)

# Show the stacked result
combined_df

Unnamed: 0,School_Name,geometry
0,Lockwood Primary School,"POLYGON ((144.19743 -36.82513, 144.20366 -36.8..."
1,Lockwood South Primary School,"POLYGON ((143.99198 -36.80865, 143.99179 -36.8..."
2,Heathcote Primary School,"POLYGON ((144.6649 -37.02629, 144.88917 -36.91..."
3,Redesdale Mia Mia Primary School,"POLYGON ((144.55801 -36.89947, 144.51209 -36.9..."
4,Axedale Primary School,"POLYGON ((144.71365 -36.74947, 144.66726 -36.7..."
...,...,...
1582,Matthew Flinders Girls Secondary College,"POLYGON ((144.4229 -38.08573, 144.41875 -38.08..."
1583,Canterbury Girls Secondary College,"POLYGON ((145.03764 -37.80765, 145.09942 -37.8..."
1584,Mentone Girls Secondary College,"POLYGON ((145.07547 -38.00074, 145.0713 -37.99..."
1585,Melbourne Girls College,"POLYGON ((145.02534 -37.79516, 145.04552 -37.8..."


## Push School zones data to `raw` layer

In [9]:
# Make sure the output directory exists, then write the combined zones as CSV
# without the row index column.
raw_zone_dir = '../data/raw/school_zones'
create_dir(raw_zone_dir)
combined_df.to_csv('../data/raw/school_zones/school_zones.csv', index=False)

Created directory: ../data/raw/school_zones

