# Greater Sydney Resource Equity Analysis
This notebook covers Task 1 (Data Cleaning), Task 2 (POI API), and Task 3 (Scoring) from the assignment.

In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
from sqlalchemy import create_engine
from shapely.geometry import Point
from scipy.special import expit
import requests
import time
import zipfile
import os

## Task 1: Load and Clean Datasets

In [None]:
# Connection to the PostgreSQL database that receives every cleaned table
engine = create_engine('postgresql://username:password@localhost:5432/yourdatabase')

# Read the three raw CSV extracts
df_business = pd.read_csv('Businesses.csv')
df_income = pd.read_csv('Income.csv')
df_population = pd.read_csv('Population.csv')

# Keep only manufacturing businesses, then discard rows with missing values
is_manufacturing = df_business['Industry'] == 'Manufacturing'
df_business = df_business.loc[is_manufacturing].dropna()
df_income = df_income.dropna()
df_population = df_population.dropna()

# Unpack the school catchment archive into the 'catchments' directory
with zipfile.ZipFile('catchments.zip', 'r') as archive:
    archive.extractall('catchments')

# Read the extracted catchments and reproject them to EPSG:4326
gdf_schools = gpd.read_file('catchments')
gdf_schools = gdf_schools.to_crs(epsg=4326)

# Persist the cleaned tables, replacing any copy left by an earlier run
cleaned_tables = {
    'business_manufacture': df_business,
    'income_clean': df_income,
    'population_clean': df_population,
}
for table_name, frame in cleaned_tables.items():
    frame.to_sql(table_name, con=engine, if_exists='replace', index=False)

# The geometry table is written through GeoPandas rather than plain pandas
gdf_schools.to_postgis('school_catchments', con=engine, if_exists='replace', index=False)

## Task 2: Query POI API

In [None]:
def fetch_pois(bbox, timeout=30):
    """Fetch the points of interest inside one bounding box from the POI API.

    Args:
        bbox: Bounding box; it is interpolated as-is into the ``bbox`` query
            parameter of the request URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        The decoded JSON body when the server answers with HTTP 200.
        An empty list in every other case: non-200 status, network failure,
        timeout, or a body that is not valid JSON.
    """
    url = f'https://api.nsw.gov.au/poi?bbox={bbox}'
    headers = {'Authorization': 'Bearer YOUR_API_KEY'}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Keep the best-effort contract already used for non-200 replies:
        # one unreachable bounding box must not abort a whole collection run.
        return []
    if response.status_code != 200:
        return []
    try:
        return response.json()
    except ValueError:
        # A 200 reply whose body is not JSON is treated like a failed request.
        return []

def collect_pois(sa2_bboxes, delay=1):
    """Fetch POIs for every bounding box and store them in the ``pois`` table.

    Args:
        sa2_bboxes: Iterable of bounding boxes; each one is passed to
            ``fetch_pois`` and the result is appended with ``list.extend``.
        delay: Seconds to pause between consecutive requests so the API is
            not hit in a tight loop.

    Returns:
        A DataFrame built from all collected POI records. It is empty when no
        bounding boxes were given or nothing was fetched; in that case the
        database is left untouched.
    """
    pois = []
    for index, bbox in enumerate(sa2_bboxes):
        if index:
            # Throttle only between requests; no pause is needed before the
            # first call or after the last one.
            time.sleep(delay)
        pois.extend(fetch_pois(bbox))

    df_pois = pd.DataFrame(pois)
    if df_pois.empty:
        # Writing with if_exists='replace' would overwrite previously stored
        # POIs with an empty result, so skip the write entirely.
        return df_pois

    df_pois.to_sql('pois', con=engine, if_exists='replace', index=False)
    return df_pois

## Task 3: Score Computation in SQL

In [None]:
# text() is required to run a raw SQL string through a SQLAlchemy connection.
from sqlalchemy import text

# Per-SA2 score: each of the four metrics is standardised into a z-score
# across all rows of its stats table, the z-scores are summed per sa2_code
# (inner joins, so an SA2 missing from any stats table is dropped), and the
# sum is passed through the logistic function 1 / (1 + e^-z).
# SELECT ... INTO creates score_table from the result.
query = '''
WITH z_business AS (
  SELECT sa2_code, (business_per_1000 - AVG(business_per_1000) OVER()) / STDDEV(business_per_1000) OVER() AS z
  FROM business_stats
),
z_stops AS (
  SELECT sa2_code, (stop_count - AVG(stop_count) OVER()) / STDDEV(stop_count) OVER() AS z
  FROM stop_stats
),
z_schools AS (
  SELECT sa2_code, (schools_per_1000_youth - AVG(schools_per_1000_youth) OVER()) / STDDEV(schools_per_1000_youth) OVER() AS z
  FROM school_stats
),
z_poi AS (
  SELECT sa2_code, (pois_per_km2 - AVG(pois_per_km2) OVER()) / STDDEV(pois_per_km2) OVER() AS z
  FROM poi_stats
),
combined AS (
  SELECT b.sa2_code, b.z + s.z + sc.z + p.z AS z_total
  FROM z_business b
  JOIN z_stops s ON b.sa2_code = s.sa2_code
  JOIN z_schools sc ON b.sa2_code = sc.sa2_code
  JOIN z_poi p ON b.sa2_code = p.sa2_code
)
SELECT sa2_code, 1.0 / (1.0 + EXP(-z_total)) AS score
INTO score_table
FROM combined;
'''

# Run on the notebook's engine inside one transaction: engine.begin() commits
# on success and rolls back on error, so a failed run cannot leave the table
# dropped but not rebuilt.
with engine.begin() as conn:
    # SELECT ... INTO fails if the target already exists; dropping it first
    # makes the cell re-runnable, matching if_exists='replace' used elsewhere.
    conn.execute(text('DROP TABLE IF EXISTS score_table'))
    conn.execute(text(query))