In [None]:
# Kept in its own cell on purpose: if this ran together with the other cells,
# every re-run would repeat the `cd` and eventually walk up to the root dir.
%cd ../..

In [None]:
from lib.service.database import DatabaseService
from lib.defaults import INSTANCE_CFG
import os

# Directory that receives the PNG files written by the rendering loop below.
os.makedirs('_out_img', exist_ok=True)

# NOTE(review): which instance index 3 refers to, and what the trailing `1`
# argument means, is defined in `lib` and not visible here — confirm.
db = DatabaseService.create(INSTANCE_CFG[3].database, 1)
# Presumably blocks until the database service is reachable; the top-level
# `await` relies on the notebook's async cell support.
await db.wait_till_running()

In [None]:
from dataclasses import dataclass, field
import locale
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
from matplotlib.offsetbox import AnchoredText
from matplotlib.patches import Patch
from matplotlib.colors import LogNorm, Normalize
from matplotlib import rc
from mpl_toolkits.axes_grid1 import make_axes_locatable
import warnings

@dataclass
class Target:
    name: str
    figsize: tuple[int, int]
    leg_loc: str
    src_loc: str
    split: str
    ntile: int
    on_unzoned: bool = field(default=True)
    intersection: bool | None = field(default=None)
    all_zones: bool = field(default=False)
    join: str = field(default='ST_Intersects')

    def make_subplot(self):
        # match self.split:
        #     case 'h':
        #         v, h, figsize = 2, 1, (self.figsize[0], self.figsize[1]*2)
        #         kwargs = {'sharex': True}
        #     case 'v':
        #         v, h, figsize = 1, 2, (self.figsize[0]*2, self.figsize[1])
        #         kwargs = {'sharey': True}
        #     case other:
        #         raise Error('specify split')
        # fig, ax = plt.subplots(v, h, figsize=figsize, **kwargs)
        # return fig, ax[0], ax[1]
        fig, ax = plt.subplots(1, 1, figsize=self.figsize)
        return fig, ax, None
        

    @property
    def cmap_linspace_ticks(self):
        return np.linspace(0, 1, self.ntile).tolist()

    @property
    def cardinal_cbar_position(self):
        return 'left' if self.split == 'v' else 'right'


# One entry per LGA to render. The two positional arguments are the LGA name
# and the figure size; `join` overrides the PostGIS predicate used to match
# property geometries to the LGA boundary (default 'ST_Intersects').
targets = [
    Target('Cumberland', (25, 15), leg_loc="upper right", src_loc="lower left", join='ST_Within', split='h', ntile=10),
    Target('Sydney', (15, 20), leg_loc="lower right", src_loc="lower left", on_unzoned=False, split='v', ntile=10),
    Target('Randwick', (12, 20), leg_loc="upper right", src_loc="lower left", join='ST_Within', all_zones=True, on_unzoned=False, split='v', ntile=10),
    Target('Woollahra', (20, 18), leg_loc="lower right", src_loc="lower left", split='v', ntile=10),
    Target('Waverley', (20, 18), leg_loc="lower right", src_loc="lower left", split='v', ntile=10),
    Target('Burwood', (15, 20), leg_loc="lower right", src_loc="lower left", split='v', ntile=8),
    Target('Strathfield', (15, 20), leg_loc="lower right", src_loc="lower left", split='v', ntile=10),
    Target('The Hills', (20, 50), leg_loc="upper right", src_loc="lower left", split='v', ntile=10),
    Target('Parramatta', (22, 20), leg_loc="upper left", src_loc="lower left", join='ST_Within', split='v', ntile=10),
    Target('Inner West', (22, 20), leg_loc="upper left", src_loc="lower left", join='ST_Within', split='v', ntile=10),
    Target('Canada Bay', (25, 20), leg_loc="upper left", src_loc="lower left", split='h', ntile=10),
    Target('Fairfield', (25, 15), leg_loc="lower right", src_loc="lower left", join='ST_Within', on_unzoned=False, split='h', ntile=10),
    Target('Liverpool', (35, 20), leg_loc="lower right", src_loc="lower left", join='ST_Within', on_unzoned=False, split='h', ntile=10),
    Target('Canterbury-Bankstown', (22, 20), leg_loc="lower right", src_loc="lower left", on_unzoned=False, split='v', ntile=10),
    Target('Georges River', (22, 20), leg_loc="lower right", src_loc="lower left", on_unzoned=False, split='v', ntile=10),
    Target('Bayside (NSW)', (22, 20), leg_loc="lower right", src_loc="lower left", join='ST_Within', split='v', ntile=10),
    Target('Hunters Hill', (22, 20), leg_loc="lower right", src_loc="lower left", on_unzoned=False, split='v', ntile=10),
    Target('Ku-ring-gai', (22, 20), leg_loc="lower right", src_loc="lower left", on_unzoned=False, split='v', ntile=10),
    Target('Mosman', (22, 20), leg_loc="lower right", src_loc="lower left", on_unzoned=False, split='v', ntile=10),
    Target('North Sydney', (22, 20), leg_loc="lower right", src_loc="lower left", on_unzoned=False, split='v', ntile=10),
    Target('Northern Beaches', (20, 30), leg_loc="lower right", src_loc="lower left", on_unzoned=False, split='v', ntile=10),
    Target('Ryde', (22, 20), leg_loc="lower right", src_loc="lower left", on_unzoned=False, split='v', ntile=10),
    Target('Sutherland', (22, 20), leg_loc="lower right", src_loc="lower left", on_unzoned=False, join='ST_Within', split='v', ntile=10),
    Target('Willoughby', (22, 15), leg_loc="lower right", src_loc="lower left", on_unzoned=False, split='v', ntile=10),
    Target('Hornsby', (22, 30), leg_loc="lower right", src_loc="lower left", on_unzoned=False, split='v', ntile=10),
    Target('Penrith', (32, 30), leg_loc="lower right", src_loc="lower left", on_unzoned=False, split='v', ntile=10),
]

# Render figure text with Matplotlib's own text engine rather than LaTeX.
rc('text', usetex=False)

# Zone codes intended for ranking.
# NOTE(review): not referenced by any code visible in this notebook — confirm
# whether it is still needed.
ranked_zones = ['B1','B2','B3','B4','B5','B6','B7','E1','E2','E3','E4','E5',
                'IN1','IN2','MU1','R1','R2','R3','R4','R5','RU1','RU2','RU3','RU4','RU5','RU6',
                'SP1','SP3','SP4','SP5','W1','W3','W2','W4']
# Zone codes flagged `is_hidden` by the per-LGA SQL; properties in these zones
# are only ranked when the target sets `all_zones`.
hidden_zones = ['C1', 'C2', 'C3', 'C4','SP2','RE1','RE2']

async def execute(query):
    """Run *query* on a connection from the module-level ``db`` and commit.

    Every warning emitted while the statement runs is suppressed.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        async with db.async_connect() as connection:
            await connection.execute(query)
            await connection.commit()

def create_source_annonation():
    """Build the white attribution box that is added to each map.

    The box is anchored at ``target.src_loc``, where ``target`` is the
    module-level loop variable, so this must be called while iterating
    over the targets.
    """
    credit_text = (
        "Created by Angus Thomsen (@Angus_KST)\n"
        "Source: NSW Valuer General, published November 2024 accessed 2025"
    )
    # frameon=True draws a box behind the text; it is styled white below.
    box = AnchoredText(credit_text, loc=target.src_loc, prop={'size': 9}, frameon=True)
    frame = box.patch
    frame.set_boxstyle("round,pad=0")
    frame.set_edgecolor("white")
    frame.set_facecolor("white")
    return box

def format_aud(value):
    """Format *value* as a dollar amount, e.g. ``1234.5`` -> ``'$1,234.50'``.

    The result uses a ``$`` prefix, comma thousands separators and two
    decimal places; negative amounts are written as ``-$1,234.50``. This is
    the same text the ``en_US`` locale's currency formatting produces.

    The string is built directly instead of switching the process-wide
    locale for every call: that approach needs ``en_US.UTF-8`` to be
    installed on the host, changes global state while it runs, and
    ``locale.getlocale(locale.LC_ALL)`` raises ``TypeError`` when the
    locale categories are set to different values.

    Args:
        value: A real number (int, float, Decimal or NumPy scalar).

    Returns:
        The formatted amount as a string.
    """
    sign = '-' if value < 0 else ''
    return f"{sign}${abs(value):,.2f}"

# Matplotlib colormap names for the ranked and the percentile ("cardinal") maps.
RANK_COLORS, CARDINAL_COLORS = 'viridis', 'magma'
# Name shown in the chart title for each supported bucket count.
# Ten equal groups are "deciles" (previously misspelled "Dectile").
NTILE_KIND = {5: 'Quintile', 8: 'Octile', 10: 'Decile'}

for target in targets:
    await execute(f"""
        DROP TABLE IF EXISTS lga_relevent;
        DROP TABLE IF EXISTS lga_sqm_land_values;
        
        CREATE TABLE lga_relevent AS
        SELECT property_id,
               pgeo.geometry as g,
               zoning.zone_code,
               CASE 
                  WHEN zoning.zone_code IS NULL THEN {target.on_unzoned} 
                  WHEN zoning.zone_code NOT IN ('{"', '".join(hidden_zones)}') THEN TRUE
                  ELSE {target.all_zones} 
               END AS is_ranked,
               CASE WHEN zoning.zone_code IN ('{"', '".join(hidden_zones)}') THEN TRUE ELSE FALSE END AS is_hidden
           FROM abs.lga_2024 lga
           LEFT JOIN nsw_lrs.property_geometry pgeo ON {target.join}(pgeo.geometry, lga.geometry)
           LEFT JOIN (
                SELECT DISTINCT ON (property_id) *
                  FROM nsw_lrs.zone_observation z
                  ORDER BY property_id, effective_date DESC) zoning USING (property_id)
           WHERE lga_name ILIKE '{target.name}';

        CREATE TABLE lga_sqm_land_values AS
        SELECT property_id, g, valuation_base_date, is_ranked, zone_code,
               (lv.land_value / ST_Area(p.g::geography)) as lv
          FROM lga_relevent p
          LEFT JOIN (
            SELECT DISTINCT ON (property_id) *
              FROM nsw_vg.land_valuation lv
              ORDER BY property_id, effective_date DESC) lv USING (property_id)
          WHERE lv.land_value IS NOT NULL
    """)

    lga_query = f"""
      SELECT geometry as g FROM abs.lga_2024 lga WHERE lga_name ILIKE '{target.name}'
    """
            
    cardinal_bins_query = f"""
      SELECT percentile,
             FLOOR(MIN(lv)) AS lower,
             FLOOR(MAX(lv)) AS upper
        FROM (
          SELECT lv, NTILE({target.ntile}) OVER (ORDER BY lv) AS percentile
            FROM lga_sqm_land_values WHERE is_ranked) p
        GROUP BY percentile
        ORDER BY percentile;
    """
            
    cardinal_query = f"""
      SELECT g, NTILE({target.ntile}) OVER (ORDER BY lv) AS percentile
        FROM lga_sqm_land_values WHERE is_ranked
    """
            
    max_query = f"""
      SELECT lv, ROUND(ST_Area(p.g::geography), 2) as area
        FROM lga_sqm_land_values
        WHERE is_ranked
        ORDER BY lv DESC LIMIT 1;
    """
            
    ranked_query = f"""
      SELECT g, RANK() OVER (ORDER BY lv) AS rank FROM lga_sqm_land_values WHERE is_ranked
    """

    bins_df = pd.read_sql(cardinal_bins_query, db.engine())
    bins_df['lower'] = bins_df['lower'].apply(format_aud)
    bins_df['upper'] = bins_df['upper'].apply(format_aud)
    cardinal_ticks = [
        f"≤ {bins_df.loc[0, "upper"]}",
        *[f"≤ {bins_df.loc[i, 'lower']}" for i in range(1, len(bins_df) - 1)],
        f"≥ {bins_df.loc[len(bins_df)-1, "lower"]}",
    ]

    # max_df = pd.read_sql(max_query, db.engine())
    # max_df['lv'] = max_df['lv'].apply(format_aud)
    

    fig, c_ax, r_ax = target.make_subplot()

    lga_df = gpd.read_postgis(lga_query, db.engine(), geom_col='g')
    if c_ax:
        lga_df.plot(ax=c_ax, color='#cccccc', edgecolor="#cccccc")
    if r_ax:
        lga_df.plot(ax=r_ax, color='#cccccc', edgecolor="#cccccc")

    if c_ax:
        cardinal_df = gpd.read_postgis(cardinal_query, db.engine(), geom_col='g')
        cardinal_df.plot(ax=c_ax, column='percentile', cmap=CARDINAL_COLORS)
        
        cardinal_sm = plt.cm.ScalarMappable(cmap=plt.get_cmap(CARDINAL_COLORS), norm=Normalize(vmin=0, vmax=1))
        cardinal_sm.set_array([])
        
        cardinal_cbar = plt.colorbar(cardinal_sm, ax=c_ax, shrink=0.8, aspect=50, ticks=target.cmap_linspace_ticks, location=target.cardinal_cbar_position)
        cardinal_cbar.set_label("SQM Land Value in AUD", labelpad=10, fontsize=15)
        cardinal_cbar.ax.tick_params(labelsize=12, rotation=67.5) 
        cardinal_cbar.ax.set_yticklabels(cardinal_ticks)
        for label in cardinal_cbar.ax.get_yticklabels():
            label.set_verticalalignment('center')
        
    if r_ax:
        ranked_df = gpd.read_postgis(ranked_query, db.engine(), geom_col='g')
        ranked_df.plot(ax=r_ax, column='rank', cmap=RANK_COLORS)
        
        ranked_sm = plt.cm.ScalarMappable(cmap=plt.get_cmap(RANK_COLORS), norm=Normalize(vmin=0, vmax=1))
        ranked_sm.set_array([])
        
        ranked_cbar = plt.colorbar(ranked_sm, ax=r_ax, shrink=0.8, aspect=50, ticks=[0, 0.5, 1], location="right")
        ranked_cbar.set_label("Position within ordered Ranking", labelpad=0, fontsize=15)
        ranked_cbar.ax.tick_params(labelsize=15, rotation=90) 
        ranked_cbar.ax.set_yticklabels(["Lower", "", "Higher"])
        for label in ranked_cbar.ax.get_yticklabels():
            label.set_verticalalignment('center') 

    custom_legend = [Patch(facecolor='#cccccc', edgecolor="black", label=target.name)]
    if c_ax:
        c_ax.legend(handles=custom_legend, loc=target.leg_loc, title="Legend", fontsize=12, title_fontsize=10)

    if r_ax:
        r_ax.legend(handles=custom_legend, loc=target.leg_loc, title="Legend", fontsize=12, title_fontsize=10)

    plt.title("Shared title")
    
    if c_ax:
        c_ax.set_title("\nSQM Land Value\nGrouped by %s\nWithin '%s' LGA\n" % (NTILE_KIND[target.ntile], target.name), fontsize=20)
        c_ax.add_artist(create_source_annonation())
        # c_ax.set_xlabel(f"\nMax SQM Land Value @ {bins_df.loc[len(bins_df)-1, "upper"]}", fontsize=15)


    if r_ax:
        r_ax.set_title("\nOrdered Ranking of Land\nBy SQM Land Value\nWithin '%s' LGA\n" % target.name, fontsize=20)
        r_ax.add_artist(create_source_annonation())
    
    # source_box = AnchoredText(
    #     "Source: NSW Valuer General, published November 2024 accessed 2025",
    #     loc=target.src_loc,
    #     prop=dict(size=10),
    #     frameon=True,  # Draw a box around the text
    # )
    # source_box.patch.set_boxstyle("round,pad=0.1")
    # source_box.patch.set_edgecolor("white")
    # source_box.patch.set_facecolor("white")
    # ax.add_artist(source_box)
    # plt.figtext(0.5, target.figtext_y[0], 
    #             "If you put the all properties here in a list and ordered of their SQM land " \
    #             "values, their position within this list would be their ranking. So keep in " \
    #             "mind this doesn't show the magnitude in the difference of price.",
    #             ha="center", fontsize=15, wrap=True, linespacing=1.5)
    fig.savefig(f"_out_img/lga_ntile_lvsqm-{target.name.replace(' ', '_')}.png")
    plt.show()
    await execute("""
        DROP TABLE IF EXISTS lga_relevent;
        DROP TABLE IF EXISTS lga_sqm_land_values;
    """)
    
    