In [2]:
from lib.gnaf_db import GnafDb
from lib import notebook_constants as nc
from lib.remote_resources import StaticFileInitialiser
from lib.nsw_vg.data_discovery import LandValueDiscovery, WeeklySalePriceDiscovery

initialiser = StaticFileInitialiser.create()
land_value = LandValueDiscovery()
sale_price = WeeklySalePriceDiscovery()

# Look up the latest published target for each dataset (sale prices are
# queried first, then land values). Either lookup may return a falsy value,
# in which case that dataset is not registered below.
sale_price_target = sale_price.get_latest()
land_value_target = land_value.get_latest()

# Register the targets that were found; land values are added before
# sale prices.
for _target in (land_value_target, sale_price_target):
    if _target:
        initialiser.add_target(_target)

initialiser.setup_dirs()
initialiser.fetch_remote_resources()

# Two database handles are used by the notebook: the default one and a
# second one configured from `notebook_constants`. Both are created first,
# then each is waited on in turn.
gnaf = GnafDb.create()
gnaf_2 = GnafDb.create(nc.gnaf_dbconf_2, nc.gnaf_dbname_2)
for _db in (gnaf, gnaf_2):
    _db.wait_till_running()

Checking gnaf-2020.zip
Checking non_abs_shape.zip
Checking cities.zip
Checking nswvg_lv_01_Sep_2024.zip
Checking nswvg_wps_02_Sep_2024.zip


In [2]:
import pandas as pd
from pprint import pprint
from sqlalchemy import text
from lib.nsw_vg.property_description import parse_property_description

def get_the_data(query, params=None, instance='gnaf_1'):
    """Run a SQL query against one of the notebook's databases.

    Args:
        query: SQL text. It is wrapped in `sqlalchemy.text`, so `:name`
            placeholders are filled from `params`.
        params: Optional mapping of bind-parameter names to values.
        instance: Which database handle to use: 'gnaf_1' selects the
            module-level `gnaf`, 'gnaf_2' selects `gnaf_2`.

    Returns:
        A `pandas.DataFrame` holding the query result.

    Raises:
        ValueError: If `instance` is not one of the known names. Previously
            this fell through with `engine = None` and failed later inside
            `pd.read_sql` with a much less helpful error.
    """
    # Validate the instance name before touching any database handle so a
    # typo is reported immediately and unambiguously.
    if instance == 'gnaf_1':
        engine = gnaf.engine()
    elif instance == 'gnaf_2':
        engine = gnaf_2.engine()
    else:
        raise ValueError(
            f"Unknown instance {instance!r}; expected 'gnaf_1' or 'gnaf_2'"
        )
    return pd.read_sql(text(query), engine, params=params)

In [3]:
# Fetch the 50 highest-valued properties whose description is a single
# `N/N` or `N/N/N` identifier, joined with each property's most recent
# valuation.
#
# The SQL is a raw string: the regular expressions contain `\d` / `\s`,
# which are invalid escape sequences in a normal Python string literal
# (a warning today, and slated to become an error). The text sent to the
# database is unchanged.
rows = get_the_data(r"""
  SELECT p.property_id as nsw_id,
         pd.property_description as desc,
         p.property_name as name,
         p.unit_number as unit,
         p.house_number as house,
         st.street_name as street,
         su.suburb_name as suburb,
         p.postcode,
         p.zone_code as zone,
         p.area,
         v.base_date,
         v.land_value
    FROM nsw_valuer_general.property_description pd
    JOIN nsw_valuer_general.property p ON p.property_id = pd.property_id
    JOIN nsw_valuer_general.street st ON p.street_id = st.street_id
    JOIN nsw_valuer_general.suburb su ON p.suburb_id = su.suburb_id
    JOIN (SELECT va.* FROM nsw_valuer_general.valuations va
           WHERE va.base_date =
             (SELECT MAX(vb.base_date)
                FROM nsw_valuer_general.valuations vb
               WHERE va.property_id = vb.property_id)) v
      ON v.property_id = p.property_id
   WHERE pd.property_description ~ '^\d+/\d+(/\d+)?$'
   -- WHERE pd.property_description !~ '^\d+/\d+(/\d+)?$' AND
   --       pd.property_description !~ '^,? ?Mineral (Claim|Lease) \d+[A-Z]?\s*$' AND
   --       pd.property_description !~ '^(\d+/\d+(/\d+)? )*(Mineral Claim \d+[R]? )*(Western Land Lease \d+ *)*$'
   ORDER BY land_value DESC, street, house, unit
  OFFSET    0 -- 1900
   LIMIT   50
""")


# Run every description through the parser and collect the ones it could
# not fully consume (i.e. where some text is left over).
weird = []
for desc in rows['desc']:
    remaining, expanded = parse_property_description(desc)

    if remaining:
        pprint({
            'description': desc,
            'remaining': remaining,
            'expanded': expanded,
        })
        weird.append(desc)
pprint(weird)

rows

[]


Unnamed: 0,nsw_id,desc,name,unit,house,street,suburb,postcode,zone,area,base_date,land_value
0,4496832,1/1290500,,,1,ALFRED ST,SYDNEY,2000,SP5,14230.0,2023-01-07,438875113
1,4544964,2/1276320,,,565,LUDDENHAM RD,LUDDENHAM,2745,MU,1875000.0,2023-01-07,425000000
2,4456225,1/1287712,,,1953-2109,ELIZABETH DR,BADGERYS CREEK,2555,ENT,2805000.0,2023-01-07,394000000
3,3623946,21/1171076,,,14-54,DENNISTOUN AVE,YENNORA,2161,E4,700330.0,2023-01-07,365000000
4,4544963,1/1276320,,,601,LUDDENHAM RD,LUDDENHAM,2745,MU,1006000.0,2023-01-07,363000000
5,4491385,3101/1282964,,,215,BADGERYS CREEK RD,BRADFIELD,2556,MU,1131000.0,2023-01-07,352000000
6,4348804,60/1107965,WILLANDRA VILLAGE,,157,BALACLAVA RD,MACQUARIE PARK,2113,MU1,63920.0,2023-01-07,340000000
7,3791727,100/1190494,MACQUARIE SHOPPING CENTRE,,197-223,HERRING RD,MACQUARIE PARK,2113,MU1,110700.0,2023-01-07,325000000
8,3819161,1/1182754,,,188,PITT ST,SYDNEY,2000,SP5,11860.0,2023-01-07,319000000
9,3169836,2/1073376,,,50,BRIDGE ST,SYDNEY,2000,SP5,5838.0,2023-01-07,318000000


In [4]:
# Fetch a page of properties whose description is NOT one of the simple,
# already-understood shapes (plain `N/N[/N]` ids, mineral claims/leases,
# western land leases), so the parser can be exercised on the harder cases.
#
# Changes from the previous version of this cell:
#   * the SQL is a raw string, because `\d` / `\s` are invalid escape
#     sequences in a normal Python string literal;
#   * the "Mineral (Claim|Lease)" predicate was listed twice verbatim; the
#     redundant copy has been dropped (the result set is unaffected).
#
# NOTE(review): `:ignored_zones` now only appears inside the commented-out
# SQL below. SQLAlchemy's `text()` appears to pick up `:name` placeholders
# without regard to SQL comments, so the parameter is still supplied --
# confirm before removing either the comment or the parameter.
rows = get_the_data(r"""
  SELECT p.property_id as nsw_id,
         pd.property_description as desc,
         p.property_name as name,
         p.unit_number as unit,
         p.house_number as house,
         st.street_name as street,
         su.suburb_name as suburb,
         p.postcode,
         p.zone_code as zone,
         p.area,
         v.base_date,
         v.land_value
    FROM nsw_valuer_general.property_description pd
    JOIN nsw_valuer_general.property p ON p.property_id = pd.property_id
    JOIN nsw_valuer_general.street st ON p.street_id = st.street_id
    JOIN nsw_valuer_general.suburb su ON p.suburb_id = su.suburb_id
    JOIN (SELECT va.* FROM nsw_valuer_general.valuations va
           WHERE va.base_date =
             (SELECT MAX(vb.base_date)
                FROM nsw_valuer_general.valuations vb
               WHERE va.property_id = vb.property_id)) v
      ON v.property_id = p.property_id
   WHERE pd.property_description !~ '^,? ?Mineral (Claim|Lease) \d+[A-Z]?\s*$' AND
         pd.property_description !~ '^\d+/\d+(/\d+)?$' AND
         pd.property_description !~ '^(\d+/\d+(/\d+)? )*(Mineral Claim \d+[R]? )*(Western Land Lease \d+ *)*$'
   -- WHERE pd.property_description NOT ILIKE 'Mineral Claim %' AND
   --       pd.property_description NOT ILIKE 'Mineral Lease %' AND
   --       pd.property_description NOT ILIKE '% Western Land Lease %' AND
   --       pd.property_description NOT ILIKE '% Non-Irrigable Purchase %' AND
   --       pd.property_description NOT ILIKE '% Railway Land Lease %' AND
   --       p.zone_code NOT IN :ignored_zones
   ORDER BY street, house, unit
  OFFSET 150
   LIMIT 50
""", params={
    "ignored_zones": ('RU1', 'RU2', 'RU3', 'RU4', 'RU5', 'SP1', 'E1', 'E2', 'E3', 'E4', 'RE1'),
})


# Print only the descriptions the parser could not fully consume.
for desc in rows['desc']:
    remaining, expanded = parse_property_description(desc)

    if remaining:
        pprint({
            'description': desc,
            'remaining': remaining,
            'expanded': expanded,
        })

rows

Unnamed: 0,nsw_id,desc,name,unit,house,street,suburb,postcode,zone,area,base_date,land_value
0,2229648,"1, 2, 3, 4, 5, 6, 7, CP/SP56627",,,14,ABBOTT RD,SEVEN HILLS,2147,E4,2314.0,2023-01-07,3680000
1,2869628,"1, 2, 3, 4, 5, 6, 7, 8, 9, CP/SP65830",,,2,ABBOTT RD,SEVEN HILLS,2147,E4,4271.0,2023-01-07,6600000
2,897378,"225, 226, 228, 229, 230/752038",,,225,ABBOTT RD,NORTH CURL CURL,2099,RE1,31990.0,2023-01-07,6270000
3,897407,B/391200,,,34A,ABBOTT RD,NORTH CURL CURL,2099,R2,1100.0,2023-01-07,2660000
4,897406,PT 56/20103,,,34B,ABBOTT RD,NORTH CURL CURL,2099,R2,116.1,2023-01-07,50800
5,2152374,"1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,...",,,36,ABBOTT RD,SEVEN HILLS,2147,E4,8031.0,2023-01-07,8830000
6,897408,A/391200,,,36,ABBOTT RD,NORTH CURL CURL,2099,R2,1100.0,2023-01-07,2660000
7,897379,"227, 373/752038",,,373,ABBOTT RD,NORTH CURL CURL,2099,RE1,13050.0,2023-01-07,2830000
8,2152350,"1, 2, 3, 4, 5, 6, 7, 8, 9, CP/SP51766 11, 12, ...",,,4,ABBOTT RD,SEVEN HILLS,2147,E4,4750.0,2023-01-07,7090000
9,949470,"1/88143 A, B/318869 723, 724, 725, 726, 727/75...",,,5,ABBOTT RD,ARTARMON,2064,SP2,5916.2,2023-01-07,12300000


In [5]:
# NSW addresses whose `legal_parcel_id` is shared by more than one
# ADDRESS_DETAIL row, shown one page (20 rows, starting at offset 60) at a
# time. Lot, unit, level and house number are each assembled from their
# prefix / number / suffix columns.
shared_parcel_addresses_sql = """
  SELECT 
      ad.address_detail_pid as gnaf_id,
      ad.legal_parcel_id,
      ad.building_name,
      CONCAT(ad.lot_number_prefix, ad.lot_number, ad.lot_number_suffix) as lot,
      CONCAT(ad.flat_number_prefix, ad.flat_number, ad.flat_number_suffix) as unit,
      CONCAT(COALESCE(ad.level_type_code || ' ', ''),
             COALESCE(ad.level_number_prefix || '-', ''),
             ad.level_number,
             COALESCE('-' || ad.level_number_suffix, '')) as level,
      CONCAT(
        ad.number_first_prefix, ad.number_first, ad.number_first_suffix,
        COALESCE('-' || ad.number_last_prefix || ad.number_last || ad.number_last_suffix, '')
      ) as house,
      sl.street_name,
      l.locality_name,
      ad.postcode
    FROM gnaf.ADDRESS_DETAIL ad
    JOIN gnaf.LOCALITY l ON ad.locality_pid = l.locality_pid
    JOIN gnaf.STREET_LOCALITY sl ON ad.street_locality_pid = sl.street_locality_pid
    JOIN gnaf.STATE s ON l.state_pid = s.state_pid
   WHERE s.state_abbreviation = 'NSW'
     AND ad.legal_parcel_id IN (
      SELECT ad.legal_parcel_id FROM gnaf.ADDRESS_DETAIL ad
       GROUP BY ad.legal_parcel_id HAVING COUNT(*) > 1)
   ORDER BY ad.legal_parcel_id, street_name, house, level, unit
   LIMIT 20
  OFFSET 60
"""

get_the_data(query=shared_parcel_addresses_sql)

Unnamed: 0,gnaf_id,legal_parcel_id,building_name,lot,unit,level,house,street_name,locality_name,postcode
0,GANSW718138632,1000/1117715,PEPPERFIELD,,21,,8,WISEMAN,BOWRAL,2576
1,GANSW718138633,1000/1117715,PEPPERFIELD,,22,,8,WISEMAN,BOWRAL,2576
2,GANSW718138634,1000/1117715,PEPPERFIELD,,23,,8,WISEMAN,BOWRAL,2576
3,GANSW718127753,1000/1117715,PEPPERFIELD,,24,,8,WISEMAN,BOWRAL,2576
4,GANSW718138635,1000/1117715,PEPPERFIELD,,25,,8,WISEMAN,BOWRAL,2576
5,GANSW718138636,1000/1117715,PEPPERFIELD,,26,,8,WISEMAN,BOWRAL,2576
6,GANSW718138637,1000/1117715,PEPPERFIELD,,27,,8,WISEMAN,BOWRAL,2576
7,GANSW718138638,1000/1117715,PEPPERFIELD,,28,,8,WISEMAN,BOWRAL,2576
8,GANSW718138639,1000/1117715,PEPPERFIELD,,29,,8,WISEMAN,BOWRAL,2576
9,GANSW718127760,1000/1117715,PEPPERFIELD,,3,,8,WISEMAN,BOWRAL,2576


In [6]:
# Hand-picked property ids covering a range of description shapes; the
# trailing comment on each line is that property's raw description.
pids = (
    '570557', # 650/751743 Non-Irrigable Purchase 15
    '623846', # 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, CP/SP10943
    '1878087', # B/100895 6, PT 20/755520 Enclosure Permit 510145
    '1878096', # 1, 2, 3/133592 99/755520 153/755529 Enclosure Permit 50192
    '1878175', # PT 1/628296 1/705069
    '2681801', # 98/1066289 Mineral Claim 30854 Western Land Lease 14457
    '2681809', # 26/1066289 Western Land Lease 14476 Western Land Lease 31572
    '2682051', # 44/1073508 253/1076808 Western Land Lease 16121
    '2682966', # 15/1073508 Mineral Claim 42522R Western Land Lease 14691
    '3625326', # Forest Permit OCP100015
    '3821010', # 1, 3, 4/366686 1, 2/435063 18, 70/755529 1, 2, 3, 4/1088131 1/1166347
    '3851809', # PT 6401/1257392 Railway Land Lease 221.0037
    '4483251', # 1, 2/199140 2/530749 82, 120, 181/756909 27, 57, 134, PT 135, 136, 137, 190, 202, PT 210, PT 211/756913
    '4483252', # PT 135, PT 210, PT 211/756913 Wind Farm AN614034
    '3625277',
)

# Load those properties together with their latest valuation, ordered by
# property id.
property_descriptions = get_the_data("""
  SELECT p.property_id as nsw_id,
         pd.property_description as desc,
         -- src.source_file_position,
         -- sf.source_file_name,
         p.property_name as name,
         p.unit_number as unit,
         p.house_number as house,
         st.street_name as street,
         su.suburb_name as suburb,
         p.postcode,
         p.zone_code as zone,
         p.area,
         v.base_date,
         v.land_value
    FROM nsw_valuer_general.property p
    LEFT JOIN nsw_valuer_general.property_description pd ON p.property_id = pd.property_id
    JOIN nsw_valuer_general.street st ON p.street_id = st.street_id
    JOIN nsw_valuer_general.suburb su ON p.suburb_id = su.suburb_id
    LEFT JOIN nsw_valuer_general.source src ON p.source_id = src.source_id
    LEFT JOIN nsw_valuer_general.source_file sf ON src.source_file_id = sf.source_file_id
    JOIN (SELECT va.* FROM nsw_valuer_general.valuations va
           WHERE va.base_date =
             (SELECT MAX(vb.base_date)
                FROM nsw_valuer_general.valuations vb
               WHERE va.property_id = vb.property_id)) v
      ON v.property_id = p.property_id
   WHERE p.property_id IN :pids
   ORDER BY p.property_id
""", params={'pids': pids})

# Show the parser's output for every sampled description, whether or not
# anything was left unparsed.
for desc in property_descriptions['desc']:
    remaining, expanded = parse_property_description(desc)
    pprint(dict(description=desc, remaining=remaining, expanded=expanded))

property_descriptions

{'description': '650/751743 Non-Irrigable Purchase 15',
 'expanded': [NonIrrigablePurchase(id='15'),
              LandParcel(id='650/751743', part=False)],
 'remaining': ''}
{'description': '1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, '
                '18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, CP/SP10943',
 'expanded': [LandParcel(id='1/SP10943', part=False),
              LandParcel(id='2/SP10943', part=False),
              LandParcel(id='3/SP10943', part=False),
              LandParcel(id='4/SP10943', part=False),
              LandParcel(id='5/SP10943', part=False),
              LandParcel(id='6/SP10943', part=False),
              LandParcel(id='7/SP10943', part=False),
              LandParcel(id='8/SP10943', part=False),
              LandParcel(id='9/SP10943', part=False),
              LandParcel(id='10/SP10943', part=False),
              LandParcel(id='11/SP10943', part=False),
              LandParcel(id='12/SP10943', part=False),
              Land

Unnamed: 0,nsw_id,desc,name,unit,house,street,suburb,postcode,zone,area,base_date,land_value
0,570557,650/751743 Non-Irrigable Purchase 15,,,4.0,ABATTOIR RD,LAKE WYANGAN,2680,R5,3565.0,2023-01-07,439000
1,623846,"1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,...",,,1.0,AARON PL,WAHROONGA,2076,R2,9381.0,2023-01-07,7580000
2,1878087,"B/100895 6, PT 20/755520 Enclosure Permit 510145",4-D,,971.0,4D RD,CAROONA,2343,RU1,478200.0,2023-01-07,438000
3,1878096,"1, 2, 3/133592 99/755520 153/755529 Enclosure ...",LYNDEN,,758.0,4D RD,CAROONA,2343,RU1,3019900.0,2023-01-07,5320000
4,1878175,PT 1/628296 1/705069,YARRAMAN BLOCK,,1055.0,4D RD,SPRING RIDGE,2343,RU1,2147000.0,2023-01-07,3790000
5,2681801,98/1066289 Mineral Claim 30854 Western Land Le...,,,,14-10,LIGHTNING RIDGE,2834,SP1,2588.0,2023-01-07,3250
6,2681809,26/1066289 Western Land Lease 14476 Western La...,,,,14-11,LIGHTNING RIDGE,2834,SP1,2455.0,2023-01-07,3250
7,2682051,44/1073508 253/1076808 Western Land Lease 16121,,,,17-09,LIGHTNING RIDGE,2834,SP1,2535.0,2023-01-07,3250
8,2682966,15/1073508 Mineral Claim 42522R Western Land L...,,,,16-09,LIGHTNING RIDGE,2834,SP1,2477.0,2023-01-07,3250
9,3625277,"PT 98/721203 PT 93, PT 94, PT 96/755820 63, 64...",TARWONGA & AUCHEN DHU,,174.0,ABBOTTSLEY RD,WALCHA,2354,RU1,9442920.0,2023-01-07,6040000


In [7]:
# Narrowed-down version of the sample list: only the uncommented ids are
# queried. The commented entries are kept so they can be toggled back on;
# the trailing comment on each line is that property's raw description.
pids = (
    '570557', # 650/751743 Non-Irrigable Purchase 15
    # '623846', # 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, CP/SP10943
    # '1878087', # B/100895 6, PT 20/755520 Enclosure Permit 510145
    # '1878096', # 1, 2, 3/133592 99/755520 153/755529 Enclosure Permit 50192
    # '1878175', # PT 1/628296 1/705069
    # '2681801', # 98/1066289 Mineral Claim 30854 Western Land Lease 14457
    # '2681809', # 26/1066289 Western Land Lease 14476 Western Land Lease 31572
    # '2682051', # 44/1073508 253/1076808 Western Land Lease 16121
    # '2682966', # 15/1073508 Mineral Claim 42522R Western Land Lease 14691
    # '3625326', # Forest Permit OCP100015
    # '3821010', # 1, 3, 4/366686 1, 2/435063 18, 70/755529 1, 2, 3, 4/1088131 1/1166347
    # '3851809', # PT 6401/1257392 Railway Land Lease 221.0037
    # '4483251', # 1, 2/199140 2/530749 82, 120, 181/756909 27, 57, 134, PT 135, 136, 137, 190, 202, PT 210, PT 211/756913
    # '4483252', # PT 135, PT 210, PT 211/756913 Wind Farm AN614034
    '3625277',
)

# Load the selected properties together with their latest valuation,
# ordered by property id.
property_descriptions = get_the_data("""
  SELECT p.property_id as nsw_id,
         pd.property_description as desc,
         -- src.source_file_position,
         -- sf.source_file_name,
         p.property_name as name,
         p.unit_number as unit,
         p.house_number as house,
         st.street_name as street,
         su.suburb_name as suburb,
         p.postcode,
         p.zone_code as zone,
         p.area,
         v.base_date,
         v.land_value
    FROM nsw_valuer_general.property p
    LEFT JOIN nsw_valuer_general.property_description pd ON p.property_id = pd.property_id
    JOIN nsw_valuer_general.street st ON p.street_id = st.street_id
    JOIN nsw_valuer_general.suburb su ON p.suburb_id = su.suburb_id
    LEFT JOIN nsw_valuer_general.source src ON p.source_id = src.source_id
    LEFT JOIN nsw_valuer_general.source_file sf ON src.source_file_id = sf.source_file_id
    JOIN (SELECT va.* FROM nsw_valuer_general.valuations va
           WHERE va.base_date =
             (SELECT MAX(vb.base_date)
                FROM nsw_valuer_general.valuations vb
               WHERE va.property_id = vb.property_id)) v
      ON v.property_id = p.property_id
   WHERE p.property_id IN :pids
   ORDER BY p.property_id
""", params={'pids': pids})

# Show the parser's output for each selected description.
for desc in property_descriptions['desc']:
    remaining, expanded = parse_property_description(desc)
    pprint(dict(description=desc, remaining=remaining, expanded=expanded))

property_descriptions

{'description': '650/751743 Non-Irrigable Purchase 15',
 'expanded': [NonIrrigablePurchase(id='15'),
              LandParcel(id='650/751743', part=False)],
 'remaining': ''}
{'description': 'PT 98/721203 PT 93, PT 94, PT 96/755820 63, 64, 311, 312, '
                '313, 314, 358, 359, 360, 361/756502 2/1154215 PT 1/1154216 '
                '100/1166366 Licence 402630',
 'expanded': [CrownLandLicense(id='402630'),
              LandParcel(id='98/721203', part=True),
              LandParcel(id='93/755820', part=True),
              LandParcel(id='94/755820', part=True),
              LandParcel(id='96/755820', part=True),
              LandParcel(id='63/756502', part=False),
              LandParcel(id='64/756502', part=False),
              LandParcel(id='311/756502', part=False),
              LandParcel(id='312/756502', part=False),
              LandParcel(id='313/756502', part=False),
              LandParcel(id='314/756502', part=False),
              LandParcel(id='358/756502

Unnamed: 0,nsw_id,desc,name,unit,house,street,suburb,postcode,zone,area,base_date,land_value
0,570557,650/751743 Non-Irrigable Purchase 15,,,4,ABATTOIR RD,LAKE WYANGAN,2680,R5,3565.0,2023-01-07,439000
1,3625277,"PT 98/721203 PT 93, PT 94, PT 96/755820 63, 64...",TARWONGA & AUCHEN DHU,,174,ABBOTTSLEY RD,WALCHA,2354,RU1,9442920.0,2023-01-07,6040000
