# Property description analysis

This notebook mostly exists to analyse the contents of the property descriptions in the NSW Valuer General data.

## Initialise database client

Initialise the two database clients and wait for both to be running.

In [1]:
from lib.gnaf_db import GnafDb
from lib import notebook_constants as nc

# One client per configured database: the first uses GnafDb's defaults, the
# second uses the `*_2` settings from notebook_constants.
gnaf = GnafDb.create()
gnaf_2 = GnafDb.create(nc.gnaf_dbconf_2, nc.gnaf_dbname_2)

# Wait on each client in creation order before any query is issued.
# NOTE(review): wait_till_running presumably blocks until the database
# accepts connections; confirm against lib.gnaf_db.
for _client in (gnaf, gnaf_2):
    _client.wait_till_running()

## Create some helper functions

- `get_the_data`: a thin wrapper around `pd.read_sql`; mostly boilerplate.

In [2]:
import pandas as pd
from pprint import pprint
from sqlalchemy import text
from lib.nsw_vg.property_description import parse_property_description

def get_the_data(query, params=None, instance='gnaf_2'):
    """Run a SQL query against one of the notebook's databases.

    Args:
        query: SQL text. It is wrapped in `sqlalchemy.text`, so `:name`
            placeholders are treated as bind parameters.
        params: Optional mapping of bind parameter values, forwarded to
            `pd.read_sql`.
        instance: Which client to query: 'gnaf_1' selects `gnaf`,
            'gnaf_2' (the default) selects `gnaf_2`.

    Returns:
        Whatever `pd.read_sql` returns for the query (a DataFrame).

    Raises:
        ValueError: If `instance` is not one of the known instance names.
    """
    if instance == 'gnaf_1':
        engine = gnaf.engine()
    elif instance == 'gnaf_2':
        engine = gnaf_2.engine()
    else:
        # Fail loudly here rather than handing `None` to pd.read_sql, which
        # would surface as an unrelated-looking connection error.
        raise ValueError(
            f"unknown instance {instance!r}; expected 'gnaf_1' or 'gnaf_2'"
        )
    return pd.read_sql(text(query), engine, params=params)

## SQL Boilerplate

We'll use this to fetch different stuff from the database.

In [14]:
def most_properties(limit, offset):
    """Fetch one page of properties with non-trivial descriptions.

    Each property is joined to its street, suburb and most recent valuation
    (the valuation row whose base_date is the maximum for that property).
    Descriptions are excluded when they consist only of:

    - a single `digits/digits` (optionally `/digits`) identifier,
    - a single "Mineral Claim" or "Mineral Lease" entry, or
    - a run of such identifiers followed by "Mineral Claim" and/or
      "Western Land Lease" entries.

    Rows are ordered by land value (highest first), then street, house, unit.

    Args:
        limit: Maximum number of rows to return.
        offset: Number of rows to skip before the first returned row.

    Returns:
        The DataFrame produced by `get_the_data` for this query.
    """
    # Raw string: the regexes below contain backslash-d and backslash-s, which
    # are invalid escape sequences in a normal Python string literal.
    return get_the_data(r"""
      SELECT p.property_id as nsw_id,
             pd.property_description as desc,
             p.property_name as name,
             p.unit_number as unit,
             p.house_number as house,
             st.street_name as street,
             su.suburb_name as suburb,
             p.postcode,
             p.zone_code as zone,
             p.area,
             v.base_date,
             v.land_value
        FROM nsw_valuer_general.property_description pd
        JOIN nsw_valuer_general.property p ON p.property_id = pd.property_id
        JOIN nsw_valuer_general.street st ON p.street_id = st.street_id
        JOIN nsw_valuer_general.suburb su ON p.suburb_id = su.suburb_id
        JOIN (SELECT va.* FROM nsw_valuer_general.valuations va
               WHERE va.base_date =
                 (SELECT MAX(vb.base_date)
                    FROM nsw_valuer_general.valuations vb
                   WHERE va.property_id = vb.property_id)) v
          ON v.property_id = p.property_id
       WHERE pd.property_description !~ '^\d+/\d+(/\d+)?$' AND
             pd.property_description !~ '^,? ?Mineral (Claim|Lease) \d+[A-Z]?\s*$' AND
             pd.property_description !~ '^(\d+/\d+(/\d+)? )*(Mineral Claim \d+[R]? )*(Western Land Lease \d+ *)*$'
       ORDER BY land_value DESC, street, house, unit
      OFFSET   :offset
       LIMIT   :limit
    """, params={ 'offset': offset, 'limit': limit })

def description_containing(t, limit, offset):
    """Fetch one page of properties whose description matches a pattern.

    The pattern is applied with PostgreSQL's `~` operator, so `t` is
    interpreted as a regular expression rather than a literal substring.
    Each property is joined to its street, suburb and most recent valuation;
    rows are ordered by land value (highest first), then street, house, unit.

    Args:
        t: Regular expression the property description must match.
        limit: Maximum number of rows to return.
        offset: Number of rows to skip before the first returned row.

    Returns:
        The DataFrame produced by `get_the_data` for this query.
    """
    sql = """
      SELECT p.property_id as nsw_id,
             pd.property_description as desc,
             p.property_name as name,
             p.unit_number as unit,
             p.house_number as house,
             st.street_name as street,
             su.suburb_name as suburb,
             p.postcode,
             p.zone_code as zone,
             p.area,
             v.base_date,
             v.land_value
        FROM nsw_valuer_general.property_description pd
        JOIN nsw_valuer_general.property p ON p.property_id = pd.property_id
        JOIN nsw_valuer_general.street st ON p.street_id = st.street_id
        JOIN nsw_valuer_general.suburb su ON p.suburb_id = su.suburb_id
        JOIN (SELECT va.* FROM nsw_valuer_general.valuations va
               WHERE va.base_date =
                 (SELECT MAX(vb.base_date)
                    FROM nsw_valuer_general.valuations vb
                   WHERE va.property_id = vb.property_id)) v
          ON v.property_id = p.property_id
       WHERE pd.property_description ~ :text
       ORDER BY land_value DESC, street, house, unit
      OFFSET   :offset
       LIMIT   :limit
    """
    bind_values = {'text': t, 'offset': offset, 'limit': limit}
    return get_the_data(sql, params=bind_values)


def get_by_properties_id(property_ids):
    """Fetch specific properties by id, with description and latest valuation.

    Unlike the paging helpers, this starts from the property table and
    LEFT JOINs the description, so a property without a description is still
    returned (with a null `desc`). The `source` and `source_file` tables are
    joined but none of their columns are selected. Rows are ordered by
    property id.

    Args:
        property_ids: Collection of property ids bound to the `:pids`
            placeholder of an `IN` clause.
            NOTE(review): this relies on the database driver adapting the
            value into a SQL list; callers in this notebook pass a tuple.
            Confirm before passing other container types.

    Returns:
        The DataFrame produced by `get_the_data` for this query.
    """
    sql = """
      SELECT p.property_id as nsw_id,
             pd.property_description as desc,
             p.property_name as name,
             p.unit_number as unit,
             p.house_number as house,
             st.street_name as street,
             su.suburb_name as suburb,
             p.postcode,
             p.zone_code as zone,
             p.area,
             v.base_date,
             v.land_value
        FROM nsw_valuer_general.property p
        LEFT JOIN nsw_valuer_general.property_description pd ON p.property_id = pd.property_id
        JOIN nsw_valuer_general.street st ON p.street_id = st.street_id
        JOIN nsw_valuer_general.suburb su ON p.suburb_id = su.suburb_id
        LEFT JOIN nsw_valuer_general.source src ON p.source_id = src.source_id
        LEFT JOIN nsw_valuer_general.source_file sf ON src.source_file_id = sf.source_file_id
        JOIN (SELECT va.* FROM nsw_valuer_general.valuations va
               WHERE va.base_date =
                 (SELECT MAX(vb.base_date)
                    FROM nsw_valuer_general.valuations vb
                   WHERE va.property_id = vb.property_id)) v
          ON v.property_id = p.property_id
       WHERE p.property_id IN :pids
       ORDER BY p.property_id
    """
    bind_values = {"pids": property_ids}
    return get_the_data(sql, params=bind_values)

def get_gnaf_address(limit, offset):
    """Fetch one page of NSW GNAF addresses that share a legal parcel id.

    Only addresses in localities whose state abbreviation is 'NSW' are
    returned, and only when their `legal_parcel_id` appears on more than one
    ADDRESS_DETAIL row (the count is taken over the whole table, not just
    NSW). Lot, unit, level and house are assembled into display strings from
    their prefix/number/suffix parts. Rows are ordered by legal parcel id,
    street name, house, level and unit.

    Args:
        limit: Maximum number of rows to return.
        offset: Number of rows to skip before the first returned row.

    Returns:
        The DataFrame produced by `get_the_data` for this query.
    """
    # The "-last" half of a house range is built with CONCAT inside a CASE,
    # not with `||`. `||` yields NULL as soon as any operand is NULL, so a
    # missing number_last_prefix or number_last_suffix would drop the whole
    # range end; CONCAT ignores NULL arguments.
    return get_the_data("""
      SELECT
          ad.address_detail_pid as gnaf_id,
          ad.legal_parcel_id,
          ad.building_name,
          CONCAT(ad.lot_number_prefix, ad.lot_number, ad.lot_number_suffix) as lot,
          CONCAT(ad.flat_number_prefix, ad.flat_number, ad.flat_number_suffix) as unit,
          CONCAT(COALESCE(ad.level_type_code || ' ', ''),
                 COALESCE(ad.level_number_prefix || '-', ''),
                 ad.level_number,
                 COALESCE('-' || ad.level_number_suffix, '')) as level,
          CONCAT(
            ad.number_first_prefix, ad.number_first, ad.number_first_suffix,
            CASE WHEN ad.number_last IS NOT NULL
                 THEN CONCAT('-', ad.number_last_prefix, ad.number_last, ad.number_last_suffix)
                 ELSE ''
            END
          ) as house,
          sl.street_name,
          l.locality_name,
          ad.postcode
        FROM gnaf.ADDRESS_DETAIL ad
        JOIN gnaf.LOCALITY l ON ad.locality_pid = l.locality_pid
        JOIN gnaf.STREET_LOCALITY sl ON ad.street_locality_pid = sl.street_locality_pid
        JOIN gnaf.STATE s ON l.state_pid = s.state_pid
       WHERE s.state_abbreviation = 'NSW'
         AND ad.legal_parcel_id IN (
          SELECT ad.legal_parcel_id FROM gnaf.ADDRESS_DETAIL ad
           GROUP BY ad.legal_parcel_id HAVING COUNT(*) > 1)
       ORDER BY ad.legal_parcel_id, street_name, house, level, unit
      OFFSET   :offset
       LIMIT   :limit
    """, params={ 'offset': offset, 'limit': limit })


## Introspect the property description

Here we are just seeing which property descriptions contain content that the parser does not fully consume, i.e. anything that looks a bit weird.

In [11]:
from lib.nsw_vg.property_description import parse_property_description

# Pull one page of non-trivial descriptions and report every description the
# parser leaves unconsumed text for.
rows = most_properties(limit=100, offset=2400)
# rows = most_properties(limit=50, offset=200) # have not checked up to 1900

weird = []
for nsw_id, desc in zip(rows['nsw_id'], rows['desc']):
    remaining, expanded = parse_property_description(desc)
    if not remaining:
        continue

    # Only the unparsed remainder is shown; `expanded` is left out to keep
    # the output short.
    pprint({
        'id': int(nsw_id),
        'description': desc,
        'remaining': remaining,
    })
    weird.append(desc)
pprint(weird)

# Show the page in two halves, as two separate tables.
display(rows.iloc[:50], rows.iloc[50:])

{'description': '1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, '
                '18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, '
                '33, 34, 35, 36, 37, 38, 39, 40, 41, 42, CP/SP36548 LOT 100 DP '
                '716204 (BEING LOTS 1/42 SP 36548)',
 'id': 854983,
 'remaining': 'LOT100DP716204(BEINGLOTS1/42SP36548)'}
{'description': '1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18, 19, '
                '20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, '
                '35, 36, 37, CP/SP32905 38/SP32921 MARINA Permissive Occupancy '
                '1957/409',
 'id': 1628476,
 'remaining': 'MARINA/409'}
{'description': '1/502735 MSB LEASE', 'id': 2091862, 'remaining': 'MSBLEASE'}
{'description': '192/874868 Shared use - 37% NSW Maritime 30003573',
 'id': 4033553,
 'remaining': '-37%'}
['1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, '
 '22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 

Unnamed: 0,nsw_id,desc,name,unit,house,street,suburb,postcode,zone,area,base_date,land_value
0,3631113,PT 1/1169345,PIRRAMA PARK,,20,PIRRAMA RD,PYRMONT,2009,RE1,12750.0,2023-01-07,24600000
1,3435051,"1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,...",,,60-82,PRINCES HWY,ST PETERS,2044,MU1,3985.0,2023-01-07,24600000
2,793651,"1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16...",,,50,UPPER PITT ST,KIRRIBILLI,2061,R4,2892.0,2023-01-07,24600000
3,2102466,A/367302,,,46,VAUCLUSE RD,VAUCLUSE,2030,R2,1707.0,2023-01-07,24600000
4,2076344,"1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,...",,,17-23,WALLIS PDE,NORTH BONDI,2026,R3,2946.6,2023-01-07,24600000
5,4151551,"1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,...",,,697-701,PITTWATER RD,DEE WHY,2099,MU1,5276.0,2023-01-07,24594966
6,3315412,PT 38/1098588,,,50,BARROW RD,MOUNT ANNAN,2567,E4,310000.0,2023-01-07,24500000
7,3810429,"1/114218 1/180264 1, 2, 3, 4, 7, 8, 9, 10, 27,...",BEREMBED STATION,,431,CARINGA RD,GRONG GRONG,2652,RU4,42803300.0,2023-01-07,24500000
8,728034,"1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,...",,,52,CHRISTIE ST,ST LEONARDS,2065,E2,4591.0,2023-01-07,24500000
9,1390904,"1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,...",,,11-13,CLARENCE ST,BURWOOD,2134,MU1,2379.0,2023-01-07,24500000


Unnamed: 0,nsw_id,desc,name,unit,house,street,suburb,postcode,zone,area,base_date,land_value
50,4353817,"PT 10, PT 11, PT 17, 18, 19, 20, 59, 60, 61, P...",,,48,CHILDE ST,BYRON BAY,2481,P,4637.4,2023-01-07,24300000
51,1420570,"8/3248 1, 2, 3, 4, 5/14348 1, 2, 3, 4, 5, 6/14...",HAMPDEN PRIMARY SCHOOL,,39-79,HAMPDEN RD,LAKEMBA,2195,R3,35490.0,2023-01-07,24300000
52,698306,"4, 5/1209 1/122431 1/122432 1/376563",SCHOOL ST IVES SOUTH,,60-70,HORACE ST,ST IVES,2075,SP2,77810.0,2023-01-07,24300000
53,411049,"2/734883 2, PT 3, 4, 5, 6, 7/740683 1, 2, 3/78...",,,,LACHLAN VALLEY WAY,GODFREYS CREEK,2586,RU1,55310000.0,2023-01-07,24300000
54,2091862,1/502735 MSB LEASE,,,16,LONGWORTH AVE,POINT PIPER,2027,R2,598.0,2023-01-07,24300000
55,2035628,"2, 3, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 1...",PARK HOUSE,,187-191,MACQUARIE ST,SYDNEY,2000,SP5,540.7,2023-01-07,24300000
56,2847293,78/6746 1/1028649,PACIFIC CLUB,,29-30,OCEAN RD,PALM BEACH,2108,C4,2797.0,2023-01-07,24300000
57,792363,A/431687 10/660453 1/772247,,,583,PACIFIC HWY,ST LEONARDS,2065,MU1,1350.0,2023-01-07,24300000
58,708159,"1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,...",GORDON GARDENS,,803-805,PACIFIC HWY,GORDON,2072,R4,6314.1,2023-01-07,24300000
59,1556906,"1, 2/812621",,,26-38,POWELL ST,HOMEBUSH,2140,MU1,5242.0,2023-01-07,24300000


In [5]:
# Hand-picked property ids; the trailing comment records each property's
# description at the time it was chosen.
sample_property_ids = (
    '570557', # 650/751743 Non-Irrigable Purchase 15
    '623846', # 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, CP/SP10943
    '1878087', # B/100895 6, PT 20/755520 Enclosure Permit 510145
    '1878096', # 1, 2, 3/133592 99/755520 153/755529 Enclosure Permit 50192
    '1878175', # PT 1/628296 1/705069
    '2681801', # 98/1066289 Mineral Claim 30854 Western Land Lease 14457
    '2681809', # 26/1066289 Western Land Lease 14476 Western Land Lease 31572
    '2682051', # 44/1073508 253/1076808 Western Land Lease 16121
    '2682966', # 15/1073508 Mineral Claim 42522R Western Land Lease 14691
    '3625326', # Forest Permit OCP100015
    '3821010', # 1, 3, 4/366686 1, 2/435063 18, 70/755529 1, 2, 3, 4/1088131 1/1166347
    '3851809', # PT 6401/1257392 Railway Land Lease 221.0037
    '4483251', # 1, 2/199140 2/530749 82, 120, 181/756909 27, 57, 134, PT 135, 136, 137, 190, 202, PT 210, PT 211/756913
    '4483252', # PT 135, PT 210, PT 211/756913 Wind Farm AN614034
    '3625277',
)
property_descriptions = get_by_properties_id(sample_property_ids)

# Print the full parse result (expanded items plus any leftover text) for
# each fetched description.
for desc in property_descriptions['desc']:
    remaining, expanded = parse_property_description(desc)
    pprint({
        'description': desc,
        'remaining': remaining,
        'expanded': expanded,
    })

property_descriptions

{'description': '650/751743 Non-Irrigable Purchase 15',
 'expanded': [NonIrrigablePurchase(id='15'),
              LandParcel(id='650/751743', part=False)],
 'remaining': ''}
{'description': '1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, '
                '18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, CP/SP10943',
 'expanded': [LandParcel(id='1/SP10943', part=False),
              LandParcel(id='2/SP10943', part=False),
              LandParcel(id='3/SP10943', part=False),
              LandParcel(id='4/SP10943', part=False),
              LandParcel(id='5/SP10943', part=False),
              LandParcel(id='6/SP10943', part=False),
              LandParcel(id='7/SP10943', part=False),
              LandParcel(id='8/SP10943', part=False),
              LandParcel(id='9/SP10943', part=False),
              LandParcel(id='10/SP10943', part=False),
              LandParcel(id='11/SP10943', part=False),
              LandParcel(id='12/SP10943', part=False),
              Land

Unnamed: 0,nsw_id,desc,name,unit,house,street,suburb,postcode,zone,area,base_date,land_value
0,570557,650/751743 Non-Irrigable Purchase 15,,,4.0,ABATTOIR RD,LAKE WYANGAN,2680,R5,3565.0,2023-01-07,439000
1,623846,"1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,...",,,1.0,AARON PL,WAHROONGA,2076,R2,9381.0,2023-01-07,7580000
2,1878087,"B/100895 6, PT 20/755520 Enclosure Permit 510145",4-D,,971.0,4D RD,CAROONA,2343,RU1,478200.0,2023-01-07,438000
3,1878096,"1, 2, 3/133592 99/755520 153/755529 Enclosure ...",LYNDEN,,758.0,4D RD,CAROONA,2343,RU1,3019900.0,2023-01-07,5320000
4,1878175,PT 1/628296 1/705069,YARRAMAN BLOCK,,1055.0,4D RD,SPRING RIDGE,2343,RU1,2147000.0,2023-01-07,3790000
5,2681801,98/1066289 Mineral Claim 30854 Western Land Le...,,,,14-10,LIGHTNING RIDGE,2834,SP1,2588.0,2023-01-07,3250
6,2681809,26/1066289 Western Land Lease 14476 Western La...,,,,14-11,LIGHTNING RIDGE,2834,SP1,2455.0,2023-01-07,3250
7,2682051,44/1073508 253/1076808 Western Land Lease 16121,,,,17-09,LIGHTNING RIDGE,2834,SP1,2535.0,2023-01-07,3250
8,2682966,15/1073508 Mineral Claim 42522R Western Land L...,,,,16-09,LIGHTNING RIDGE,2834,SP1,2477.0,2023-01-07,3250
9,3625277,"PT 98/721203 PT 93, PT 94, PT 96/755820 63, 64...",TARWONGA & AUCHEN DHU,,174.0,ABBOTTSLEY RD,WALCHA,2354,RU1,9442920.0,2023-01-07,6040000


In [6]:
# Sample a page of NSW addresses that share a legal parcel with another address.
gnaf_addresses = get_gnaf_address(limit=20, offset=60)
gnaf_addresses

Unnamed: 0,gnaf_id,legal_parcel_id,building_name,lot,unit,level,house,street_name,locality_name,postcode
0,GANSW718138632,1000/1117715,PEPPERFIELD,,21,,8,WISEMAN,BOWRAL,2576
1,GANSW718138633,1000/1117715,PEPPERFIELD,,22,,8,WISEMAN,BOWRAL,2576
2,GANSW718138634,1000/1117715,PEPPERFIELD,,23,,8,WISEMAN,BOWRAL,2576
3,GANSW718127753,1000/1117715,PEPPERFIELD,,24,,8,WISEMAN,BOWRAL,2576
4,GANSW718138635,1000/1117715,PEPPERFIELD,,25,,8,WISEMAN,BOWRAL,2576
5,GANSW718138636,1000/1117715,PEPPERFIELD,,26,,8,WISEMAN,BOWRAL,2576
6,GANSW718138637,1000/1117715,PEPPERFIELD,,27,,8,WISEMAN,BOWRAL,2576
7,GANSW718138638,1000/1117715,PEPPERFIELD,,28,,8,WISEMAN,BOWRAL,2576
8,GANSW718138639,1000/1117715,PEPPERFIELD,,29,,8,WISEMAN,BOWRAL,2576
9,GANSW718127760,1000/1117715,PEPPERFIELD,,3,,8,WISEMAN,BOWRAL,2576


# Anomalies

## Descriptions containing `;`

In [18]:
from lib.nsw_vg.property_description import parse_property_description

# Fetch descriptions matching ';'. description_containing applies its
# argument as a regular expression; a bare ';' matches any description that
# contains a semicolon.
rows = description_containing(';', limit=30, offset=0)

# Print the id next to the untruncated description; the table below
# abbreviates long descriptions.
for nsw_id, desc in zip(rows['nsw_id'], rows['desc']):
    print(int(nsw_id), desc)

display(rows)

3838002 42, PT 57, PT 58, PT 59, PT 60, PT 61/754940 and Pt Leard State Forest No.420 in Parish of Leard; and then about 1700 ha subsurface being Lots 3, pt 35, 55, Pts 58, 64, 65 110 DP 754924; Lots 32, pt 35, 39, 40, 41, 42 DP 754940; Lots 27, 70, 74, 75, Pts 68, 69, 71, 83 DP 754948; Lots 1-2 DP 510801; pt 1 DP 1157540; pt 1, lot 3 DP 1144479; lot 1 DP114793; pt 7001 DP94069. Coal Lease 375 Mining Lease 1701
3407923 122/42056 2/718634 2/733835 58, 69, 132/751078 1, 2/1022767 1, 4, 5/1080470 for grazing(crown land bounded by 105/751078, 1,2/1080470;parcel west of 96/751078) Licence 344585 Licence 405371
748784 252/531397 Council lease (34sqm. - STONE SEAWA;L, RECLAIMED LAND, BOATSHED, DECK T/SKID) SHARED USE NSW Maritime 30003836
3665826 1/787417 13, 14, 15/1225577 Mining Lease 1620 - Total area 365.2 ha (Surface 83.04ha,  Sub-surface 282.2 ha) within Part Lot 14 & Pt Lot 15/1225577& Lot 1 DP787417) ; Mining Lease 1662 - Surface area of 93.09 ha being part ML2 in Ph Tulcumba Co Nande

Unnamed: 0,nsw_id,desc,name,unit,house,street,suburb,postcode,zone,area,base_date,land_value
0,3838002,"42, PT 57, PT 58, PT 59, PT 60, PT 61/754940 a...",MAULES CREEK COLLIERY,,,TRANTHAM RD,BOGGABRI,2382,RU1,43861000.0,2023-01-07,488000000
1,3407923,"122/42056 2/718634 2/733835 58, 69, 132/751078...",,,,MOUNT LINDESAY RD,LISTON,2372,RU1,7777000.0,2023-01-07,5100000
2,748784,252/531397 Council lease (34sqm. - STONE SEAWA...,,,37.0,SEAFORTH CRES,SEAFORTH,2092,C3,1200.0,2023-01-07,3460000
3,3665826,"1/787417 13, 14, 15/1225577 Mining Lease 1620 ...",ROCGLEN,,2569.0,WEAN RD,WEAN,2382,RU1,5823000.0,2023-01-07,1800000
4,3599739,12/1161984 (part Wildlife Refuge No.362); Enc...,,,1092.0,PLAINS STATION RD,TABULAM,2469,RU1,669600.0,2023-01-07,810000
5,4363908,7309/1169890 5997/1205342 6058/1205343 6871/12...,,,,NEW ENGLAND HWY,GLENCOE,2365,RU1,904200.0,2023-01-07,333000
6,970426,PT 7093/93909 AND NSW MARITIME LEASE SHOWN IN ...,,,,SAILORS BAY RD,NORTHBRIDGE,2063,C2,700.0,2023-01-07,189000
7,2473913,1/1062221 (being at Tarana in two parts:- Pump...,,,,SODWALLS RD,TARANA,2787,RU1,6898.5,2023-01-07,71500
8,4362972,PART; crown roads Licence 623573,,,,MAYBOLE RD,BEN LOMOND,2365,RU1,185000.0,2023-01-07,68000
9,1245733,PT 1/209581 PT 7321/1166558 Subsurface Area = ...,MINE,,,DAROOBALGIE RD,FORBES,2871,RU1,122500.0,2023-01-07,27800
