In [1]:
%load_ext autoreload
%autoreload 2
from datetime import date

import polars as pl

from property_models.constants import DATA_DIR, POSTCODE_CSV_FILE, PropertyType, RecordType
from property_models.models import Address, HistoricalPrice

In [2]:
pl.read_csv(f"{DATA_DIR}/processed/country_codes.csv")

country_name,alpha-2,alpha-3,clean_name
str,str,str,str
"""Afghanistan""","""AF""","""AFG""","""afghanistan"""
"""Åland Islands""","""AX""","""ALA""","""åland_islands"""
"""Albania""","""AL""","""ALB""","""albania"""
"""Algeria""","""DZ""","""DZA""","""algeria"""
"""American Samoa""","""AS""","""ASM""","""american_samoa"""
…,…,…,…
"""Wallis and Futuna""","""WF""","""WLF""","""wallis_and_futuna"""
"""Western Sahara""","""EH""","""ESH""","""western_sahara"""
"""Yemen""","""YE""","""YEM""","""yemen"""
"""Zambia""","""ZM""","""ZMB""","""zambia"""


In [3]:
import os

# Get the current working directory
current_dir = os.getcwd()
root_dir = current_dir.rsplit("/property_models/", maxsplit=1)[0]
data_dir = f"{root_dir}/property_models/data"
data_dir

'/home/andre/git/private/property_models/data'

In [4]:
data = pl.DataFrame(
    {
        "unit_number": [1, 20, None],
        "date": [date(2020, 1, 1), None, date(2020, 10, 1)],
        "record_type": [RecordType.AUCTION, RecordType.ENQUIRY, RecordType.NO_SALE],
    }
)
print(data)
data.write_csv()

shape: (3, 3)
┌─────────────┬────────────┬─────────────┐
│ unit_number ┆ date       ┆ record_type │
│ ---         ┆ ---        ┆ ---         │
│ i64         ┆ date       ┆ str         │
╞═════════════╪════════════╪═════════════╡
│ 1           ┆ 2020-01-01 ┆ auction     │
│ 20          ┆ null       ┆ enquiry     │
│ null        ┆ 2020-10-01 ┆ no_sale     │
└─────────────┴────────────┴─────────────┘


'unit_number,date,record_type\n1,2020-01-01,auction\n20,,enquiry\n,2020-10-01,no_sale\n'

In [5]:
records_csv = b"""\
    unit_number,street_number,street_name,date,record_type,price
    ,1,STEELE STREET,2020-01-01,auction,1000000
    10,31,LONG ROAD,2020-10-01,no_sale,500000
    ,31,BROAD WAY,2025-12-01,private_sale,5000000
    """
records_csv

b'    unit_number,street_number,street_name,date,record_type,price\n    ,1,STEELE STREET,2020-01-01,auction,1000000\n    10,31,LONG ROAD,2020-10-01,no_sale,500000\n    ,31,BROAD WAY,2025-12-01,private_sale,5000000\n    '

In [6]:
import tempfile

# Create a temporary file
with tempfile.NamedTemporaryFile(delete=True) as temp_file:
    # Write data to the temporary file
    temp_file.write(records_csv)
    temp_file.seek(0)

    data_csv = HistoricalPrice.read_csv(records_csv)

data_csv.to_dict(as_series=False)

{'unit_number': [None, 10, None, None],
 'street_number': [1, 31, 31, None],
 'street_name': ['STEELE STREET', 'LONG ROAD', 'BROAD WAY', None],
 'date': [datetime.date(2020, 1, 1),
  datetime.date(2020, 10, 1),
  datetime.date(2025, 12, 1),
  None],
 'record_type': ['auction', 'no_sale', 'private_sale', None],
 'price': [1000000, 500000, 5000000, None]}

In [7]:
record_json = {
    "unit_number": [None, 10, None, None],
    "street_number": [1, 31, 31, None],
    "street_name": ["STEELE STREET", "LONG ROAD", "BROAD WAY", None],
    "date": [date(2020, 1, 1), date(2020, 10, 1), date(2025, 12, 1), None],
    "record_type": ["auction", "no_sale", "private_sale", None],
    "price": [1000000, 500000, 5000000, None],
}

data_json = pl.DataFrame(record_json)

In [8]:
import polars.testing

pl.testing.assert_frame_equal(data_csv, data_json, check_dtypes=False)

In [10]:
historical_prices = [
    HistoricalPrice(
        date=date(2020, 1, 1),
        record_type=RecordType.parse(RecordType.AUCTION),
        address=Address.parse("80 FIFTH STREET, ASCOT VALE, VIC 3032", country="australia"),
        price=100000,
    ),
    HistoricalPrice(
        date=date(2020, 1, 1),
        record_type=RecordType.parse(RecordType.ENQUIRY),
        address=Address.parse("80 SAMPLE STREET, ASCOT VALE, VIC 3032", country="australia"),
        price=None,
    ),
    HistoricalPrice(
        date=date(2020, 1, 1),
        record_type=RecordType.parse(" NO Sale"),
        address=Address.parse("80 ROSEBERRY STREET, NORTH MELBOURNE, VIC 3032", country="australia"),
        price=200000,
    ),
]
# historical_prices[0].model_dump()
historical_records = HistoricalPrice.to_records(historical_prices)
historical_records

unit_number,street_number,street_name,date,record_type,price
u16,u16,str,date,str,u32
,80,"""FIFTH STREET""",2020-01-01,"""auction""",100000.0
,80,"""SAMPLE STREET""",2020-01-01,"""enquiry""",
,80,"""ROSEBERRY STREET""",2020-01-01,"""no_sale""",200000.0


In [13]:
historical_records.to_dict(as_series=False)
records_json = {
    "unit_number": [None, None, None],
    "street_number": [80, 80, 80],
    "street_name": ["FIFTH STREET", "SAMPLE STREET", "ROSEBERRY STREET"],
    "date": [date(2020, 1, 1), date(2020, 1, 1), date(2020, 1, 1)],
    "record_type": ["auction", "enquiry", "no_sale"],
    "price": [100000, None, 200000],
}

data_json = pl.DataFrame(record_json)
data_json

unit_number,street_number,street_name,date,record_type,price
i64,i64,str,date,str,i64
,1.0,"""STEELE STREET""",2020-01-01,"""auction""",1000000.0
10.0,31.0,"""LONG ROAD""",2020-10-01,"""no_sale""",500000.0
,31.0,"""BROAD WAY""",2025-12-01,"""private_sale""",5000000.0
,,,,,
