In [43]:
%load_ext autoreload
%autoreload 2
import json
from datetime import date

import polars as pl

from property_models.constants import ADDRESS_SCHEMA, DATA_DIR, POSTCODE_CSV_FILE, PropertyType, RecordType
from property_models.models import Address, PriceRecord, PropertyInfo

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [53]:
data.filter(pl.col("address") == data["address"].item(1))

address,beds,baths,cars,property_size_m2,land_size_m2,condition,property_type,construction_date,floors
struct[7],u8,u8,u8,f32,f32,str,list[str],date,u8
"{22,42,""FDF STREET"",""WEST MELBOURNE"",3032,""VIC"",""australia""}",10,10,10,304.399994,100.300003,,"[""apartment"", ""sixties_brick""]",2000-01-01,


In [2]:
PropertyType.APARTMENT.SIXTIES_BRICK.value

('apartment', 'sixties_brick')

In [3]:
property_info = PropertyInfo(
    property_type=PropertyType.parse(PropertyType.APARTMENT.SIXTIES_BRICK),
    address=Address.parse("80 ROSEBERRY STREET, NORTH MELBOURNE, VIC 3032", country="australia"),
    condition=None,
    beds=10,
    cars=10,
    baths=10,
    floors=10,
    land_size_m2=100.3,
    property_size_m2=304.4,
    construction_date=date(2000, 1, 1),
)
property_info

PropertyInfo(address=Address(unit_number=None, street_number=80, street_name='ROSEBERRY STREET', suburb='NORTH MELBOURNE', postcode=3032, state='VIC', country='australia'), beds=10, baths=10, cars=10, property_size_m2=304.4, land_size_m2=100.3, condition=None, property_type=('apartment', 'sixties_brick'), construction_date=datetime.date(2000, 1, 1), floors=10)

In [4]:
pl.DataFrame([property_info]).select(pl.col("address").struct.unnest())
pl.DataFrame([property_info]).select(
    # pl.col('property_type')
    # .list.to_struct(fields = ["property_type_1","property_type_2"])
    # .struct['property_type_1']
    # .struct.unnest()
    property_type_general=pl.col("property_type").list[0],
    property_type_specific=pl.col("property_type").list[1],
)  # .rows(named= True)

property_type_general,property_type_specific
str,str
"""apartment""","""sixties_brick"""


In [5]:
# (
#     pl.DataFrame([property_info]).join(
#         pl.DataFrame([property_info]).select("address", pl.lit("asdf")),
#         on="address",
#         how="inner",
#     )
# )

In [6]:
data_json = property_info.model_dump()
data_string = json.dumps(data_json, default=str)

In [7]:
data_loaded = json.loads(data_string)
data_loaded

{'address': {'unit_number': None,
  'street_number': 80,
  'street_name': 'ROSEBERRY STREET',
  'suburb': 'NORTH MELBOURNE',
  'postcode': 3032,
  'state': 'VIC',
  'country': 'australia'},
 'beds': 10,
 'baths': 10,
 'cars': 10,
 'property_size_m2': 304.4,
 'land_size_m2': 100.3,
 'condition': None,
 'property_type': ['apartment', 'sixties_brick'],
 'construction_date': '2000-01-01',
 'floors': 10}

In [8]:
data_loaded = json.loads(data_string)
data_loaded

property_info_reloaded = PropertyInfo(
    address=Address(**data_loaded.pop("address")),
    property_type=PropertyType(data_loaded.pop("property_type")),
    construction_date=date.fromisoformat(data_loaded.pop("construction_date")),
    **data_loaded,
)
property_info_reloaded

PropertyInfo(address=Address(unit_number=None, street_number=80, street_name='ROSEBERRY STREET', suburb='NORTH MELBOURNE', postcode=3032, state='VIC', country='australia'), beds=10, baths=10, cars=10, property_size_m2=304.4, land_size_m2=100.3, condition=None, property_type=('apartment', 'sixties_brick'), construction_date=datetime.date(2000, 1, 1), floors=10)

In [9]:
property_info_reloaded == property_info

True

In [10]:
data_loaded = json.loads(data_string)

property_info_reloaded = PropertyInfo(
    address=Address(**data_loaded.pop("address")),
    property_type=PropertyType(data_loaded.pop("property_type")),
    construction_date=date.fromisoformat(data_loaded.pop("construction_date")),
    **data_loaded | {"beds": None},
)
property_info_reloaded

PropertyInfo(address=Address(unit_number=None, street_number=80, street_name='ROSEBERRY STREET', suburb='NORTH MELBOURNE', postcode=3032, state='VIC', country='australia'), beds=None, baths=10, cars=10, property_size_m2=304.4, land_size_m2=100.3, condition=None, property_type=('apartment', 'sixties_brick'), construction_date=datetime.date(2000, 1, 1), floors=10)

In [11]:
property_info_reloaded == property_info

False

In [12]:
properties_info_json = b"""[
{"address": {"unit_number": null, "street_number": 80, "street_name": "ROSEBERRY STREET", "suburb": "NORTH MELBOURNE", "postcode": 3032, "state": "VIC", "country": "australia"}, "beds": 10, "baths": 10, "cars": 10, "property_size_m2": 304.4, "land_size_m2": 100.3, "condition": null, "property_type": ["apartment", "sixties_brick"], "construction_date": "2000-01-01", "floors": 10},
{"address": {"unit_number": 22, "street_number": 42, "street_name": "FDF STREET", "suburb": "WEST MELBOURNE", "postcode": 3032, "state": "VIC", "country": "australia"}, "beds": 10, "baths": 10, "cars": 10, "property_size_m2": 304.4, "land_size_m2": 100.3, "condition": null, "property_type": ["apartment", "sixties_brick"], "construction_date": "2000-01-01", "floors": 1000},
{"address": {"unit_number": null, "street_number": 80, "street_name": "ROSEBERRY STREET", "suburb": "NORTH MELBOURNE", "postcode": 3032, "state": "VIC", "country": "australia"}, "beds": 10, "baths": 10, "cars": 10, "property_size_m2": 304.4, "land_size_m2": 100.3, "condition": null, "property_type": ["apartment", "None"], "construction_date": null, "floors": 100}
]"""

In [13]:
from property_models.constants import PRICE_RECORDS_SCHEMA, PROPERTIES_INFO_SCHEMA

In [14]:
import tempfile

with tempfile.NamedTemporaryFile(delete=True) as temp_file:
    temp_file.write(properties_info_json)
    temp_file.seek(0)
    # data = pl.read_json(properties_info_json,schema=PROPERTY_INFO_PL_SCHEMA | {"date_of_construction": pl.String})
    data = PropertyInfo.read_json(temp_file)
data

Validating properties: 100%|██████████| 3/3 [00:00<00:00, 17924.38it/s]


address,beds,baths,cars,property_size_m2,land_size_m2,condition,property_type,construction_date,floors
struct[7],u8,u8,u8,f32,f32,str,list[str],date,u8
"{null,80,""ROSEBERRY STREET"",""NORTH MELBOURNE"",3032,""VIC"",""australia""}",10,10,10,304.399994,100.300003,,"[""apartment"", ""sixties_brick""]",2000-01-01,10.0
"{22,42,""FDF STREET"",""WEST MELBOURNE"",3032,""VIC"",""australia""}",10,10,10,304.399994,100.300003,,"[""apartment"", ""sixties_brick""]",2000-01-01,
"{null,80,""ROSEBERRY STREET"",""NORTH MELBOURNE"",3032,""VIC"",""australia""}",10,10,10,304.399994,100.300003,,"[""apartment"", ""None""]",,100.0


beds,baths,cars,property_size_m2,land_size_m2,condition,property_type,construction_date,floors,unit_number,street_number,street_name,suburb,postcode,state,country
u8,u8,u8,f32,f32,str,list[str],date,u8,u16,u16,str,str,u16,str,str
10,10,10,304.399994,100.300003,,"[""apartment"", ""sixties_brick""]",2000-01-01,10.0,,80,"""ROSEBERRY STREET""","""NORTH MELBOURNE""",3032,"""VIC""","""australia"""
10,10,10,304.399994,100.300003,,"[""apartment"", ""sixties_brick""]",2000-01-01,,22.0,42,"""FDF STREET""","""WEST MELBOURNE""",3032,"""VIC""","""australia"""
10,10,10,304.399994,100.300003,,"[""apartment"", ""None""]",,100.0,,80,"""ROSEBERRY STREET""","""NORTH MELBOURNE""",3032,"""VIC""","""australia"""


In [15]:
data.to_dict(as_series=False)

{'address': [{'unit_number': None,
   'street_number': 80,
   'street_name': 'ROSEBERRY STREET',
   'suburb': 'NORTH MELBOURNE',
   'postcode': 3032,
   'state': 'VIC',
   'country': 'australia'},
  {'unit_number': 22,
   'street_number': 42,
   'street_name': 'FDF STREET',
   'suburb': 'WEST MELBOURNE',
   'postcode': 3032,
   'state': 'VIC',
   'country': 'australia'},
  {'unit_number': None,
   'street_number': 80,
   'street_name': 'ROSEBERRY STREET',
   'suburb': 'NORTH MELBOURNE',
   'postcode': 3032,
   'state': 'VIC',
   'country': 'australia'}],
 'beds': [10, 10, 10],
 'baths': [10, 10, 10],
 'cars': [10, 10, 10],
 'property_size_m2': [304.3999938964844, 304.3999938964844, 304.3999938964844],
 'land_size_m2': [100.30000305175781, 100.30000305175781, 100.30000305175781],
 'condition': [None, None, None],
 'property_type': [['apartment', 'sixties_brick'],
  ['apartment', 'sixties_brick'],
  ['apartment', 'None']],
 'construction_date': [datetime.date(2000, 1, 1),
  datetime.date

In [39]:
isinstance(data["sd"].dtype, pl.Struct)

[autoreload of property_models.models failed: Traceback (most recent call last):
  File "/home/andre/git/private/property_models/.pixi/envs/default/lib/python3.12/site-packages/IPython/extensions/autoreload.py", line 276, in check
    superreload(m, reload, self.old_objects)
  File "/home/andre/git/private/property_models/.pixi/envs/default/lib/python3.12/site-packages/IPython/extensions/autoreload.py", line 475, in superreload
    module = reload(module)
             ^^^^^^^^^^^^^^
  File "/home/andre/git/private/property_models/.pixi/envs/default/lib/python3.12/importlib/__init__.py", line 131, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 866, in _exec
  File "<frozen importlib._bootstrap_external>", line 991, in exec_module
  File "<frozen importlib._bootstrap_external>", line 1129, in get_code
  File "<frozen importlib._bootstrap_external>", line 1059, in source_to_code
  File "<frozen importlib._bootstrap>", line 488, in _call_with_fram

ColumnNotFoundError: "sd" not found

In [16]:
properties_info_json = {
    "address": [
        {
            "unit_number": None,
            "street_number": 80,
            "street_name": "ROSEBERRY STREET",
            "suburb": "NORTH MELBOURNE",
            "postcode": 3032,
            "state": "VIC",
            "country": "australia",
        },
        {
            "unit_number": None,
            "street_number": 80,
            "street_name": "ROSEBERRY STREET",
            "suburb": "NORTH MELBOURNE",
            "postcode": 3032,
            "state": "VIC",
            "country": "australia",
        },
        {
            "unit_number": None,
            "street_number": 80,
            "street_name": "ROSEBERRY STREET",
            "suburb": "NORTH MELBOURNE",
            "postcode": 3032,
            "state": "VIC",
            "country": "australia",
        },
    ],
    "beds": [10, 10, 10],
    "baths": [10, 10, 10],
    "cars": [10, 10, 10],
    "property_size_m2": [304.3999938964844, 304.3999938964844, 304.3999938964844],
    "land_size_m2": [100.30000305175781, 100.30000305175781, 100.30000305175781],
    "condition": [None, None, None],
    "property_type": [["apartment", "sixties_brick"], ["apartment", "sixties_brick"], ["apartment", "sixties_brick"]],
    "construction_date": [date(2000, 1, 1), date(2000, 1, 1), date(2000, 1, 1)],
    "floors": [10, 10, 10],
}

In [17]:
data["address"].map_elements(lambda row: Address(**row))

  data["address"].map_elements(lambda row: Address(**row))


address
object
unit_number=None street_number=80 street_name='ROSEBERRY STREET' suburb='NORTH MELBOURNE' postcode=3032 state='VIC' country='australia'
unit_number=22 street_number=42 street_name='FDF STREET' suburb='WEST MELBOURNE' postcode=3032 state='VIC' country='australia'
unit_number=None street_number=80 street_name='ROSEBERRY STREET' suburb='NORTH MELBOURNE' postcode=3032 state='VIC' country='australia'


In [18]:
# %%timeit
from tqdm import tqdm

# 35us for 3 rows
# 60ms for 3k rows
# 4s for 30k rows
# fast enough this is fine


# def pro(row):
#     PropertyInfo.from_stringified_dict(row)

# data.map_rows(pro)
for item in tqdm(pl.concat([data] * 10000).to_dicts()):
    PropertyInfo.from_stringified_dict(item)

100%|██████████| 30000/30000 [00:00<00:00, 191562.13it/s]


In [21]:
# properties_info_json