In [1]:
import pandas as pd
import re
import ast

In [6]:
def clean_price(price, weeks_per_month=4.3):
    """Extract a numeric (weekly) price from a free-text price value.

    The first number found in the text is taken as the price. It may be
    prefixed with ``$`` and may contain ``,`` thousands separators and a
    decimal part. If the number is directly followed by a monthly marker
    (``pm``, ``p/m`` or ``per month``, case-insensitive) the amount is
    converted to a weekly figure by dividing by ``weeks_per_month``. Any
    other or missing marker leaves the amount unchanged.

    Parameters
    ----------
    price : object
        Raw price value. It is converted with ``str()`` before matching.
    weeks_per_month : float, default 4.3
        Divisor used to turn a monthly amount into a weekly one.

    Returns
    -------
    float or None
        The price rounded to two decimals, or ``None`` when ``price`` is
        missing (``pd.isna``) or contains no digits.
    """
    if pd.isna(price):
        return None

    # The amount must start with a digit. A pattern that also accepts a lone
    # comma would match punctuation in text such as "Contact agent, $500 pw"
    # and then fail when the digit-free match is passed to float().
    match = re.search(
        r'(\$?\d[\d,]*(?:\.\d+)?)\s*(p/?w|per week|p/?m|per month)?',
        str(price),
        re.IGNORECASE,
    )
    if match is None:
        return None

    # Strip the currency sign and thousands separators before converting.
    price_value = float(match.group(1).lstrip('$').replace(',', ''))

    # Only monthly markers trigger a conversion; weekly or absent markers
    # are returned as-is.
    unit = (match.group(2) or '').lower()
    if unit in ('pm', 'p/m', 'per month'):
        price_value /= weeks_per_month

    return round(price_value, 2)

def parse_location(location_str):
    """Expand a serialized address record into a Series of its fields.

    Parameters
    ----------
    location_str : str, dict or missing value
        A Python-literal string describing a dict (parsed with
        ``ast.literal_eval``), an already-parsed dict, or a missing value.

    Returns
    -------
    pandas.Series
        One entry per key of the parsed dict. For missing values and blank
        strings, a Series with the keys ``streetAddress``,
        ``addressLocality``, ``addressRegion`` and ``postalCode`` all set
        to ``None``.

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ``ast.literal_eval`` when a non-blank string is not
        a valid Python literal.
    """
    address_fields = ('streetAddress', 'addressLocality', 'addressRegion', 'postalCode')

    # A value that is already a dict needs no evaluation; passing it to
    # ast.literal_eval would raise ValueError.
    if isinstance(location_str, dict):
        return pd.Series(location_str)

    # Blank strings carry no address and would make ast.literal_eval raise
    # SyntaxError, so they are treated the same as missing values.
    if pd.isna(location_str) or not str(location_str).strip():
        return pd.Series(dict.fromkeys(address_fields))

    return pd.Series(ast.literal_eval(location_str))

In [10]:
# Load the properties CSV from the landing data directory.
# The path is relative, so it resolves against the notebook's working directory.
property_df = pd.read_csv("../data/landing/properties.csv")

In [11]:
# Replace the raw price text with the numeric value produced by clean_price.
property_df['price'] = property_df['price'].apply(clean_price)

# Expand each serialized location record into its own columns, then swap
# those columns in for the original 'location' column.
location_df = property_df['location'].apply(parse_location)
property_df = pd.concat(
    [property_df.drop(columns='location'), location_df],
    axis='columns',
)

In [15]:
# Column order for the cleaned frame; columns not listed here are dropped
# when the frame is indexed with this list.
new_order = [
    # street address and headline listing attributes
    'streetAddress',
    'price',
    'bedrooms',
    'bathrooms',
    'parking',
    # remaining address components
    'addressLocality',
    'addressRegion',
    'postalCode',
    # property and listing details
    'property_type',
    'nearby_schools',
    'nbn_type',
    'agency',
    # coordinates, demographics and area
    'geo',
    'age_distribution',
    'area',
]

In [16]:
# Keep only the columns named in new_order, in that order.
# A name in new_order that is not a column of the frame raises KeyError.
property_df = property_df[new_order]

In [17]:
# Preview the first rows of the cleaned, reordered frame.
property_df.head()

Unnamed: 0,streetAddress,price,bedrooms,bathrooms,parking,addressLocality,addressRegion,postalCode,property_type,nearby_schools,nbn_type,agency,geo,age_distribution,area
0,6/52 Grange Road,600.0,2,1,1,Toorak,VIC,3142,Apartment,"['Prahran High School', 'SEDA College (Victori...",HFC,"{'id': 4450, 'name': 'Marshall White Stonningt...","{'latitude': -37.8350551, 'longitude': 145.012...","{'under 20': '16%', '20 - 39': '21%', '40 - 59...",Not available
1,202/572 St Kilda Road,550.0,1,1,0,Melbourne,VIC,3004,Apartment,"['South Yarra Primary School', 'Prahran High S...",,"{'id': 22398, 'name': 'Melbourne Residential R...","{'latitude': -37.8475988, 'longitude': 144.979...","{'under 20': '15%', '20 - 39': '38%', '40 - 59...",Not available
2,20/24 Springfield Avenue,925.0,3,1,1,Toorak,VIC,3142,Apartment,"['Prahran High School', ""Lauriston Girls' Scho...",HFC,"{'id': 4450, 'name': 'Marshall White Stonningt...","{'latitude': -37.84506409999999, 'longitude': ...","{'under 20': '16%', '20 - 39': '39%', '40 - 59...",Not available
3,16 Tolkien Drive,470.0,4,2,2,Mambourin,VIC,3024,House,"['Manor Lakes P-12 College', 'Christway Colleg...",,"{'id': 27202, 'name': 'Victoria Real Estate Ag...","{'latitude': -37.89191, 'longitude': 144.585974}",{},Not available
4,20 Oak Street,1400.0,4,3,2,Surrey Hills,VIC,3127,House,"['Mount Scopus Memorial College', 'Emmaus Coll...",HFC,"{'id': 18650, 'name': 'Kay & Burton Boroondara'}","{'latitude': -37.8333697, 'longitude': 145.110...","{'under 20': '30%', '20 - 39': '25%', '40 - 59...",Not available
