---------
# Preprocess 2
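1. Merge suburb population
2. Merge suburb scaled offence count
3. Merge suburb income (2022)
4. Drop features not needed for modeling and remove rows with missing values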
---------

In [3]:
import numpy as np
import pandas as pd
import json
import seaborn as sns
import geopandas as gpd
from geopandas.tools import sjoin



Load the datasets

In [9]:
# Read the four input tables in one pass; each CSV's first column becomes the row index.
# NOTE(review): the income file is read from ../data/raw while the other suburb-level
# tables come from ../data/raw/external_data — confirm this location is intended.
property, population, crime, income = (
    pd.read_csv(csv_path, index_col=[0])
    for csv_path in (
        '../data/raw/properties_preporcessed_1.csv',
        '../data/raw/external_data/population_for_suburb.csv',
        '../data/raw/external_data/crime_completed.csv',
        '../data/raw/income_for_suburb.csv',
    )
)

In [5]:
# Preview the first two property listings.
property.head(n=2)

Unnamed: 0,name,cost_text,rooms,type,cloest station,station_distance,cloest school,school_distance,CBD_distance,postcode,...,text,beds,bath,parking,Longitude,Latitude,SA2_CODE21,LOC_PID,LOC_NAME,SA2_MAIN16
0,1901/368 St Kilda Road Melbourne VIC 3000,$1800 Per Week,"['3 Beds', '2 Baths', '2 Parking']",Apartment / Unit / Flat,30,1803.65,1374,667.06,1702.19,3000,...,Per Week,3,2,2,144.971027,-37.831809,206051512.0,loc9901d119afda,Melbourne,206051132.0
1,1211/200 Spencer Street Melbourne VIC 3000,$480 per week,"['1 Bed', '1 Bath', '1 Parking']",Apartment / Unit / Flat,176,371.68,2265,352.49,2267.13,3000,...,per week,1,1,1,144.953247,-37.816228,206041505.0,loc9901d119afda,Melbourne,206041122.0


In [6]:
# Preview the first two rows of the suburb population table.
population.head(n=2)

Unnamed: 0,LOC_PID,suburb_population,LOC_NAME,suburb_area,density
0,loc0067a4549ed1,4897.0,Korumburra,59.867302,81.797573
1,loc00a9769647d7,26158.0,Kew,10.520939,2486.279947


In [7]:
# Preview the first two rows of the scaled offence-count table.
crime.head(n=2)

Unnamed: 0,LOC_PID,LOC_NAME,offence_count_scaled
0,locb9872f35df41,Abbotsford,0.304761
1,loc8123ed12ea8d,Aberfeldie,0.15468


In [10]:
# Preview the first two rows of the per-suburb income table.
income.head(n=2)

Unnamed: 0,LOC_NAME,2011-12.3,2012-13.3,2013-14.3,2014-15.3,2015-16.3,2016-17.3,2017-18.3,2018-19.3,2019,2020,2021,2022,2023,2024
0,Abbotsford,50034.0,50558.0,51419.0,53987.0,57501.0,58359.0,61476.0,64090.0,66409.949224,68813.876673,71304.822215,73885.935758,76560.481231,79331.84071
1,Aberfeldie,50458.0,52097.0,53037.0,54683.0,56381.0,56597.0,59116.0,61204.0,62920.085721,64684.288398,66497.957172,68362.479013,70279.279779,72249.825306


* Merge suburb population
* Merge suburb scaled offence count
* Merge suburb income (2022 column)

In [15]:
# Attach suburb population and density to every listing, keyed on LOC_PID.
# LOC_NAME and suburb_area are dropped from the lookup table before joining.
property_pop = property.merge(
    population.drop(columns=['LOC_NAME', 'suburb_area']),
    on="LOC_PID",
    how='left',
)

# Attach the scaled offence count, again keyed on LOC_PID.
property_pop_crime = property_pop.merge(
    crime.drop(columns="LOC_NAME"),
    on='LOC_PID',
    how='left',
)

# The income table is joined on LOC_NAME; only its '2022' column is kept
# and relabelled to '2022_income'.
property_pop_crime_income = (
    property_pop_crime
    .merge(income.loc[:, ['LOC_NAME', '2022']], on='LOC_NAME', how='left')
    .rename(columns={'2022' : '2022_income'})
)
property_pop_crime_income.head(n=2)

Unnamed: 0,name,cost_text,rooms,type,cloest station,station_distance,cloest school,school_distance,CBD_distance,postcode,...,Longitude,Latitude,SA2_CODE21,LOC_PID,LOC_NAME,SA2_MAIN16,suburb_population,density,offence_count_scaled,2022_income
0,1901/368 St Kilda Road Melbourne VIC 3000,$1800 Per Week,"['3 Beds', '2 Baths', '2 Parking']",Apartment / Unit / Flat,30,1803.65,1374,667.06,1702.19,3000,...,144.971027,-37.831809,206051512.0,loc9901d119afda,Melbourne,206051132.0,64537.612413,9902.586904,0.614944,59707.937643
1,1211/200 Spencer Street Melbourne VIC 3000,$480 per week,"['1 Bed', '1 Bath', '1 Parking']",Apartment / Unit / Flat,176,371.68,2265,352.49,2267.13,3000,...,144.953247,-37.816228,206041505.0,loc9901d119afda,Melbourne,206041122.0,64537.612413,9902.586904,0.614944,59707.937643


Drop features that are not needed for modeling and analysis, then remove rows with missing values

In [16]:
# Build the curated table in one chain:
#   1. drop the columns that are not used for modeling/analysis,
#   2. drop every row that still has a missing value,
#   3. cast the 'cloest station' / 'cloest school' columns to str.
# The cast must come AFTER dropna: str(NaN) is the non-null string 'nan', so
# casting first would hide missing values in those two columns from dropna
# and let incomplete rows through.
curated_property = (
    property_pop_crime_income
    .drop(columns=['cost_text', 'rooms', 'name', 'text', 'Longitude', 'Latitude', 'SA2_MAIN16', 'SA2_CODE21'])
    .dropna(how='any')
    .astype({'cloest station': str, 'cloest school': str})
)
curated_property.head()

Unnamed: 0,type,cloest station,station_distance,cloest school,school_distance,CBD_distance,postcode,address,cost,beds,bath,parking,LOC_PID,LOC_NAME,suburb_population,density,offence_count_scaled,2022_income
0,Apartment / Unit / Flat,30,1803.65,1374,667.06,1702.19,3000,1901/368 St Kilda Road Melbourne,1800.0,3,2,2,loc9901d119afda,Melbourne,64537.612413,9902.586904,0.614944,59707.937643
1,Apartment / Unit / Flat,176,371.68,2265,352.49,2267.13,3000,1211/200 Spencer Street Melbourne,480.0,1,1,1,loc9901d119afda,Melbourne,64537.612413,9902.586904,0.614944,59707.937643
2,Apartment / Unit / Flat,28,248.5,2189,401.73,1937.39,3000,1008/380 Little Lonsdale Street Melbourne,400.0,1,1,0,loc9901d119afda,Melbourne,64537.612413,9902.586904,0.614944,59707.937643
3,Apartment / Unit / Flat,140,404.26,2193,586.61,790.13,3000,3/27 Flinders Lane Melbourne,420.0,1,1,0,loc9901d119afda,Melbourne,64537.612413,9902.586904,0.614944,59707.937643
4,Apartment / Unit / Flat,28,525.08,2265,779.91,1657.95,3000,611/408 Lonsdale Street Melbourne,350.0,1,1,0,loc9901d119afda,Melbourne,64537.612413,9902.586904,0.614944,59707.937643


In [17]:
# Persist the curated table; the row index is written as the first CSV column (pandas default).
curated_property.to_csv(path_or_buf="../data/curated/property_final.csv", index=True)