In [8]:
import pandas as pd
import geopandas as gpd

In [9]:
# Input locations, relative to this notebook's working directory.
RENT_CSV_PATH = "../../data/curated/API_re_clean.csv"
SA2_SHAPEFILE_PATH = "./SA2_2021_AUST_SHP_GDA2020/SA2_2021_AUST_GDA2020.shp"

# Rent data: the first CSV column is used as the DataFrame index.
data = pd.read_csv(RENT_CSV_PATH, index_col=0)

# SA2 geolocation data, loaded from the shapefile as a GeoDataFrame.
sf = gpd.read_file(SA2_SHAPEFILE_PATH)

In [10]:
from operator import concat

# Slice the geolocation for Victoria (rows whose STE_CODE21 is '2'), keeping
# only the SA2 code and the polygon geometry.
COL_SF = ["SA2_CODE21", "geometry"]

# Take an explicit copy: writing to a frame obtained by boolean filtering can
# trigger pandas' SettingWithCopyWarning and leaves it unclear which object is
# actually modified.
sf = sf.loc[sf["STE_CODE21"] == '2', COL_SF].copy()

# Reproject the whole GeoDataFrame (not just the geometry column) so the
# frame's CRS metadata and its geometries stay in agreement. The target is
# plain longitude/latitude on WGS84, matching the listing coordinates.
sf = sf.to_crs("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")

# Drop areas that have no code or no geometry.
sf = sf.dropna()

In [11]:
# Store the SA2 codes as integers rather than the type they were loaded with.
sa2_code_as_int = sf["SA2_CODE21"].astype(int)
sf["SA2_CODE21"] = sa2_code_as_int

In [12]:
import numpy as np

# Discard rows with any missing value, then add an all-NaN SA2_CODE column
# that the coordinate lookup further down fills in.
data = data.dropna().assign(SA2_CODE=np.nan)


In [13]:
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

In [14]:
# Number of (SA2 area, listing) pairs, i.e. the worst-case count of
# point-in-polygon checks if every listing is tested against every area.
pair_count = len(sf) * len(data)
print("Estimated processing size:", pair_count)

Estimated processing size: 7670268


In [15]:
# Allocate SA2 code based on the coordinates from rent data.
#
# Each listing's point is built once and tested against every SA2 polygon in a
# single vectorised GeoSeries.contains() call, instead of a nested Python loop
# over iterrows() that rebuilt the same Point for every polygon.
sa2_geometries = sf["geometry"]
sa2_codes = sf["SA2_CODE21"].to_numpy()

matched_codes = []
for lon, lat in zip(data["longitude"], data["latitude"]):
    inside = sa2_geometries.contains(Point(lon, lat)).to_numpy()
    # argmax() yields the position of the first True, so the first containing
    # polygon (in sf order) wins, as with the former `break`. Listings that
    # fall inside no polygon stay NaN.
    matched_codes.append(sa2_codes[inside.argmax()] if inside.any() else np.nan)

# Assign positionally (one value per row, in row order) rather than matching on
# the `id` column: this avoids rescanning the frame for every listing and
# cannot overwrite other rows that happen to share an id.
# The column is kept as float64 so the exported CSV keeps its existing format.
data["SA2_CODE"] = np.asarray(matched_codes, dtype="float64")


In [16]:
# Show data loss: compare the row count before and after dropping rows that
# still contain a missing value.
rows_total = len(data)
rows_complete = len(data.dropna())
print("Original size:", rows_total, "=> Result size:", rows_complete)
print("Loss:", rows_total - rows_complete)

Original size: 14694 => Result size: 14694
Loss: 0


In [17]:
# Write the rent data, now carrying the SA2_CODE column, to the curated folder.
SA2_RESULT_CSV_PATH = "../../data/curated/API_clean_price_with_SA2_using_geolocation.csv"
data.to_csv(SA2_RESULT_CSV_PATH)

In [18]:
import folium
import numpy as np

# Serialise the SA2 polygons to JSON so they can be handed to folium as geo_data.
sa2_shapes = sf['geometry']
geoJSON = sa2_shapes.to_json()

In [None]:
# Map whole SA2 area.
#
# The base layer uses folium's built-in "OpenStreetMap" tiles. The previous
# "Stamen Terrain" name is not a built-in tileset in current folium releases
# (it fails with "Custom tiles must have an attribution"), and the Stamen tile
# hosting it pointed to has been retired.
_map = folium.Map(location=[-37, 144], tiles="OpenStreetMap", zoom_start=10)

# Overlay the SA2 polygon outlines; no data/columns are bound, so the layer is
# drawn without value-based colouring.
folium.Choropleth(
    geo_data=geoJSON,
    name='SA2 Area',
).add_to(_map)

_map.save('../../plots/SA2_Map.html')
_map

In [None]:
# Point rent data in the map: one marker per listing, added on top of the SA2
# layer already present in `_map`.

# Only listings that have both coordinates can be placed.
data_s = data.dropna(subset=['latitude', 'longitude'])

id_data = data_s['id']
latitude_data = data_s['latitude']
longitude_data = data_s['longitude']

# Loop variables are deliberately not called `id`: at notebook top level that
# would rebind the builtin id() for every cell executed afterwards.
for listing_id, lat, lon in zip(id_data, latitude_data, longitude_data):
    # folium expects [latitude, longitude]; the popup shows the listing id.
    folium.Marker(location=[lat, lon], popup=str(listing_id)).add_to(_map)

_map.save('../../plots/rentalData_in_SA2Location.html')
_map