In [None]:
import pandas as pd
import glob
import os
import seaborn as sns
from matplotlib import pyplot as plt

## Analyse Combined Data

In [None]:
# Read every CSV under the curated "property_all_with_SA2_classified" folder
# and stack them into one frame.
curated_path = "../data/curated/property_all_with_SA2_classified/*.csv"

# glob yields matches in arbitrary, OS-dependent order; sorting keeps the row
# order of the combined frame reproducible between runs.
file_lst2 = sorted(glob.glob(curated_path))
if not file_lst2:
    # Without this guard pd.concat fails with "No objects to concatenate",
    # which hides the real cause (wrong working directory / missing data).
    raise FileNotFoundError("No CSV files match " + curated_path)

# ignore_index=True gives the combined frame a single unique RangeIndex
# instead of repeating each file's own 0..n-1 row labels.
df = pd.concat((pd.read_csv(f) for f in file_lst2), ignore_index=True)
combined_df = df  # second name for the same object, not a copy

In [None]:
# Display the combined frame to eyeball the loaded data.
df

In [None]:

# One row per residence type: mean weekly rent plus a row tally.
# `num` is the count of non-null `postcode` values, so rows without a
# postcode are not included in the tally.
type_df = (
    df[['residence_type', 'year', 'weekly_rent', 'postcode']]
    .groupby(['residence_type'], as_index=False)
    .agg(
        averaged_wk_rent=('weekly_rent', 'mean'),
        num=('postcode', 'count'),
    )
)

type_df

In [None]:
# Bar chart of the row tally (`num`) for each residence type.
ax = sns.barplot(x="residence_type", y="num", data=type_df)

In [None]:
# List the available column names before building the SA2-level aggregates.
df.columns

### Aggregation by SA2 and residence type

In [None]:

# Mean weekly rent and row tally for every (SA2 code, residence type) pair.
# The tally counts non-null values of 'Unnamed: 0' -- presumably the index
# column written out with the curated CSVs (TODO confirm) -- and is exposed
# as `num`.
sa2_df = (
    df[['residence_type', 'year', 'weekly_rent', 'postcode', 'SA2_CODE', 'Unnamed: 0']]
    .groupby(['SA2_CODE', 'residence_type'], as_index=False)
    .agg(
        averaged_wk_rent=('weekly_rent', 'mean'),
        num=('Unnamed: 0', 'count'),
    )
)

sa2_df

In [None]:

# Same aggregation as the per-type table, but pooled over all residence
# types: one row per SA2 code with the mean weekly rent and a row tally.
# The tally counts non-null values of 'Unnamed: 0' and is exposed as `num`.
sa2_df_all = (
    df[['residence_type', 'year', 'weekly_rent', 'postcode', 'SA2_CODE', 'Unnamed: 0']]
    .groupby(['SA2_CODE'], as_index=False)
    .agg(
        averaged_wk_rent=('weekly_rent', 'mean'),
        num=('Unnamed: 0', 'count'),
    )
)

sa2_df_all

Rank SA2 areas by the number of houses and apartments rented in each suburb

In [None]:
# Split the per-type SA2 table into apartments and houses, each ordered from
# the largest to the smallest row tally.
sa2_df_a = (
    sa2_df.loc[sa2_df['residence_type'] == 'Apartment']
    .sort_values(by='num', ascending=False)
)
sa2_df_h = (
    sa2_df.loc[sa2_df['residence_type'] == 'House']
    .sort_values(by='num', ascending=False)
)

In [None]:
# Left-join the ranked tables onto the SA2 lookup file so that each SA2 code
# carries the lookup's columns; rows keep the ranking order of the left side.
vic_sa2 = pd.read_csv('../data/curated/sa2_vic_2021.csv')
top_df_a = pd.merge(
    sa2_df_a, vic_sa2,
    how='left', left_on='SA2_CODE', right_on='SA2_CODE21',
)
top_df_h = pd.merge(
    sa2_df_h, vic_sa2,
    how='left', left_on='SA2_CODE', right_on='SA2_CODE21',
)

In [None]:
# Keep only the SA2 name and code columns of the ranked tables.
top_df_a = top_df_a.loc[:, ['SA2_NAME21', 'SA2_CODE21']]
top_df_h = top_df_h.loc[:, ['SA2_NAME21', 'SA2_CODE21']]

In [None]:
# Export the ranked apartment table as an HTML table under the web templates.
html = top_df_a.to_html()

path = '../web/templates/html/'
# exist_ok=True replaces the check-then-create pair, which could fail if the
# directory appeared between the two calls.
os.makedirs(path, exist_ok=True)

# The context manager closes the file even when the write raises, and the
# explicit UTF-8 keeps the output independent of the platform's default
# encoding.
with open(os.path.join(path, "top_df_a.html"), "w", encoding="utf-8") as text_file:
    text_file.write(html)

In [None]:
# Export the ranked house table as an HTML table under the web templates.
html = top_df_h.to_html()

path = '../web/templates/html/'
# exist_ok=True replaces the check-then-create pair, which could fail if the
# directory appeared between the two calls.
os.makedirs(path, exist_ok=True)

# The context manager closes the file even when the write raises, and the
# explicit UTF-8 keeps the output independent of the platform's default
# encoding.
with open(os.path.join(path, "top_df_h.html"), "w", encoding="utf-8") as text_file:
    text_file.write(html)

## Geo Analysis

In [None]:
import geopandas as gpd
import json
import pandas as pd
# Load the GeoJSON that the choropleth maps in this notebook use as their
# geometry source. GeoJSON is UTF-8 encoded, so decode it explicitly rather
# than with the platform's default encoding.
with open('../data/curated/geo.json', 'r', encoding='utf-8') as geo_file:
    geoJSON = json.load(geo_file)


In [None]:
# Peek at the per-(SA2, residence type) aggregate before SA2 names are merged in.
sa2_df.head()

In [None]:
# Attach the SA2 lookup columns (including 'SA2_NAME21', which the map cells
# key on) to both aggregate tables, then drop the duplicate code column and
# the lookup file's 'Unnamed: 0' column.
SA2 = pd.read_csv('../data/curated/sa2_vic_2021.csv')

# Each merge overwrites its own input. Re-running the cell on already-merged
# frames would give the clashing lookup columns _x/_y suffixes, so the plain
# 'SA2_NAME21' column would disappear. Only merge frames that do not carry
# the lookup columns yet, which makes the cell safe to run more than once.
if 'SA2_NAME21' not in sa2_df.columns:
    sa2_df = (
        sa2_df
        .merge(SA2, how='left', left_on='SA2_CODE', right_on='SA2_CODE21')
        .drop(columns=['SA2_CODE21', 'Unnamed: 0'])
    )
if 'SA2_NAME21' not in sa2_df_all.columns:
    sa2_df_all = (
        sa2_df_all
        .merge(SA2, how='left', left_on='SA2_CODE', right_on='SA2_CODE21')
        .drop(columns=['SA2_CODE21', 'Unnamed: 0'])
    )

In [None]:
# Re-derive the apartment and house subsets from the current `sa2_df`,
# replacing the earlier ranked versions of these two names.
sa2_df_a = sa2_df.loc[sa2_df['residence_type'] == 'Apartment']
sa2_df_h = sa2_df.loc[sa2_df['residence_type'] == 'House']

Number of total properties

In [None]:

import folium

# Choropleth of the pooled row tally (`num`) per SA2, matched to the GeoJSON
# features through their SA2_NAME21 property. Areas without data are black.
# NOTE(review): newer folium releases (0.15+, TODO confirm) no longer ship the
# "Stamen Terrain" tiles; if this line raises, choose another tileset.
m = folium.Map(location=[-37.81, 144.96], tiles="Stamen Terrain", zoom_start=10, color='white')
# Semi-transparent white background for SVG elements in the rendered page
# (presumably to keep the legend readable over the tiles).
svg_style = '<style>svg {background-color: rgb(255, 255, 255,0.5);}</style>'
m.get_root().header.add_child(folium.Element(svg_style))

c = folium.Choropleth(
    geo_data=geoJSON,
    name='choropleth',
    data=sa2_df_all.reset_index(),
    columns=['SA2_NAME21', 'num'],
    key_on='properties.SA2_NAME21',
    fill_color='PiYG',
    nan_fill_color='black',
    legend_name='number of total property',  # was misspelled "propety"
)

c.add_to(m)
# End on the map itself so the cell renders it, like the other map cells do.
m

In [None]:
# Output directory for the aggregated choropleth maps. Later cells that save
# maps reuse this `path` value.
path = '../plots/aggregated_geo/'
# exist_ok=True replaces the check-then-create pair, which could fail if the
# directory appeared between the two calls.
os.makedirs(path, exist_ok=True)
# File name left exactly as it was (spelling included) in case it is linked
# from elsewhere.
m.save(path + 'number of total propety.html')

number of apartment

In [None]:

import folium

# Choropleth of the apartment row tally (`num`) per SA2, matched to the
# GeoJSON features through their SA2_NAME21 property. Areas without data are
# black.
# NOTE(review): newer folium releases (0.15+, TODO confirm) no longer ship the
# "Stamen Terrain" tiles; if this line raises, choose another tileset.
m = folium.Map(
    location=[-37.81, 144.96],
    zoom_start=10,
    tiles="Stamen Terrain",
    color='white',
)
svg_style = '<style>svg {background-color: rgb(255, 255, 255,0.5);}</style>'
m.get_root().header.add_child(folium.Element(svg_style))

# add_to hands back the layer it was called on, so `c` is still the Choropleth.
c = folium.Choropleth(
    geo_data=geoJSON,
    data=sa2_df_a.reset_index(),
    columns=['SA2_NAME21', 'num'],
    key_on='properties.SA2_NAME21',
    fill_color='PiYG',
    nan_fill_color='black',
    legend_name='number of apartment',
    name='choropleth',
).add_to(m)
m

In [None]:

# Save the apartment-count map; `path` is the output directory assigned when
# the first map was saved.
m.save(f"{path}number of apartment.html")

number of house

In [None]:

import folium

# Choropleth of the house row tally (`num`) per SA2, matched to the GeoJSON
# features through their SA2_NAME21 property. Areas without data are black.
# NOTE(review): newer folium releases (0.15+, TODO confirm) no longer ship the
# "Stamen Terrain" tiles; if this line raises, choose another tileset.
m = folium.Map(location=[-37.81, 144.96], tiles="Stamen Terrain", zoom_start=10, color='white')
# Semi-transparent white background for SVG elements in the rendered page
# (presumably to keep the legend readable over the tiles).
svg_style = '<style>svg {background-color: rgb(255, 255, 255,0.5);}</style>'
m.get_root().header.add_child(folium.Element(svg_style))

c = folium.Choropleth(
    geo_data=geoJSON,
    name='choropleth',
    data=sa2_df_h.reset_index(),
    columns=['SA2_NAME21', 'num'],
    key_on='properties.SA2_NAME21',
    fill_color='PiYG',
    nan_fill_color='black',
    # This map plots the house subset; the legend previously still read
    # "number of apartment", copied over from the apartment map.
    legend_name='number of house',
)

c.add_to(m)
m

In [None]:
# Save the house-count map; `path` is the output directory assigned when the
# first map was saved.
m.save(f"{path}number of houses.html")

averaged weekly rent of apartment

In [None]:

import folium

# Choropleth of the mean weekly apartment rent per SA2, matched to the GeoJSON
# features through their SA2_NAME21 property. Areas without data are black.
# NOTE(review): newer folium releases (0.15+, TODO confirm) no longer ship the
# "Stamen Terrain" tiles; if this line raises, choose another tileset.
m = folium.Map(
    location=[-37.81, 144.96],
    zoom_start=10,
    tiles="Stamen Terrain",
    color='white',
)
svg_style = '<style>svg {background-color: rgb(255, 255, 255,0.5);}</style>'
m.get_root().header.add_child(folium.Element(svg_style))

# add_to hands back the layer it was called on, so `c` is still the Choropleth.
c = folium.Choropleth(
    geo_data=geoJSON,
    data=sa2_df_a.reset_index(),
    columns=['SA2_NAME21', 'averaged_wk_rent'],
    key_on='properties.SA2_NAME21',
    fill_color='PiYG',
    nan_fill_color='black',
    legend_name='averaged weekly rent of apartment',
    name='choropleth',
).add_to(m)
m

In [None]:
# Save the apartment-rent map; `path` is the output directory assigned when
# the first map was saved.
m.save(f"{path}averaged weekly rent of apartment.html")

averaged weekly rent of house

In [None]:

import folium

# Choropleth of the mean weekly house rent per SA2, matched to the GeoJSON
# features through their SA2_NAME21 property. Areas without data are black.
# NOTE(review): newer folium releases (0.15+, TODO confirm) no longer ship the
# "Stamen Terrain" tiles; if this line raises, choose another tileset.
m = folium.Map(
    location=[-37.81, 144.96],
    zoom_start=10,
    tiles="Stamen Terrain",
    color='white',
)
svg_style = '<style>svg {background-color: rgb(255, 255, 255,0.5);}</style>'
m.get_root().header.add_child(folium.Element(svg_style))

# add_to hands back the layer it was called on, so `c` is still the Choropleth.
c = folium.Choropleth(
    geo_data=geoJSON,
    data=sa2_df_h.reset_index(),
    columns=['SA2_NAME21', 'averaged_wk_rent'],
    key_on='properties.SA2_NAME21',
    fill_color='PiYG',
    nan_fill_color='black',
    legend_name='averaged weekly rent of house',
    name='choropleth',
).add_to(m)
m

In [None]:
# Save the house-rent map; `path` is the output directory assigned when the
# first map was saved.
m.save(f"{path}averaged weekly rent of house.html")