In [2]:
# Imports

import pandas as pd
from pathlib import Path

In [3]:
# Load data
#
# Reads the cleaned Melbourne listings CSV (path is relative to the notebook's
# working directory) and converts the `date` column to pandas datetimes.
# NOTE(review): the date format is inferred by pd.to_datetime; if the file
# stores day-first strings, ambiguous dates could be swapped - confirm the
# on-disk format.

df = (
    pd.read_csv(Path('../data/Melbourne_clean.csv'))
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

In [4]:
# Basic KPIs
#
# Headline figures for the loaded listings: row count, first/last date, and
# rounded price and land-size statistics. Every value except the row count is
# pre-formatted as a string so the loop below can print it as-is.

kpis = {
    'Total Listings': df.shape[0],
    'Date Range': f"{df['date'].min().date()} to {df['date'].max().date()}",
    'Average Price': format(df['price'].mean(), '.0f'),
    'Median Price': format(df['price'].median(), '.0f'),
    'Average Land Size (m²)': format(df['landsize'].mean(), '.1f'),
}

# One "label: value" line per KPI, in insertion order.
for label, value in kpis.items():
    print(f'{label}: {value}')

Total Listings: 13510
Date Range: 2016-03-09 to 2017-12-08
Average Price: 1074796
Median Price: 900750
Average Land Size (m²): 558.2


In [5]:
# Average price by property type
#
# Expands the single-letter codes in the `type` column into readable labels,
# stores them on `df` as `type_full`, then reports the mean price per label,
# highest first.

type_labels = {
    'h' : 'House',
    'u' : 'Unit',
    't' : 'Townhouse'
}

# Series.map yields NaN for any code that is missing from type_labels, and
# groupby drops NaN keys by default, so such listings would silently vanish
# from the averages. Falling back to the raw code keeps them visible.
df['type_full'] = df['type'].map(type_labels).fillna(df['type'])

avg_price_by_type = df.groupby('type_full')['price'].mean().sort_values(ascending=False)

print('Average price by property type:')
# Render each mean as whole dollars with thousands separators for display only;
# avg_price_by_type itself stays numeric.
print(avg_price_by_type.map(lambda x: f'${x:,.0f}'))

Average price by property type:
type_full
House        $1,242,328
Townhouse      $933,998
Unit           $605,239
Name: price, dtype: object


In [6]:
# Top suburbs
#
# Ranks suburbs by mean price and keeps the ten highest.
# NOTE(review): no minimum number of listings per suburb is applied, so a
# suburb with very few rows can top the ranking - confirm that is intended.

top_suburbs = (
    df.groupby('suburb')['price']
    .mean()
    .sort_values(ascending=False)
    .iloc[:10]
)

print('Top 10 Suburbs by Average Price:')
# Display-only formatting: whole dollars with thousands separators.
print(top_suburbs.map('${:,.0f}'.format))

Top 10 Suburbs by Average Price:
suburb
Kooyong         $2,185,000
Canterbury      $2,180,241
Middle Park     $2,082,529
Brighton        $1,930,158
Albert Park     $1,929,447
Balwyn          $1,869,879
Eaglemont       $1,831,696
Balwyn North    $1,793,405
Kew             $1,758,435
Malvern         $1,753,856
Name: price, dtype: object
