In [None]:
# Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor

# Read the two curated CSV exports written by the preprocessing step.
# The first path populates `data_full`, the second populates `data_num`.
data_full, data_num = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/curated/final_data_full.csv',
        'data/curated/final_data_num.csv',
    )
)

# The visualizations in the cells below are generated from these frames.

1. Feature importance graph

In [2]:
# Feature Importance Graph (Random Forest)
#
# Fits a random forest that predicts `price` from the remaining columns of
# `data_full`, then plots each feature's impurity-based importance as a bar
# chart, ordered from most to least important.

# scikit-learn estimators only accept numeric input. `data_full` also carries a
# `suburb` column (grouped on in a later cell) that is presumably text, so keep
# only numeric/boolean columns as features instead of every non-target column.
X = (
    data_full
    .drop(columns='price')
    .select_dtypes(include=['number', 'bool'])
)  # Features
y = data_full['price']  # Target

# Train a Random Forest Regressor (fixed seed so the importances are reproducible)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X, y)

# Get feature importance scores from the model (one score per column of X)
importance_scores = rf_model.feature_importances_

# Create a DataFrame for feature importance, most important feature first.
# Reassign rather than sorting in place so re-running the cell stays idempotent.
feature_importance = pd.DataFrame({'Feature': X.columns, 'Importance': importance_scores})
feature_importance = feature_importance.sort_values(by='Importance', ascending=False)

# Plot the feature importance graph
plt.figure(figsize=(8, 6))
bars = plt.bar(feature_importance['Feature'], feature_importance['Importance'], color='#FF9999')
plt.title('Feature Importance Graph')
plt.xlabel('Feature')
plt.ylabel('Importance')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

2. Correlation heatmap between features

In [None]:
# Correlation Heatmap Between Features
#
# Correlation is only defined for numeric data. Since pandas 2.0,
# `DataFrame.corr()` no longer drops non-numeric columns silently and raises on
# them instead, so select the numeric/boolean columns explicitly. This form
# behaves the same on older and newer pandas versions.
corr_matrix = data_full.select_dtypes(include=['number', 'bool']).corr()

plt.figure(figsize=(12, 8))
# annot=True writes each coefficient into its cell, formatted to two decimals.
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", fmt='.2f')
plt.title('Correlation Heatmap Between Features')
plt.show()

3. Top 10 suburbs with highest predicted growth rate

In [4]:
# Top 10 Suburbs with Highest Predicted Growth Rate
#
# NOTE(review): `data_growth` is not created by any cell shown above this one;
# it must already exist in the kernel for this cell to run - confirm where it
# is loaded.

# Average the growth rate per suburb, then keep the ten largest averages.
mean_growth_by_suburb = data_growth.groupby('suburb')['growth_rate'].mean()
growth_rates = mean_growth_by_suburb.sort_values(ascending=False).head(10)

# Five colours are supplied for ten bars.
bar_palette = ['#FF9999', '#66B2FF', '#99FF99', '#FFCC99', '#FF9966']

plt.figure(figsize=(10, 6))
bars = plt.bar(growth_rates.index, growth_rates.values, color=bar_palette)
plt.title('Top 10 Suburbs with Highest Predicted Growth Rate')
plt.xlabel('Suburb')
plt.ylabel('Predicted Growth Rate (%)')
plt.xticks(rotation=45)

# Write each bar's value (one decimal place, with a % sign) just above its top edge.
for rect in bars:
    bar_top = rect.get_height()
    x_centre = rect.get_x() + rect.get_width() / 2.0
    plt.text(x_centre, bar_top, f'{bar_top:.1f}%', ha='center', va='bottom')

plt.tight_layout()
plt.show()

4. Projected rental growth over 5 years

In [6]:
# Projected Rental Price Growth Over 5 Years
#
# Draws one line per suburb from the hard-coded series in `projected_prices`.
# NOTE(review): `suburbs` lists ten names, but only the three suburbs present in
# `projected_prices` are plotted; `suburbs` itself is not used in this cell.
suburbs = [
    'West Melbourne', 'Docklands', 'Brighton', 'Berwick', 'Hawthorn',
    'Glen Iris', 'Richmond', 'Essendon', 'CBD', 'Kew',
]
years = list(range(6))  # year 0 through year 5
projected_prices = {
    'West Melbourne': [1.0, 1.1, 1.2, 1.3, 1.4, 1.5],
    'Docklands': [0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    'Brighton': [0.8, 0.9, 1.0, 1.1, 1.2, 1.3],
}

plt.figure(figsize=(10, 6))
# One marked line per suburb, labelled for the legend.
for suburb in projected_prices:
    plt.plot(years, projected_prices[suburb], marker='o', label=suburb)

plt.title('Projected Rental Price Growth Over 5 Years')
plt.xlabel('Year')
plt.ylabel('Projected Rental Price ($)')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()

5. Median rental prices of top 7 liveable and affordable suburbs

In [8]:
# Median Rental Prices of Top 7 Liveable and Affordable Suburbs
#
# NOTE(review): the seven suburbs shown are simply those with the lowest median
# `price`; no liveability measure enters the selection in this cell.

# Median price per suburb, cheapest first.
suburb_medians = data_full.groupby('suburb')['price'].median()
median_prices = suburb_medians.sort_values(ascending=True)
cheapest_seven = median_prices.iloc[:7]

plt.figure(figsize=(10, 6))
bars = plt.bar(cheapest_seven.index, cheapest_seven.values, color='#FF6666')
plt.title('Median Rental Prices of Top 7 Liveable and Affordable Suburbs')
plt.xlabel('Suburb')
plt.ylabel('Median Rental Price ($)')
plt.xticks(rotation=45)

# Print each median (two decimals) centred at the bar's mid-height.
for rect in bars:
    bar_top = rect.get_height()
    x_centre = rect.get_x() + rect.get_width() / 2.0
    plt.text(x_centre, bar_top / 2, f'{bar_top:.2f}', ha='center', va='center', color='black')

plt.tight_layout()
plt.show()

6. Liveability scores of top 7 affordable suburbs

In [None]:
# Liveability Scores of Top 7 Affordable Suburbs
#
# NOTE(review): `data_liveability` is not created by any cell shown above this
# one; it must already exist in the kernel for this cell to run - confirm where
# it is loaded.
# NOTE(review): the seven suburbs shown are those with the highest summed score;
# price does not enter the selection in this cell.

# The score of a row is the plain sum of its four amenity counts, added left to
# right. The result is stored as a new `liveability_score` column.
amenity_columns = ['school_count', 'park_count', 'shopping_center_count', 'station_count']
row_score = data_liveability[amenity_columns[0]]
for amenity in amenity_columns[1:]:
    row_score = row_score + data_liveability[amenity]
data_liveability['liveability_score'] = row_score

# Total the row scores per suburb and keep the seven highest totals.
suburb_totals = data_liveability.groupby('suburb')['liveability_score'].sum()
liveability_scores = suburb_totals.sort_values(ascending=False).head(7)

plt.figure(figsize=(10, 6))
bars = plt.bar(liveability_scores.index, liveability_scores.values, color='#66B2FF')
plt.title('Liveability Scores of Top 7 Affordable Suburbs')
plt.xlabel('Suburb')
plt.ylabel('Liveability Score')
plt.xticks(rotation=45)

# Print each score (one decimal) centred at the bar's mid-height.
for rect in bars:
    bar_top = rect.get_height()
    x_centre = rect.get_x() + rect.get_width() / 2.0
    plt.text(x_centre, bar_top / 2, f'{bar_top:.1f}', ha='center', va='center', color='black')

plt.tight_layout()
plt.show()