# Use the output of the time series models to plot the geospatial map

## Import Libraries

In [None]:
import  pandas as pd
import geopandas as gpd
import folium
import geopandas as gpd
import json
from IPython.display import display

## Read data

In [None]:
# Load the forecast CSVs written by the four models, in the order
# SARIMA, Prophet, rf, lr (one DataFrame per file).
SARIMA_data, prophet_data, rf_data, lr_data = (
    pd.read_csv(result_csv)
    for result_csv in (
        '../../data/results/price_increase_forecast_SARIMA.csv',
        '../../data/results/price_increase_forecast_prophet.csv',
        '../../data/results/growth_rate_rf.csv',
        '../../data/results/growth_rate_lr.csv',
    )
)

In [None]:
# Put all four model outputs on a common schema: the predicted change always
# lives in a column named "Growth Rate".
prophet_data = prophet_data.rename(columns={"Future Price Increase (%)": "Growth Rate"})
SARIMA_data = SARIMA_data.rename(columns={"Future Price Increase (%)": "Growth Rate"})

# The rf / lr outputs key their rows by "region"; expose that as "SA2" and keep
# only the two columns that are used from here on.
rf_data = rf_data.rename(
    columns={"Growth_Rate (%)": "Growth Rate", "region": "SA2"}
)[["SA2", "Growth Rate"]]
lr_data = lr_data.rename(
    columns={"Growth_Rate (%)": "Growth Rate", "region": "SA2"}
)[["SA2", "Growth Rate"]]

In [None]:
# Read the final merged dataset and derive the LGA -> SA2 lookup table from it.
final_merge = pd.read_csv('../../data/curated/final_merged_data_sa2.csv')

# Keep every (LGA, SA2) pair exactly once. This table is later inner-joined
# onto the LGA-level forecasts to translate them to SA2 level; repeated pairs
# would multiply the joined rows (and the rows of every merge built on them).
# NOTE(review): if the CSV already has one row per pair this is a no-op.
LGA_trans_SA2 = final_merge[["LGA", "SA2"]].drop_duplicates()

In [None]:
# Load the LGA boundary shapefile and keep only the features whose
# state name (STE_NAME21) is Victoria.
shp_LGA = gpd.read_file(
    '../../data/landing/region_data/LGA_dataset/LGA_unzip/LGA_2022_AUST_GDA94.shp'
)
victoria_LGA = shp_LGA.loc[shp_LGA['STE_NAME21'].eq('Victoria')]

In [None]:
# Load the SA2 boundary shapefile and keep only the features whose
# state name (STE_NAME21) is Victoria.
shp_sa2 = gpd.read_file(
    '../../data/landing/region_data/sa2_dataset/sa2_unzip/SA2_2021_AUST_GDA2020.shp'
)
victoria_sa2 = shp_sa2.loc[shp_sa2['STE_NAME21'].eq('Victoria')]

## Visualization

### The 10 regions with the highest predicted growth rate under the LGA partition

In [None]:
# Average the SARIMA and Prophet forecasts for each LGA: stack both tables,
# group the rows by LGA and take the mean of "Growth Rate".
avg_growth_rate_by_LGA = (
    pd.concat([SARIMA_data, prophet_data])
    .groupby('LGA')['Growth Rate']
    .mean()
    .reset_index()
)

In [None]:
# Inspect the per-LGA average of the two time-series forecasts
avg_growth_rate_by_LGA

In [None]:
# Attach each LGA's average forecast to its boundary geometry. This is an
# inner join (the pandas default), so LGAs present on only one side are dropped.
timeseries_geo = victoria_LGA.merge(
    avg_growth_rate_by_LGA,
    how='inner',
    left_on='LGA_NAME22',
    right_on='LGA',
)

In [None]:
# Select the ten LGAs with the largest average predicted growth rate
top_10_growth_timeseries = avg_growth_rate_by_LGA.nlargest(
    n=10,
    columns='Growth Rate',
)

In [None]:
# Show the ten LGAs with the highest average predicted growth rate
top_10_growth_timeseries

In [None]:
# Draw a choropleth of the average time-series forecast per LGA, label the ten
# LGAs with the highest predicted growth rate, then display and save the map.
m = folium.Map(location=[-37.81, 144.96], zoom_start=6, tiles="cartodbpositron")
geojson_data = json.loads(timeseries_geo.to_json())

folium.Choropleth(
    geo_data=geojson_data,
    name='choropleth',
    data=timeseries_geo,
    columns=['LGA', 'Growth Rate'],
    key_on='feature.properties.LGA',
    fill_color='Blues',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Projection Growth Rate (%)'
).add_to(m)

for idx, row in top_10_growth_timeseries.iterrows():
    lga_name = row['LGA']
    growth_rate = row['Growth Rate']

    # The top-10 table is built from the forecasts alone, while `timeseries_geo`
    # is an inner join with the shapefile. An LGA whose name has no match in
    # the shapefile therefore has no geometry here.
    lga_centroids = timeseries_geo[timeseries_geo['LGA'] == lga_name].geometry.centroid
    if lga_centroids.empty:
        # Nothing to anchor the label to; skip it instead of raising IndexError.
        continue
    city_geometry = lga_centroids.iloc[0]

    # Add a text marker with the LGA name; the popup also shows the growth rate.
    # folium expects [latitude, longitude], i.e. [y, x].
    folium.Marker(
        location=[city_geometry.y, city_geometry.x],
        popup=f'{lga_name}: {growth_rate}%',
        icon=folium.DivIcon(html=f"""<div style="font-size: 12px; color: black">{lga_name}</div>""")
    ).add_to(m)

display(m)

m.save('../../plots/geo(html)/Timeseries_Output_geo.html')

### The 10 regions with the highest predicted growth rate under the SA2 partition

In [None]:
# Average the predictions of the two regression models (lr, rf) for each SA2:
# stack both tables, group the rows by SA2 and take the mean of "Growth Rate".
avg_growth_rate_by_SA2 = (
    pd.concat([lr_data, rf_data])
    .groupby('SA2')['Growth Rate']
    .mean()
    .reset_index()
)

In [None]:
# Normalise the SA2 names on both sides of the upcoming join: strip spaces and
# hyphens and lower-case the result, so differences in spacing, hyphenation or
# capitalisation do not prevent a match.

# `victoria_sa2` was produced by boolean-filtering `shp_sa2`. Take an explicit
# copy before adding a column so pandas does not emit SettingWithCopyWarning.
victoria_sa2 = victoria_sa2.copy()
victoria_sa2['SA2_NAME21_normalized'] = (
    victoria_sa2['SA2_NAME21']
    # regex=False: both patterns are literal characters, not regular expressions
    .str.replace(' ', '', regex=False)
    .str.replace('-', '', regex=False)
    .str.lower()
)
avg_growth_rate_by_SA2['SA2_normalized'] = (
    avg_growth_rate_by_SA2['SA2']
    .str.replace(' ', '', regex=False)
    .str.replace('-', '', regex=False)
    .str.lower()
)

In [None]:
# Attach each SA2's average regression forecast to its boundary geometry,
# matching on the normalised names. This is an inner join (the pandas default).
regression_geo = victoria_sa2.merge(
    avg_growth_rate_by_SA2,
    how='inner',
    left_on='SA2_NAME21_normalized',
    right_on='SA2_normalized',
)

In [None]:
# Select the ten SA2 regions with the largest average predicted growth rate
top_10_growth_regression = avg_growth_rate_by_SA2.nlargest(
    n=10,
    columns='Growth Rate',
)

In [None]:
# Show the ten SA2 regions with the highest average predicted growth rate
top_10_growth_regression

In [None]:
# Draw a choropleth of the average regression forecast per SA2, label the ten
# SA2 regions with the highest predicted growth rate, then display and save it.
m = folium.Map(location=[-37.81, 144.96], zoom_start=6, tiles="cartodbpositron")
geojson_data = json.loads(regression_geo.to_json())

folium.Choropleth(
    geo_data=geojson_data,
    name='choropleth',
    data=regression_geo,
    columns=['SA2_normalized', 'Growth Rate'],
    key_on='feature.properties.SA2_NAME21_normalized',
    fill_color='Blues',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Projection Growth Rate (%)'
).add_to(m)

for idx, row in top_10_growth_regression.iterrows():
    growth_rate = row['Growth Rate']

    # The top-10 table is built from the forecasts alone, so look the region up
    # in the joined GeoDataFrame by its normalised key and skip it when the
    # shapefile had no matching SA2.
    matched_sa2 = regression_geo[regression_geo['SA2_normalized'] == row['SA2_normalized']]
    if matched_sa2.empty:
        continue

    # Label with the shapefile's SA2 name rather than the normalised join key
    # (lower-case, no spaces or hyphens), which is hard to read on the map.
    sa2_name = matched_sa2['SA2_NAME21'].iloc[0]
    city_geometry = matched_sa2.geometry.centroid.iloc[0]

    # folium expects [latitude, longitude], i.e. [y, x].
    folium.Marker(
        location=[city_geometry.y, city_geometry.x],
        popup=f'{sa2_name}: {growth_rate}%',
        icon=folium.DivIcon(html=f"""<div style="font-size: 12px; color: black">{sa2_name}</div>""")
    ).add_to(m)

display(m)
m.save('../../plots/geo(html)/Regresssion_Output_geo.html')

### Combine the output of the four models for analysis (mean)

In [None]:
# Translate the LGA-level time-series forecasts to SA2 level by inner-joining
# them onto the LGA -> SA2 lookup table.
growth_rate_prophet = LGA_trans_SA2.merge(prophet_data, on='LGA', how='inner')
growth_rate_SARIMA = LGA_trans_SA2.merge(SARIMA_data, on='LGA', how='inner')

# Add the normalised SA2 key (no spaces or hyphens, lower-case) used for joining.
growth_rate_SARIMA['SA2_normalized'] = (
    growth_rate_SARIMA['SA2']
    .str.replace(' ', '')
    .str.replace('-', '')
    .str.lower()
)
growth_rate_prophet['SA2_normalized'] = (
    growth_rate_prophet['SA2']
    .str.replace(' ', '')
    .str.replace('-', '')
    .str.lower()
)

In [None]:
# Add the same normalised SA2 key (no spaces or hyphens, lower-case) to the
# two regression outputs.
lr_data['SA2_normalized'] = (
    lr_data['SA2']
    .str.replace(' ', '')
    .str.replace('-', '')
    .str.lower()
)
rf_data['SA2_normalized'] = (
    rf_data['SA2']
    .str.replace(' ', '')
    .str.replace('-', '')
    .str.lower()
)

In [None]:
# Reduce each model's table to the join key plus its growth rate, giving the
# growth-rate column a model-specific name so the four can sit side by side.
sarima_growth = growth_rate_SARIMA.rename(
    columns={'Growth Rate': 'Growth Rate_SARIMA'}
)[['SA2_normalized', 'Growth Rate_SARIMA']]
prophet_growth = growth_rate_prophet.rename(
    columns={'Growth Rate': 'Growth Rate_prophet'}
)[['SA2_normalized', 'Growth Rate_prophet']]
lr_growth = lr_data.rename(
    columns={'Growth Rate': 'Growth Rate_lr'}
)[['SA2_normalized', 'Growth Rate_lr']]
rf_growth = rf_data.rename(
    columns={'Growth Rate': 'Growth Rate_rf'}
)[['SA2_normalized', 'Growth Rate_rf']]

In [None]:
# Outer-join the four per-model tables on the normalised SA2 key, so an SA2
# is kept even if only some of the models produced a value for it.
merged_growth_data = (
    sarima_growth
    .merge(prophet_growth, on='SA2_normalized', how='outer')
    .merge(lr_growth, on='SA2_normalized', how='outer')
    .merge(rf_growth, on='SA2_normalized', how='outer')
)

In [None]:
# Combine the four models into one figure per row: the row-wise mean of the
# per-model growth rates (pandas skips missing values by default).
# NOTE(review): the map files saved further down carry "median" in their file
# names although this column is a mean; confirm which statistic is intended.
merged_growth_data['Mean_Growth_Rate'] = merged_growth_data[
    ['Growth Rate_SARIMA', 'Growth Rate_prophet', 'Growth Rate_lr', 'Growth Rate_rf']
].mean(axis=1)

In [None]:
# Inspect the combined table: one growth-rate column per model plus their row-wise mean
merged_growth_data

In [None]:
# Attach the combined growth rates to the SA2 boundary geometries, matching on
# the normalised names. This is an inner join (the pandas default).
geo = victoria_sa2.merge(
    merged_growth_data,
    how='inner',
    left_on='SA2_NAME21_normalized',
    right_on='SA2_normalized',
)

In [None]:
# Select the ten rows of the joined table with the largest combined mean growth rate
top_10_growth = geo.nlargest(
    n=10,
    columns='Mean_Growth_Rate',
)

In [None]:
# Show the SA2 name (SA2_NAME21) and combined mean growth rate of the ten selected rows
top_10_growth[['SA2_NAME21','Mean_Growth_Rate']]

In [None]:
# Draw an unlabelled choropleth of the combined mean growth rate per SA2,
# then display it and save it as HTML.
m = folium.Map(
    location=[-37.81, 144.96],
    zoom_start=6,
    tiles="cartodbpositron",
)
geojson_data = json.loads(geo.to_json())

# Shade each SA2 polygon by its Mean_Growth_Rate; rows of `geo` are linked to
# GeoJSON features through the normalised SA2 name.
choropleth_layer = folium.Choropleth(
    geo_data=geojson_data,
    name='choropleth',
    data=geo,
    columns=['SA2_normalized', 'Mean_Growth_Rate'],
    key_on='feature.properties.SA2_NAME21_normalized',
    fill_color='Blues',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Projection Growth Rate (%)',
)
choropleth_layer.add_to(m)

display(m)
m.save('../../plots/geo(html)/final_Output_median_geo.html')

In [None]:
# Draw the choropleth of the combined mean growth rate per SA2 again, this
# time labelling the ten rows with the highest value, then display and save it.
m = folium.Map(location=[-37.81, 144.96], zoom_start=6, tiles="cartodbpositron")
geojson_data = json.loads(geo.to_json())
folium.Choropleth(
    geo_data=geojson_data,
    name='choropleth',
    data=geo,
    columns=['SA2_normalized', 'Mean_Growth_Rate'],
    key_on='feature.properties.SA2_NAME21_normalized',
    fill_color='Blues',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Projection Growth Rate (%)'
).add_to(m)

# `top_10_growth` is a slice of `geo`, so every row already carries its own
# geometry; there is no need to search `geo` again by name for each label.
geometry_column = geo.geometry.name

for idx, row in top_10_growth.iterrows():
    sa2_name = row['SA2_NAME21']
    mean_growth_rate = row['Mean_Growth_Rate']

    sa2_shape = row[geometry_column]
    if sa2_shape is None or sa2_shape.is_empty:
        # No usable geometry to anchor the label to.
        continue

    city_geometry = sa2_shape.centroid

    # folium expects [latitude, longitude], i.e. [y, x].
    text_location = [city_geometry.y, city_geometry.x]

    folium.Marker(
        location=text_location,
        popup=f'{sa2_name}: {mean_growth_rate}%',
        icon=folium.DivIcon(html=f"""<div style="font-size: 12px; color: black; text-shadow: 0px 0px 3px white; white-space: nowrap;">{sa2_name}</div>""")
    ).add_to(m)

display(m)
m.save('../../plots/geo(html)/final_Output_median_geo_with_labels.html')
