# Integrating the Predictions

In [1]:
import pandas as pd
import geopandas as gpd
import folium

In [2]:
# Load the growth-rate predictions written by the two models (ML and ARIMA).
# Later cells read the 'postcode' and 'Growth_Rate' columns of the combined data.
ml_model, arima_model = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/curated/ML_growth_rate.csv',
        '../data/curated/ARIMA_growth_rate.csv',
    )
)

## Take the Mean of the Two Predictions

In [3]:
# Stack the two models' predictions into one long frame (one row per model per
# postcode, assuming each file lists a postcode once — NOTE(review): confirm;
# otherwise the mean below is weighted by row count rather than by model).
combined_df = pd.concat([ml_model, arima_model], ignore_index=True)

# Coerce to numeric BEFORE aggregating: unparseable entries become NaN and are
# skipped by mean(). Coercing after the mean (as was done previously) is a
# no-op, because the aggregated column is already float by then.
combined_df['Growth_Rate'] = pd.to_numeric(combined_df['Growth_Rate'], errors='coerce')

# Average the predictions per postcode and rank from highest to lowest growth.
mean_growth_rate = (
    combined_df
    .groupby('postcode')['Growth_Rate']
    .mean()
    .reset_index()
    .sort_values(by='Growth_Rate', ascending=False)
)
mean_growth_rate.head()

Unnamed: 0,postcode,Growth_Rate
410,3546,86.067364
413,3551,86.067364
409,3544,86.067364
407,3540,86.067364
411,3549,86.067364


## Show TOP10 Suburbs

Combine the postcodes that share the same growth rate into a single row.

In [4]:
# Collapse postcodes whose mean growth rate is identical into one row holding
# the list of those postcodes.
grouped = (
    mean_growth_rate
    .groupby('Growth_Rate')['postcode']
    .apply(list)
    .reset_index()
)

# Take the 11 highest growth rates, then discard the row labelled 352, which the
# original analysis marked as an invalid value, leaving ten groups.
# NOTE(review): 352 is an index label tied to the current input data; it will
# point at a different row (or raise KeyError) if the predictions change.
sorted_grouped = (
    grouped
    .sort_values(by='Growth_Rate', ascending=False)
    .nlargest(11, 'Growth_Rate')
    .loc[:, ['postcode', 'Growth_Rate']]
    .drop(352)
)

# Persist the top-10 table and display it.
sorted_grouped.to_csv('../data/curated/combined_top10.csv', index=False)
sorted_grouped

Unnamed: 0,postcode,Growth_Rate
358,"[3546, 3551, 3544, 3540, 3549, 3542]",86.067364
357,"[3345, 3270, 3271, 3272]",80.898826
356,"[3475, 3444, 3472, 3489, 3488, 3487]",59.616612
355,"[3413, 3412, 3334, 3331, 3336, 3409, 3333, 341...",54.366999
354,[3335],44.78316
353,"[3725, 3719, 3738]",44.719061
351,[3350],43.023342
350,[3747],40.050704
349,"[3352, 3354]",37.827677
348,"[3243, 3269, 3268, 3250, 3249, 3242, 3241, 323...",36.62289


## Label the TOP10 Suburbs

In [5]:
# Read the postal-area boundary shapefile.
gdf = gpd.read_file('../data/landing/POA_2021_AUST_GDA2020_SHP/POA_2021_AUST_GDA2020.shp')

# Remove the final record before casting the codes to integers.
# NOTE(review): presumably the last row carries a non-numeric code that would
# make the int cast fail — confirm against the shapefile.
gdf = gdf.drop(index=gdf.index[-1])
gdf = gdf.astype({'POA_CODE21': int})

# One list of postcodes per top-10 growth-rate group.
postcodes = sorted_grouped['postcode'].to_list()

In [6]:
# Base map centred on Melbourne; one marker is added per top-10 postcode group.
m = folium.Map(
    location=[-37.8136, 144.9631],
    tiles='OpenStreetMap',
    zoom_start=10
)

# Iterate through the list of postcode groups (each entry is a list of
# postcodes that share one growth rate).
for postcode_list in postcodes:
    # Select the boundary polygons belonging to this group and merge them.
    matched = gdf[gdf['POA_CODE21'].isin(postcode_list)]
    combined_area = None if matched.empty else matched.unary_union

    # A group with no matching boundary has no centroid; building a marker from
    # it would fail, so report the group and move on instead of aborting the map.
    if combined_area is None or combined_area.is_empty:
        print(f'No boundary geometry found for postcodes {postcode_list}; marker skipped')
        continue

    # Calculate the center coordinates of the combined area
    center = combined_area.centroid
    center_coords = [center.y, center.x]  # Latitude and longitude

    # Create a marker whose popup lists every postcode in the group
    label = ', '.join(map(str, postcode_list))
    folium.Marker(location=center_coords, popup=label).add_to(m)

m.save('../plots/top10_map.html')

m
