In [2]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt


 
vic_suburbs = gpd.read_file("../data/map/LOCALITY_POLYGON.shp")
crime_df = pd.read_csv('../data/curated/crimes.csv')



In [3]:
# Rename 'GAZLOC' to gazetted locality
vic_suburbs['suburb'] = vic_suburbs['GAZLOC'].str.lower()

# Filter the dataframe to only include the gazetted localities and their geometries
vic_suburbs_filtered = vic_suburbs[['suburb', 'geometry']]

crime_df.head()


Unnamed: 0.1,Unnamed: 0,year,suburb,offence_division,offence_per_100k,offence_per_100k_prev_year,change_per_100k
0,0,2016,abbotsford,1,1220.07,1014.82,205.25
1,1,2017,abbotsford,1,1485.31,1151.65,333.66
2,2,2018,abbotsford,1,1049.65,1448.51,-398.86
3,3,2019,abbotsford,1,1824.06,1042.32,781.74
4,4,2020,abbotsford,1,1499.17,1809.35,-310.18


In [4]:
vic_suburbs_filtered.head()


Unnamed: 0,suburb,geometry
0,mollongghip,"POLYGON ((2417354.185 2445906.94, 2417260.386 ..."
1,north blackwood,"POLYGON ((2445160.214 2452798.651, 2445239.392..."
2,basalt,"POLYGON ((2421860.482 2465074.51, 2421860.053 ..."
3,llanelly,"POLYGON ((2394406.798 2527031.044, 2394351.249..."
4,murrabit west,"POLYGON ((2397518.057 2666586.745, 2397393.211..."


In [5]:
merged_df = pd.merge(crime_df, vic_suburbs_filtered, how='inner', on='suburb')
merged_df = gpd.GeoDataFrame(merged_df, geometry='geometry')

merged_2015 = merged_df[merged_df["year"] == 2023]
merged_2015_divsion2 = merged_2015[merged_2015["offence_division"] == 2]

merged_2015_divsion2['geometry'] = merged_2015_divsion2['geometry'].to_crs("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [20]:


suburbs_to_remove = ["moorabbin airport", "braeside", "melbourne airport", "wilsons promontory"]

# Remove the rows where 'suburb' matches any value in the list
gdf_filtered = merged_2015_divsion2[~merged_2015_divsion2['suburb'].isin(suburbs_to_remove)]

# Show the filtered GeoDataFrame
print(gdf_filtered)

       Unnamed: 0  year           suburb  offence_division  offence_per_100k  \
23             23  2023       aberfeldie                 2            469.09   
231           231  2023  armstrong creek                 2           2504.70   
246           239  2023            ascot                 2            236.28   
247           239  2023            ascot                 2            236.28   
303           287  2023          ashwood                 2           1694.17   
...           ...   ...              ...               ...               ...   
23711       23567  2023       wonga park                 2            295.89   
23751       23607  2023     wyndham vale                 2           2043.67   
23799       23655  2023       yarra glen                 2            191.20   
23823       23679  2023       yarraville                 2           3951.31   
23847       23703  2023           yuroke                 2              9.38   

       offence_per_100k_prev_year  chan

In [21]:
import geopandas as gpd
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import branca.colormap as branca_cm



gdf_2015 = gpd.GeoDataFrame(gdf_filtered, geometry='geometry')


# Get the min and max values of 'difference_change'
min_diff = gdf_filtered['change_per_100k'].min()
max_diff = gdf_filtered['change_per_100k'].max()

# Define a continuous color map based on 'difference_change'
colormap = cm.get_cmap('RdYlGn_r')  # 'RdYlGn' is a Red-to-Green colormap
norm = colors.Normalize(vmin=min_diff, vmax=max_diff)

# Function to map the 'difference_change' to a color
def get_color(diff_change):
    return colors.rgb2hex(colormap(norm(diff_change)))

# Create a base map centered around the approximate location of the geometries (Victoria, Australia)
m = folium.Map(location=[-37.4713, 144.7852], zoom_start=8)

# Add polygons from the 'geometry' column to the map
for _, row in gdf_2015.iterrows():
    # Get the 'difference_change' value
    difference_change = row['change_per_100k']
    
    # Create a GeoJson feature for the polygon with dynamic coloring
    geo_json = folium.GeoJson(
        data=row['geometry'].__geo_interface__,
        style_function=lambda x, diff_change=difference_change: {
            'fillColor': get_color(diff_change),  # Color based on the value of 'difference_change'
            'color': 'black',
            'weight': 1,
            'fillOpacity': 0.5
        }
    )
    
    # Add a popup to show the suburb name and difference_change
    popup = folium.Popup(f'Suburb: {row["suburb"]}<br>Difference Change: {difference_change}', max_width=200)
    geo_json.add_child(popup)
    
    # Add the polygon to the map
    geo_json.add_to(m)

# Get dynamic min and max values from the 'difference_change' column
min_diff = gdf_filtered['change_per_100k'].min()
max_diff = gdf_filtered['change_per_100k'].max()

# Create a custom HTML legend with dynamically adjusted min, midpoint, and max labels
legend_html = f"""
<div style="
    position: fixed;
    bottom: 100px;
    left: 50px;
    width: 300px;
    background-color: white;
    z-index:9999;
    font-size: 14px;
    border:2px solid grey;
    border-radius:5px;
    padding: 10px;">
    <div style="font-size: 16px; font-weight: bold;">Difference Change</div>
    <div style="display: flex; justify-content: space-between;">
        <span style="font-size: 14pt; font-weight: bold;">{min_diff}</span>
        <span style="font-size: 14pt; font-weight: bold;">0</span>
        <span style="font-size: 14pt; font-weight: bold;">{max_diff}</span>
    </div>
    <div style="height: 10px;
        background: linear-gradient(to right, green, yellow, red);">
    </div>
</div>
"""

# Add the custom HTML legend to the map
m.get_root().html.add_child(folium.Element(legend_html))

# Display the map
m.save('../plots/map_2015_difference_change.html')



  colormap = cm.get_cmap('RdYlGn_r')  # 'RdYlGn' is a Red-to-Green colormap


In [22]:
print(min_diff )
print(max_diff )

-5982.24
9467.92
