In [3]:
import pandas as pd
import geopandas as gpd


 
# Input files, addressed relative to this notebook's directory.
locality_shapefile = "../../data/map/LOCALITY_POLYGON.shp"
crimes_csv = '../../data/curated/crimes.csv'

# Locality polygons; the 'GAZLOC' and 'geometry' columns are used further down.
vic_suburbs = gpd.read_file(locality_shapefile)
# Curated crime table with 'year', 'suburb', 'offence_division' and rate columns.
crime_df = pd.read_csv(crimes_csv)



In [4]:
# Add a lower-cased copy of the gazetted locality name ('GAZLOC') as 'suburb'.
# This is the column the crime table is joined on in the merge step.
gazetted_names = vic_suburbs['GAZLOC']
vic_suburbs['suburb'] = gazetted_names.str.lower()

# Keep only the join key and the polygon geometry
vic_suburbs_filtered = vic_suburbs.loc[:, ['suburb', 'geometry']]

# Preview the crime table
crime_df.head()


Unnamed: 0.1,Unnamed: 0,year,suburb,offence_division,offence_per_100k,offence_per_100k_prev_year,change_per_100k
0,0,2016,abbotsford,1,1220.07,1014.82,205.25
1,1,2017,abbotsford,1,1485.31,1151.65,333.66
2,2,2018,abbotsford,1,1049.65,1448.51,-398.86
3,3,2019,abbotsford,1,1824.06,1042.32,781.74
4,4,2020,abbotsford,1,1499.17,1809.35,-310.18


In [5]:
# Attach the locality polygon to every crime record. The inner join keeps only
# suburbs that are present in both the crime table and the polygon layer.
merged_df = pd.merge(crime_df, vic_suburbs_filtered, how='inner', on='suburb')
merged_df = gpd.GeoDataFrame(merged_df, geometry='geometry')

# Reproject to WGS84 longitude/latitude. GeoDataFrame.to_crs reprojects the
# frame as a whole (so the frame-level CRS and the geometry column cannot get
# out of step), and the EPSG authority code is preferred over a PROJ.4 string.
# NOTE(review): requires the shapefile to carry a CRS, as the original call did.
merged_df = merged_df.to_crs("EPSG:4326")

# One frame per offence division (1-6), covering all years. These feed the
# per-division time-slider charts.
(
    all_years_offence_division1,
    all_years_offence_division2,
    all_years_offence_division3,
    all_years_offence_division4,
    all_years_offence_division5,
    all_years_offence_division6,
) = (
    merged_df[merged_df["offence_division"] == division]
    for division in range(1, 7)
)




In [6]:

# Suburbs excluded from the charts because they do not really have a resident
# population (for example airports and camping areas).
suburbs_to_remove = ["moorabbin airport", "braeside", "melbourne airport", "wilsons promontory"]


def _without_removed_suburbs(frame):
    """Return the rows of `frame` whose 'suburb' is not listed in `suburbs_to_remove`."""
    keep = ~frame['suburb'].isin(suburbs_to_remove)
    return frame[keep]


# Same filter applied to each per-division frame
gdf_filtered_time1 = _without_removed_suburbs(all_years_offence_division1)
gdf_filtered_time2 = _without_removed_suburbs(all_years_offence_division2)
gdf_filtered_time3 = _without_removed_suburbs(all_years_offence_division3)
gdf_filtered_time4 = _without_removed_suburbs(all_years_offence_division4)
gdf_filtered_time5 = _without_removed_suburbs(all_years_offence_division5)
gdf_filtered_time6 = _without_removed_suburbs(all_years_offence_division6)

### Below is the code for creating the moving bar graphs

In [8]:
import plotly.express as px
import plotly.io as pio

# Settings shared by every per-division animation.
TOP_N = 5                           # suburbs shown per year
FIRST_YEAR, LAST_YEAR = 2019, 2023  # inclusive range of years animated
PLOT_DIR = "../../plots/crime_visualisations"


def top_suburbs_by_year(gdf, division, n=TOP_N, first_year=FIRST_YEAR, last_year=LAST_YEAR):
    """Return, for each year, the `n` suburbs with the highest 'offence_per_100k'.

    Parameters
    ----------
    gdf : DataFrame or GeoDataFrame
        Must contain 'year', 'suburb', 'offence_division' and
        'offence_per_100k' columns.
    division : int
        Offence division to keep.
    n : int
        Number of suburbs kept per year.
    first_year, last_year : int
        Inclusive bounds on 'year'.

    Returns
    -------
    DataFrame
        Columns 'year', 'suburb' and 'offence_per_100k', ordered by year
        (ascending) and then by rate (descending).
    """
    wanted = (gdf['offence_division'] == division) & gdf['year'].between(first_year, last_year)
    # Only the plotted columns are kept; the geometry is not needed for a bar chart.
    rows = gdf.loc[wanted, ['year', 'suburb', 'offence_per_100k']]
    # Sort once, then take the first n rows of every year. This avoids
    # groupby.apply, whose treatment of the grouping column ('year') is
    # deprecated and changes between pandas versions.
    return (
        rows.sort_values(['year', 'offence_per_100k'], ascending=[True, False])
        .groupby('year')
        .head(n)
    )


def top_suburbs_animation(top_rows, division, n=TOP_N):
    """Build the animated bar chart (one frame per year) for one offence division.

    `top_rows` is the output of `top_suburbs_by_year`; `division` and `n` are
    only used in the chart title. Returns the plotly figure.
    """
    fig = px.bar(
        top_rows,
        x='suburb',
        y='offence_per_100k',
        color='suburb',
        labels={'offence_per_100k': 'Offence per 100,000 people', 'suburb': 'Suburb'},
        title=f'Top {n} Suburbs with Highest Offence per 100,000 people (Division {division})',
        animation_frame='year',
        # One y-range for all frames, with 10% headroom above the tallest bar.
        range_y=[0, top_rows['offence_per_100k'].max() * 1.1],
        # Category order: suburbs in order of first appearance across the frames.
        category_orders={"suburb": top_rows['suburb'].unique().tolist()},
    )

    # Replace the default animation controls with explicit Play / Pause buttons.
    fig.update_layout(
        xaxis_title='Suburb',
        yaxis_title='Offences per 100,000 people',
        updatemenus=[dict(type="buttons", showactive=False,
                          buttons=[dict(label="Play",
                                        method="animate",
                                        args=[None, {"frame": {"duration": 1000, "redraw": True},
                                                     "fromcurrent": True, "mode": "immediate"}]),
                                   dict(label="Pause",
                                        method="animate",
                                        args=[[None], {"frame": {"duration": 0, "redraw": False},
                                                       "mode": "immediate", "fromcurrent": True}])])],
    )
    return fig


# Filtered frame for every offence division that gets its own chart.
frames_by_division = {
    1: gdf_filtered_time1,
    2: gdf_filtered_time2,
    3: gdf_filtered_time3,
    4: gdf_filtered_time4,
    5: gdf_filtered_time5,
    6: gdf_filtered_time6,
}

# Write one HTML file per division (nothing is displayed inline).
for division, division_gdf in frames_by_division.items():
    top_rows = top_suburbs_by_year(division_gdf, division)
    fig = top_suburbs_animation(top_rows, division)
    pio.write_html(
        fig,
        file=f"{PLOT_DIR}/top_{TOP_N}_suburbs_offence_{division}.html",
        auto_open=False,  # set to True to open each file in a browser
    )







In [14]:
import plotly.io as pio
import plotly.graph_objs as go


# Map each offence division to its filtered frame. The dict order is also the
# order of the dropdown entries.
gdf_by_division = {
    5: gdf_filtered_time5,
    4: gdf_filtered_time4,
    3: gdf_filtered_time3,
    2: gdf_filtered_time2,
    1: gdf_filtered_time1,
}


def division_title(division):
    """Chart title shown while `division` is selected in the dropdown."""
    return f"Top 5 Suburbs with Highest Offences per 100,000 people (Division {division})"


# The division whose traces are shown before the dropdown is used. It is the
# first dropdown entry, which plotly marks as active by default.
initial_division = next(iter(gdf_by_division))

fig = go.Figure()
# Division of every trace, in the order the traces are added. The dropdown
# uses this to switch visibility, so divisions may have different numbers of
# year traces.
trace_divisions = []

for division, division_gdf in gdf_by_division.items():
    # Rows for 2019-2023 (inclusive), reduced to the plotted columns
    recent = division_gdf.loc[
        division_gdf['year'].between(2019, 2023),
        ['year', 'suburb', 'offence_per_100k'],
    ]

    # Top 5 suburbs per year: sort once, then take the first 5 rows of each year.
    # This avoids groupby.apply, whose handling of the grouping column is
    # deprecated and changes between pandas versions.
    top_5_by_year = (
        recent.sort_values(['year', 'offence_per_100k'], ascending=[True, False])
        .groupby('year')
        .head(5)
    )

    # One bar trace per year for this division
    for year, yearly_data in top_5_by_year.groupby('year'):
        fig.add_trace(go.Bar(
            x=yearly_data['suburb'],
            y=yearly_data['offence_per_100k'],
            name=f'Year {year}',
            visible=(division == initial_division),
        ))
        trace_divisions.append(division)


# Dropdown entries: each shows only the traces of its division and sets the title.
# For the "update" method the first dict is applied to the traces and the second
# to the layout, so the title has to go in the second one.
dropdown_buttons = [
    dict(
        label=f"Division {division}",
        method="update",
        args=[
            {"visible": [trace_division == division for trace_division in trace_divisions]},
            {"title.text": division_title(division)},
        ],
    ) for division in gdf_by_division
]

# Axis titles, initial title and the dropdown menu
fig.update_layout(
    title_text=division_title(initial_division),
    xaxis_title="Suburb",
    yaxis_title="Offences per 100,000 people",
    updatemenus=[{
        "buttons": dropdown_buttons,
        "direction": "down",
        "showactive": True,
        "x": 1.17,
        "y": 1.15,
        "xanchor": "left",
        "yanchor": "top"
    }]
)


# Save the figure as an HTML file
pio.write_html(fig, file="../../plots/crime_visualisations/top_5_suburbs_offence_dropdown_animation.html", auto_open=False)














### Verifying Graphs and data analysis

In [15]:
def top_crimes(year, division, crime_df=crime_df):
    '''Suburbs with the highest crime rates for one year and offence division.

    Input: a year, an offence division and, optionally, the crime table to
    search. The table defaults to the notebook-level crime_df as it was
    when this cell was run.

    Output: list of the suburbs holding the 10 largest 'offence_per_100k'
    values, after a fixed list of suburbs has been excluded.'''
    # Suburbs left out of the ranking
    excluded = ["moorabbin airport", "braeside", "melbourne airport", "wilsons promontory"]

    # Rows for the requested year and division, minus the excluded suburbs
    selected = crime_df[
        (crime_df['year'] == year)
        & (crime_df["offence_division"] == division)
        & ~crime_df['suburb'].isin(excluded)
    ]

    highest = selected.nlargest(10, "offence_per_100k")
    return list(highest["suburb"].unique())



In [16]:
# Which suburbs are in the top 10 highest crime rate list in every year, per division?
years = [2019, 2020, 2021, 2022, 2023]
crime_divisions = [1,2,3,4,5]

#for each of the crime divisions
for crime in crime_divisions:
    # Intersect the yearly top-10 sets in a single step. Building the result
    # incrementally and re-seeding it whenever it is empty would restart the
    # intersection after a year with no overlap, and could then report
    # suburbs that are not common to all years.
    yearly_top = (set(top_crimes(year, crime)) for year in years)
    # Sorted so the printed list is the same on every run.
    common_suburb = sorted(set.intersection(*yearly_top))
    print(f'suburbs which consistently come up for division: {crime} are {common_suburb}')


suburbs which consistently come up for division: 1 are ['melbourne', 'morwell', 'melton', 'frankston', 'norlane']
suburbs which consistently come up for division: 2 are ['melbourne', 'melton', 'collingwood', 'fitzroy', 'sunshine']
suburbs which consistently come up for division: 3 are ['frankston', 'melbourne', 'southbank']
suburbs which consistently come up for division: 4 are ['melbourne', 'morwell', 'sunshine', 'frankston', 'geelong', 'east melbourne']
suburbs which consistently come up for division: 5 are ['morwell', 'sunshine', 'frankston', 'broadmeadows', 'ballarat central']


In [17]:
def lowest_crimes(year, division, crime_df=crime_df):
    '''Suburbs with the lowest non-zero crime rates for one year and offence division.

    Input: a year, an offence division and, optionally, the crime table to
    search. The table defaults to the notebook-level crime_df as it was
    when this cell was run.

    Output: list of the suburbs holding the 10 smallest 'offence_per_100k'
    values, after a fixed list of suburbs and all rows with a rate of 0 or
    less have been excluded.'''
    # Suburbs left out of the ranking
    excluded = ["moorabbin airport", "braeside", "melbourne airport", "wilsons promontory"]

    # Rows for the requested year and division, minus the excluded suburbs,
    # keeping only strictly positive rates
    selected = crime_df[
        (crime_df['year'] == year)
        & (crime_df["offence_division"] == division)
        & ~crime_df['suburb'].isin(excluded)
        & (crime_df['offence_per_100k'] > 0)
    ]

    lowest = selected.nsmallest(10, "offence_per_100k")
    return list(lowest["suburb"].unique())

In [18]:
# Which suburbs are in the 10 lowest (non-zero) crime rate list in every year, per division?
years = [2019, 2020, 2021, 2022, 2023]
crime_divisions = [1,2,3,4,5]

#for each of the crime divisions
for crime in crime_divisions:
    # Intersect the yearly bottom-10 sets in a single step. Building the result
    # incrementally and re-seeding it whenever it is empty would restart the
    # intersection after a year with no overlap, and could then report
    # suburbs that are not common to all years.
    yearly_lowest = (set(lowest_crimes(year, crime)) for year in years)
    # Sorted so the printed list is the same on every run.
    common_suburb = sorted(set.intersection(*yearly_lowest))
    print(f'suburbs which consistently come up for division: {crime} are {common_suburb}')

suburbs which consistently come up for division: 1 are ['yuroke', 'invermay']
suburbs which consistently come up for division: 2 are ['towong', 'yuroke', 'narre warren east', 'quandong']
suburbs which consistently come up for division: 3 are []
suburbs which consistently come up for division: 4 are ['yuroke']
suburbs which consistently come up for division: 5 are ['yuroke', 'plumpton', 'quandong', 'invermay', 'toolern vale', 'yackandandah', 'panton hill', 'wattle glen', 'yarrambat', 'mount cottrell']


### Zoom in on Melbourne's crime rate

We saw above that Melbourne, Morwell, Sunshine and Frankston consistently rank amongst the suburbs with the highest crime rates across the offence divisions.

Let's make a visualisation of the crime rates in these suburbs over the period 2019-2023.

In [19]:


# The four suburbs called out in the text above
suburbs_to_include = ["melbourne", "morwell", "sunshine", "frankston"]

# Rows for those suburbs from 2019 onwards, reduced to the plotted columns
selected_rows = merged_df["suburb"].isin(suburbs_to_include) & (merged_df["year"] >= 2019)
melbourne_df = merged_df.loc[selected_rows, ["year", "suburb", "offence_per_100k"]]

# Add up offence_per_100k over all rows that share a year and suburb
summed_df = (
    melbourne_df
    .groupby(['year', 'suburb'], as_index=False)['offence_per_100k']
    .sum()
)

# Line chart: one line per suburb, year on the x-axis
fig = px.line(
    summed_df,
    x='year',
    y='offence_per_100k',
    color='suburb',
    title="Offences per 100,000 people Over Time for Selected Suburbs",
    labels={'offence_per_100k': 'Offences per 100,000 people', 'year': 'Year'},
)

# Categorical x-axis with one explicitly labelled tick per year
tick_years = [2019, 2020, 2021, 2022, 2023]
fig.update_xaxes(
    tickmode='array',
    tickvals=tick_years,
    ticktext=[str(tick_year) for tick_year in tick_years],
    type='category',
)

# Save the figure as an HTML file
fig.write_html("../../plots/crime_visualisations/offence_per_100k_top_suburbs.html")

