In [None]:
# Dependencies
from matplotlib import pyplot as plt
from scipy import stats
import numpy as np
import pandas as pd
from pathlib import Path
import requests
import hvplot.pandas

# Import the Geoapify API key
from api_keys import geoapify_key

In [None]:
# Read the combined WA crime CSV into a DataFrame
csv_file = Path("crime-data/combined_wacrime.csv")
city_data_df = pd.read_csv(csv_file)

# Discard the final row of the file (reassigned rather than dropped in place)
last_row_label = city_data_df.index[-1:]
city_data_df = city_data_df.drop(index=last_row_label)

# Replace every missing value with zero
new_crime_data = city_data_df.fillna(0)

# Display the cleaned data
new_crime_data

In [None]:
# List the column labels of new_crime_data to decide which columns to remove
new_crime_data.columns

In [None]:
# Columns removed before aggregating: the date column, the per-category
# "... Total" columns and the stray "Unnamed" columns
columns_to_drop = [
    'Month and Year',
    'Homicide Total',
    'Recent Sexual Offence Total',
    'Historical Sexual Offence Total',
    'Assault (Family) Total',
    'Assault (Non-Family) Total',
    'Threatening Behaviour (Family) Total',
    'Threatening Behaviour (Non-Family) Total',
    'Deprivation of Liberty Total',
    'Robbery Total',
    'Burglary Total',
    'Stealing Total',
    'Property Damage Total',
    'Arson Total',
    'Drug Offences Total',
    'Receiving and Possession of Stolen Property Total',
    'Fraud & Related Offences Total',
    'Breach of Violence Restraint Order Total',
    'Total Selected Miscellaneous Offences',
    'Unnamed: 32',
    'Unnamed: 52',
    'Unnamed: 51',
    'Unnamed: 63',
]
no_date_df = new_crime_data.drop(columns=columns_to_drop)

# Sum every remaining column per region; Region becomes the index of the result
grouped_data = no_date_df.groupby('Region').sum()

# Add a Total column holding the row-wise sum of the selected columns.
# NOTE(review): Region is the index here, so the 1:-1 slice leaves out the first
# and the last data column of each row - confirm that this is intended.
grouped_data['Total'] = grouped_data.iloc[:, 1:-1].sum(axis=1)

# Display the aggregated data
grouped_data

In [None]:
# List the column labels of grouped_data to check that the unwanted columns are gone
grouped_data.columns

In [None]:
# Create empty-string placeholder columns for the coordinates
for coord_column in ("Lat", "Lon"):
    grouped_data[coord_column] = ""

grouped_data

In [None]:
# Turn the Region index back into an ordinary column
indexed_data = grouped_data.reset_index()

# Swap the region label for one the geocoding lookup can resolve
fixed_data = indexed_data.replace(
    to_replace='Mid_West_Gascoyne',
    value='West_Gascoyne',
)
fixed_data

In [None]:
 # Define the API parameters
# Query parameters sent with every geocoding request
params = dict(
    apiKey=geoapify_key,
    format="json",
)

# Endpoint of the Geoapify forward-geocoding search
base_url = "https://api.geoapify.com/v1/geocode/search"

In [None]:
# Geocode every region in fixed_data and store the coordinates in its Lat/Lon columns
for index, row in fixed_data.iterrows():

    # Append the state and country so the search is narrowed to Western Australia
    city = row["Region"] + ", WA, Australia"

    # Add the current search text to the shared request parameters
    params["text"] = city

    try:
        # A timeout keeps one stalled request from hanging the whole notebook
        response = requests.get(base_url, params=params, timeout=10)

        # Surface HTTP errors (bad key, rate limit, ...) instead of parsing an error body
        response.raise_for_status()

        # Use the first match returned by the API
        best_match = response.json()["results"][0]
        lat, lon = best_match["lat"], best_match["lon"]
    except (requests.RequestException, ValueError, KeyError, IndexError) as error:
        # A failed or empty lookup must not abort the remaining regions:
        # report it and leave the coordinates missing for this row
        print(f"Could not geocode {city!r}: {error!r}")
        lat, lon = np.nan, np.nan

    # Store the coordinates (or NaN when the lookup failed)
    fixed_data.loc[index, "Lat"] = lat
    fixed_data.loc[index, "Lon"] = lon

# Display the data to confirm that the coordinates appear
fixed_data

In [None]:
%%capture --no-display
# Point map of all regions on OSM tiles: marker size follows Total, colour follows Region
map_plot = fixed_data.hvplot.points(
    x="Lon",
    y="Lat",
    color="Region",
    size="Total",
    scale=0.01,
    tiles="OSM",
    geo=True,
    frame_width=840,
    frame_height=600,
)

# Render the map as the cell output
map_plot

In [None]:
# Remove the rows regarded as outside the main Perth area.
# NOTE(review): rows are selected by index label, so this depends on the row
# order of fixed_data - verify the labels if the input data changes.
dropped_data = fixed_data.drop(index=[3, 6, 9, 13, 16])
dropped_data

In [None]:
%%capture --no-display
# Point map of the rows kept in dropped_data on OSM tiles, drawn with a larger marker scale
map_plot_2 = dropped_data.hvplot.points(
    x="Lon",
    y="Lat",
    color="Region",
    size="Total",
    scale=0.05,
    tiles="OSM",
    geo=True,
    frame_width=840,
    frame_height=600,
)

# Render the map as the cell output
map_plot_2

In [None]:
# Order the regions by ascending Total
bigsmall_data = fixed_data.sort_values("Total")

# Remove the rows labelled 8 and 14
# NOTE(review): the reason for excluding these two rows is not stated - confirm.
small_big = bigsmall_data.drop(index=[8, 14])
small_big

In [None]:
%%capture --no-display
# Point map of the rows kept in small_big on OSM tiles: marker size follows Total
map_plot_3 = small_big.hvplot.points(
    x="Lon",
    y="Lat",
    color="Region",
    size="Total",
    scale=0.01,
    tiles="OSM",
    geo=True,
    frame_width=840,
    frame_height=600,
)

# Render the map as the cell output
map_plot_3

In [None]:
# Remove a further set of rows from small_big, selected by index label
# NOTE(review): presumably the regions outside the main Perth area - confirm.
dropped_data2 = small_big.drop(index=[3, 4, 6, 9, 13, 16])
dropped_data2

In [None]:
%%capture --no-display
# Point map of the rows kept in dropped_data2 on OSM tiles, drawn with a larger marker scale
map_plot_4 = dropped_data2.hvplot.points(
    x="Lon",
    y="Lat",
    color="Region",
    size="Total",
    scale=0.05,
    tiles="OSM",
    geo=True,
    frame_width=840,
    frame_height=600,
)

# Render the map as the cell output
map_plot_4

In [None]:
# Reverse the row order of small_big (sorted ascending by Total), giving largest-first
big_small = small_big.iloc[::-1]
big_small

In [None]:
# Bar chart of Total per region, in the row order of big_small,
# with horizontal reference lines at the mean and the median

# One bar position per region
x_axis = np.arange(len(big_small))

# Bars are edge-aligned with the default width, so shift each tick right to sit under its bar
tick_locations = [value + 0.4 for value in x_axis]

# Set the figure size to leave room for the vertical region labels
plt.figure(figsize=(8, 5))
plt.bar(x_axis, big_small["Total"], color='r', alpha=0.5, align="edge")
plt.xticks(tick_locations, big_small["Region"], rotation="vertical")

# Set x and y limits
plt.xlim(-0.15, len(x_axis) - 0.05)
plt.ylim(0, max(big_small["Total"]) + 10000)

# Set a title and labels
plt.title("Total Crimes in Each Region")
plt.xlabel("Region")
plt.ylabel("Total Crimes")

# Reference lines, computed from the same frame that is plotted and
# labelled so the two lines can be told apart
mean = big_small["Total"].mean()
median = big_small["Total"].median()
plt.axhline(mean, label="Mean")
plt.axhline(median, color="black", label="Median")
plt.legend()

# Save the figure, creating the output folder first so savefig cannot fail on a fresh checkout
Path("figures").mkdir(parents=True, exist_ok=True)
plt.savefig("figures/Fig1.png")

# Call show() - the bare attribute only echoed the function object
plt.show()

In [None]:
# Remove a set of rows from big_small, selected by index label
# NOTE(review): presumably the non-metropolitan regions - confirm against big_small.
metro_bigsmall = big_small.drop(index=[3, 4, 6, 9, 13, 16])
metro_bigsmall

In [None]:
# Bar chart of Total per region for the rows in metro_bigsmall,
# with horizontal reference lines at the mean and the median

# One bar position per region
x_axis = np.arange(len(metro_bigsmall))

# Bars are edge-aligned with the default width, so shift each tick right to sit under its bar
tick_locations = [value + 0.4 for value in x_axis]

# Set the figure size to leave room for the vertical region labels
plt.figure(figsize=(8, 5))
plt.bar(x_axis, metro_bigsmall["Total"], color='r', alpha=0.5, align="edge")
plt.xticks(tick_locations, metro_bigsmall["Region"], rotation="vertical")

# Set x and y limits
plt.xlim(-0.15, len(x_axis) - 0.05)
plt.ylim(0, max(metro_bigsmall["Total"]) + 10000)

# Set a title and labels
plt.title("Total Crimes in Each Region")
plt.xlabel("Region")
plt.ylabel("Total Crimes")

# Reference lines, labelled so the two lines can be told apart
mean = metro_bigsmall["Total"].mean()
median = metro_bigsmall["Total"].median()
plt.axhline(mean, label="Mean")
plt.axhline(median, color="black", label="Median")
plt.legend()

# Save the figure, creating the output folder first so savefig cannot fail on a fresh checkout
Path("figures").mkdir(parents=True, exist_ok=True)
plt.savefig("figures/Fig2.png")

# Call show() - the bare attribute only echoed the function object
plt.show()