In [None]:
# Dependencies
from matplotlib import pyplot as plt
from scipy import stats
import numpy as np
import pandas as pd
from pathlib import Path

In [None]:
# Location of the combined WA crime CSV, relative to the notebook
crime_all_path = "crime-data/combined_wacrime.csv"

# Read the CSV using its first column as the index, then keep only the rows
# that have a value in every column.
# NOTE: the stray "Unnamed" columns are still present after this cell. A bare
# `crime_all_df.loc[:, ~...contains('Unnamed')]` expression used to sit here,
# but its result was never assigned, so it removed nothing; it has been
# deleted. The unnamed columns are dropped by name in a later cell.
crime_all_df = pd.read_csv(crime_all_path, index_col=0, encoding="UTF-8")
crime_all_df = crime_all_df.dropna(how="any", axis=0)

In [None]:
# Category "... Total" columns to discard so that only the individual offence
# columns remain (presumably these are aggregates of the individual columns
# and would double count if summed with them -- TODO confirm against the CSV).
total_columns = [
    'Homicide Total',
    'Recent Sexual Offence Total',
    'Historical Sexual Offence Total',
    'Assault (Family) Total',
    'Assault (Non-Family) Total',
    'Threatening Behaviour (Family) Total',
    'Threatening Behaviour (Non-Family) Total',
    'Deprivation of Liberty Total',
    'Robbery Total',
    'Burglary Total',
    'Stealing Total',
    'Property Damage Total',
    'Arson Total',
    'Drug Offences Total',
    'Receiving and Possession of Stolen Property Total',
    'Fraud & Related Offences Total',
    'Breach of Violence Restraint Order Total',
    'Total Selected Miscellaneous Offences',
]

# `columns=` alone selects the axis; a KeyError is raised if any name is missing
crime_all_df = crime_all_df.drop(columns=total_columns)
crime_all_df

In [None]:
# Remove the four leftover "Unnamed" columns by name.
# clean_df   -> still carries the "Month and Year" column
# cleaned_df -> the same frame without "Month and Year"
unnamed_columns = ["Unnamed: 32", "Unnamed: 51", "Unnamed: 52", "Unnamed: 63"]
clean_df = crime_all_df.drop(columns=unnamed_columns)
cleaned_df = clean_df.drop(columns="Month and Year")
cleaned_df

In [None]:
# Cast every remaining column to integer; this relies on rows with missing
# values having already been dropped when the data was loaded
cleaned_df = cleaned_df.astype(dtype=int)
cleaned_df

In [None]:
# Total number of each type of crime: take every column from "Murder" through
# "Breach of Police Order" (label slices include both end points) and sum
# each column over all rows
crime_columns = cleaned_df.loc[:, "Murder":"Breach of Police Order"]
count_all = crime_columns.sum()
count_all

In [None]:
# Show the totals as a horizontal bar chart. The series is sorted ascending
# because barh draws the first entry at the bottom, so the highest total
# ends up at the top of the chart.
sorted_counts = count_all.sort_values()
types_plot = sorted_counts.plot(kind='barh', figsize=(8,10), alpha=1, align="edge")
types_plot.set_ylabel("Type of crime")
types_plot.set_xlabel("Total number")
types_plot.set_title("Highest to lowest type of crimes in WA from Jan 07 - Sep 23")
plt.show()

In [None]:
# Rank the crime types from most to least frequent, keep the five highest
# totals, and add every remaining type together into one "others" figure.
# Slicing with iloc[5:] takes "everything after the top five" whatever the
# number of crime types is, instead of relying on a hard-coded tail(44).
ranked_crimes = count_all.sort_values(ascending=False)
top_5_crimes = ranked_crimes.head(5)
other_crimes = ranked_crimes.iloc[5:].sum()

In [None]:
# Pie chart of the five most common crimes plus one slice for all others.
# Labels and values are taken directly from top_5_crimes / other_crimes
# (computed in the previous cell) rather than typed in by hand, so the chart
# always matches the data that was actually loaded.
slices = top_5_crimes.index.tolist() + ['Others']
numbers_top = top_5_crimes.tolist() + [other_crimes]
# One colour per slice: five crime types followed by "Others"
colors = ["yellowgreen", "red", "deepskyblue", "lightcoral", "purple", "slategrey"]

plt.pie(numbers_top, labels=slices, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=140)

# Equal aspect ratio so the pie is drawn as a circle
plt.axis("equal")
plt.title("Highest 5 crimes in WA 2007-2023")
plt.show()

In [None]:
# Rank the crime types from least to most frequent, keep the five lowest
# totals, and add every remaining type together into one figure.
# Slicing with iloc[5:] takes "everything after the bottom five" whatever the
# number of crime types is, instead of relying on a hard-coded tail(44).
ranked_crimes_low = count_all.sort_values(ascending=True)
low_5_crimes = ranked_crimes_low.head(5)
other_crimes_low = ranked_crimes_low.iloc[5:].sum()

In [None]:
# Pie chart of the five least common crimes. Labels and values both come from
# low_5_crimes (computed in the previous cell), so each label is guaranteed to
# sit on the wedge holding its own count.
sections = low_5_crimes.index.tolist()
numbers_low = low_5_crimes.tolist()

plt.pie(numbers_low, labels=sections, autopct="%1.1f%%", shadow=True, startangle=140)

# Equal aspect ratio so the pie is drawn as a circle
plt.axis("equal")
plt.title("Lowest 5 crimes in WA 2007-2023 (shown as a 100% however is overall less than 1% of all crimes)")
plt.show()


In [None]:
# Group the cleaned data by "Region" and total every crime column per region
region_df = cleaned_df.groupby(['Region'])
region_totals = region_df.sum()

# Flip the table so crime types are rows and regions are columns, keeping
# only the "Regional" and "Metropolitan" columns, in that order
region_sum = region_totals.T[["Regional", "Metropolitan"]]

In [None]:
# Stacked horizontal bar chart: one bar per crime type, split into its
# Regional and Metropolitan totals. region_sum is built from every row of the
# cleaned data (no year filter), so the title describes a regional breakdown
# rather than a particular span of years.
region_sum.plot(kind="barh", stacked=True, figsize=(10,12), alpha=1)
plt.title("Numbers of different types of crimes in Regional and Metropolitan WA")
plt.xlabel("Number of each crime")
plt.ylabel("Types of crime")
plt.show()


In [None]:
# Turn the "Month and Year" text column into a real datetime column named
# "Date" (parsed with the '%b-%y' format), then derive numeric "Month" and
# "Year" columns from it. Keyword arguments to assign() are applied in order,
# so Month and Year read the already-parsed Date column.
clean_df = (
    clean_df
    .rename(columns={'Month and Year': 'Date'})
    .assign(
        Date=lambda frame: pd.to_datetime(frame['Date'], format='%b-%y'),
        Month=lambda frame: frame['Date'].dt.month,
        Year=lambda frame: frame['Date'].dt.year,
    )
)

# Only the year is needed for the yearly questions: drop Month and Date and
# use Year as the index
clean_df2 = clean_df.drop(columns=["Month", "Date"]).set_index("Year")

clean_df2.head()

In [None]:
# Move Year back into a column so that rows with a missing Year are removed
# together with rows missing any other value, then restore Year as the index
rows_complete = clean_df2.reset_index().dropna()
clean_df2 = rows_complete.set_index('Year')
# Year labels as integers
clean_df2.index = clean_df2.index.astype(int)

# Total every crime column per year and keep the first 17 yearly rows
# (NOTE(review): 17 presumably matches the 2007-2023 span -- confirm if the
# data set is extended)
year_df = clean_df2.groupby(["Year"]).sum().head(17)
year_df

In [None]:
# Take the last ten yearly rows and flip the table so crime types are rows
# and years are columns (one stacked segment per year)
plotdata = year_df.tail(10).T

# Stacked horizontal bar chart: one bar per crime type, one segment per year
decade_ax = plotdata.plot(kind="barh", stacked=True, figsize=(10,12), alpha=1)
decade_ax.set_title("Numbers of different types of crimes displayed in the last ten years")
decade_ax.set_xlabel("Number of each crime")
decade_ax.set_ylabel("Types of crime")
plt.show()