In [None]:
# Dependencies
from matplotlib import pyplot as plt
from scipy import stats
import numpy as np
import pandas as pd
from pathlib import Path

In [None]:
# Location of the combined WA crime CSV, relative to the notebook's working directory
crime_all_path = "crime-data/combined_wacrime.csv"

# Read the CSV into pandas, using the first column of the file as the index.
# The stray "Unnamed: N" columns are NOT removed in this cell: a bare column
# selection that is never assigned has no effect, so none is attempted here.
# Those columns are dropped explicitly by name in a later cleaning cell.
crime_all_df = pd.read_csv(crime_all_path, index_col=0, encoding="UTF-8")
crime_all_df

In [None]:
# Drop the aggregate "... Total" columns listed below from the loaded frame.
total_columns = [
    'Homicide Total',
    'Recent Sexual Offence Total',
    'Historical Sexual Offence Total',
    'Assault (Family) Total',
    'Assault (Non-Family) Total',
    'Threatening Behaviour (Family) Total',
    'Threatening Behaviour (Non-Family) Total',
    'Deprivation of Liberty Total',
    'Robbery Total',
    'Burglary Total',
    'Stealing Total',
    'Property Damage Total',
    'Arson Total',
    'Drug Offences Total',
    'Receiving and Possession of Stolen Property Total',
    'Fraud & Related Offences Total',
    'Breach of Violence Restraint Order Total',
    'Total Selected Miscellaneous Offences',
]

# `columns=` already determines the axis, so the contradictory `axis=0` is not passed.
# The name is rebound to the returned frame rather than using `inplace=True`, so the
# object produced by the load cell is not mutated behind the reader's back.
# A missing column still raises KeyError (e.g. when this cell is re-run without
# re-running the load cell), which also surfaces typos in the list above.
crime_all_df = crime_all_df.drop(columns=total_columns)
crime_all_df

In [None]:
# Remove rows containing any null value.
# NOTE(review): dropna() runs while the "Unnamed: N" columns are still present, so a
# null in one of those columns also discards the whole row - confirm this order is intended.
clean_df = crime_all_df.dropna()

# Remove the leftover unnamed columns by name. `columns=` fixes the axis on its own,
# so the contradictory `axis=0` is not passed.
unnamed_columns = ["Unnamed: 32", "Unnamed: 51", "Unnamed: 52", "Unnamed: 63"]
cleaned_df = clean_df.drop(columns=unnamed_columns)

# "Month and Year" is removed only from this second frame; `cleaned_df` keeps it.
cleaned_df2 = cleaned_df.drop(columns=["Month and Year"])
cleaned_df2

In [None]:
# With the null rows gone, cast every remaining column to the integer dtype.
# NOTE(review): assumes all remaining columns hold whole-number values - confirm.
cleaned_df2 = cleaned_df2.astype(dtype=int)
cleaned_df2

In [None]:
# Total for each crime type, summed down the rows.
# The label-based slice is inclusive: it takes "Murder", "Breach of Police Order"
# and every column positioned between them.
crime_type_columns = cleaned_df2.loc[:, "Murder":"Breach of Police Order"]
count_all = crime_type_columns.sum()
count_all

In [None]:
# Horizontal bar chart of the per-type totals.
# Sorting ascending places the largest total at the top of the chart.
sorted_counts = count_all.sort_values()
types_plot = sorted_counts.plot.barh(figsize=(8, 10), alpha=1, align="edge")

# Label the axes and title directly on the Axes returned by pandas
types_plot.set_ylabel("Type of crime")
types_plot.set_xlabel("Total number")
types_plot.set_title("Highest to lowest type of crimes in WA from Jan 07 - Sep 23")
plt.show()

In [None]:
# Pie chart of the five crime types with the largest totals
top_5_crimes = count_all.sort_values(ascending=False).head(5)

# Take the slice labels from the Series index so every label is guaranteed to
# match its value, even if the data (and therefore the ranking) changes.
my_labels = top_5_crimes.index.tolist()

# Percentages shown are each slice's share of these five totals only,
# not of all recorded crime.
plt.pie(top_5_crimes, labels=my_labels,
        autopct="%1.1f%%", shadow=True, startangle=140)
plt.title("Highest five crimes in WA 2007-2023")
# Equal aspect ratio so the pie is drawn as a circle
plt.axis("equal")
plt.show()

In [None]:
# Pie chart of the five crime types with the smallest totals
low_5_crimes = count_all.sort_values(ascending=True).head(5)

# Take the slice labels from the Series index so every label is guaranteed to
# match its value, even if the data (and therefore the ranking) changes.
my_labels = low_5_crimes.index.tolist()

# Percentages shown are each slice's share of these five totals only,
# not of all recorded crime.
plt.pie(low_5_crimes, labels=my_labels,
        autopct="%1.1f%%", shadow=True, startangle=140)
plt.title("Lowest five crimes in WA 2007-2023")
# Equal aspect ratio so the pie is drawn as a circle
plt.axis("equal")
plt.show()


In [None]:
# Switch to a per-area view: group the cleaned rows by "Region".
# Despite its name, `region_df` is a pandas GroupBy object, not a DataFrame;
# the grouping key stays a one-element list so group keys keep the same form.
region_df = cleaned_df.groupby(by=["Region"])
region_df

In [None]:
# Total of each crime type within each region.
# `region_df` groups `cleaned_df`, which still carries the "Month and Year" column
# (it was only removed from `cleaned_df2`). On pandas >= 2.0 a plain `.sum()` would
# try to "add up" that non-numeric column as well (string concatenation or an error),
# so the aggregation is restricted to numeric columns explicitly.
totals_by_region = region_df.sum(numeric_only=True)
totals_by_region