In [1]:
from matplotlib import pyplot as plt
from scipy import stats
import numpy as np
import pandas as pd
from pathlib import Path


In [32]:
# Load the combined WA crime dataset from the project-relative CSV file.
combined_wacrime = Path('crime-data/combined_wacrime.csv')
combined_wacrime_df = pd.read_csv(combined_wacrime)

# Remove the rows whose Region is 'Regional' or 'Metropolitan' in one pass.
# The explicit .copy() makes the result an independent frame, so the column
# assignments below cannot trigger pandas' SettingWithCopyWarning.
excluded_regions = ['Regional', 'Metropolitan']
combined_wacrime_df = combined_wacrime_df[
    ~combined_wacrime_df['Region'].isin(excluded_regions)
].copy()

# Parse 'Month and Year' (e.g. 'Jan-07') into a new datetime column 'Date';
# the original text column is kept alongside it.
combined_wacrime_df['Date'] = pd.to_datetime(
    combined_wacrime_df['Month and Year'], format='%b-%y'
)

# Split the parsed date into numeric Month (1-12) and Year columns
combined_wacrime_df['Month'] = combined_wacrime_df['Date'].dt.month
combined_wacrime_df['Year'] = combined_wacrime_df['Date'].dt.year

# Drop every row that has a missing value in ANY column
combined_wacrime_df = combined_wacrime_df.dropna(how='any', axis=0)

combined_wacrime_df.head()


In [33]:
# Keep only Month, Year and the individual offence columns
# (the total columns are left out).
offence_columns = [
    'Murder',
    'Attempted / Conspiracy to Murder',
    'Manslaughter',
    'Driving Causing Death',
    'Sexual Assault',
    'Non-Assaultive Sexual Offences',
    'Serious Assault (Family)',
    'Common Assault (Family)',
    'Serious Assault (Non-Family)',
    'Common Assault (Non-Family)',
    'Assault Police Officer',
    'Threatening Behaviour (Family)',
    'Possess Weapon to Cause Fear (Family)',
    'Threatening Behaviour (Non-Family)',
    'Possess Weapon to Cause Fear (Non-Family)',
    'Kidnapping / Child Stealing',
    'Deprivation of Liberty',
    'Robbery (Business)',
    'Robbery (Non-Business)',
    'Burglary (Dwelling)',
    'Burglary (Non-Dwelling)',
    'Stealing of Motor Vehicle',
    'Stealing From Motor Vehicle (Contents or Parts)',
    'Stealing From Retail Premises (Shoplift)',
    'Stealing From Dwelling',
    'Stealing From Other Premises or Place',
    'Stealing as a Servant',
    'Stealing (Not Elsewhere Classified)',
    'Criminal Damage',
    'Damage',
    'Cause Bushfire',
    'Cause Damage by Fire',
    'Other Fire Related Offences',
    'Drug Dealing',
    'Drug Possession',
    'Possession of Drug Paraphernalia',
    'Cultivate or Manufacture Drugs',
    'Other Drug Offences',
    'Possess Stolen Property',
    'Receiving Stolen Property',
    'Regulated Weapons Offences',
    'Graffiti',
    'Forgery',
    'Fraud (Credit Card)',
    'Fraud (Not Elsewhere Classified)',
    'Breach of Family Violence Restraint Order',
    'Breach of Violence Restraint Order',
    'Breach of Police Order',
]

# The two date parts come first, followed by the offences in the order above
combined_wacrime_df = combined_wacrime_df[['Month', 'Year'] + offence_columns]
combined_wacrime_df

In [34]:
# Replace any remaining NaN values with 0 so the integer cast below cannot fail
combined_wacrime_df = combined_wacrime_df.fillna(0)

# Convert all values to integers
combined_wacrime_df = combined_wacrime_df.astype(int)

# Convert the numeric Month (1-12) to its three-letter English abbreviation
# ('Jan', 'Feb', ...). From here on the Month column holds strings.
combined_wacrime_df['Month'] = (
    pd.to_datetime(combined_wacrime_df['Month'], format='%m')
    .dt.month_name()
    .str.slice(stop=3)
)

# Group the rows by Year. This only builds the GroupBy object; it is not
# printed because its repr is just a class name and a memory address.
year_wacrime_df = combined_wacrime_df.groupby(['Year'])

# Per-year sum of every offence column. numeric_only=True leaves out the
# string Month column, which would otherwise be concatenated into a
# meaningless value or dropped with a warning, depending on the pandas version.
year_wacrime_df.sum(numeric_only=True).head(17)

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000026DC3AA7610>


In [35]:
# Sum every offence column per year. Month is removed first (if present) so it
# can never leak into the row totals, whatever dtype it currently has.
total_crimes_df = (
    combined_wacrime_df
    .drop(columns=['Month'], errors='ignore')
    .groupby('Year')
    .sum(numeric_only=True)
)

# Add a 'Total Crimes' column: the sum across all offence columns for each year
total_crimes_df['Total Crimes'] = total_crimes_df.sum(axis=1)

# Show only the totals, formatted with thousands separators (first 17 years)
years_df = total_crimes_df.loc[:, ['Total Crimes']].head(17).style.format("{:,.0f}")
years_df

In [44]:
# Line chart of the yearly 'Total Crimes' column; the x-axis is the Year index
year_line = total_crimes_df.plot(
    kind='line',
    color='green',
    grid=True,
    y='Total Crimes',
    title='Crime Rate Statistics For Each Year (2007 - 2023)',
)
year_line.set_ylabel('Total Crimes')
year_line.set_xlabel('Year')
# plt.show must be called; a bare `plt.show` only evaluates to the function object
plt.show()

In [43]:
# Re-create the yearly graph as a bar chart with one bar per year
total_crimes = total_crimes_df['Total Crimes']

# Take the tick labels from the data itself, so the number of labels always
# matches the number of bars even if the dataset covers a different year range.
years = total_crimes.index.astype(str).tolist()
x_axis = np.arange(len(years))
tick_locations = list(x_axis)

plt.title('Crime Rate Statistics For Each Year (2007 - 2023)')
plt.xlabel('Year')
plt.ylabel('Total Crimes')

# Leave a small margin either side of the bars. The y-axis starts at 200,000
# rather than 0, so bar heights show differences between years, not absolute size.
plt.xlim(-0.75, len(years) - 0.25)
plt.ylim(200000, total_crimes.max() + 10000)

plt.bar(x_axis, total_crimes, facecolor='lightblue', alpha=0.75, align='center')
plt.xticks(tick_locations, years, rotation=45, rotation_mode="anchor", ha="right", wrap=True)
plt.show()


In [38]:
# Drop the Year column so only Month and the offence columns remain.
# errors='ignore' keeps this cell re-runnable: a second run, when Year is
# already gone, would otherwise raise KeyError.
# NOTE: this overwrites combined_wacrime_df, so cells that group by 'Year'
# can only be re-run after the earlier cells have rebuilt the frame.
combined_wacrime_df = combined_wacrime_df.drop(columns=['Year'], errors='ignore')

# Group the rows by Month. This only builds the GroupBy object; it is not
# printed because its repr is just a class name and a memory address.
months_wacrime_df = combined_wacrime_df.groupby(['Month'])

# Per-month sum of every offence column (group keys are sorted alphabetically)
months_wacrime_df.sum().head(12)


<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000026DC7104CD0>


In [39]:
# Sum every offence column per month across the whole period. Year is removed
# here (if still present) so the year numbers can never be added into the row
# totals, regardless of which earlier cells have been run.
total_crimes_months_df = (
    combined_wacrime_df
    .drop(columns=['Year'], errors='ignore')
    .groupby('Month')
    .sum(numeric_only=True)
)

# Add a 'Total Crimes' column: the sum across all offence columns for each month
total_crimes_months_df['Total Crimes'] = total_crimes_months_df.sum(axis=1)

# groupby sorts the month abbreviations alphabetically; restore calendar order
new_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
total_crimes_months_df = total_crimes_months_df.reindex(new_order)

# Show only the totals, formatted with thousands separators
months_df = total_crimes_months_df.loc[:, ['Total Crimes']].head(12).style.format('{:,.0f}')

months_df


In [41]:
# Line chart of the monthly 'Total Crimes' column; the x-axis is the Month index
month_plt = total_crimes_months_df.plot(
    kind='line',
    grid=True,
    color='blue',
    y='Total Crimes',
    title='Crime Rate Statistics For Each Month Collectively (2007 - 2023)',
)
month_plt.set_ylabel('Total Crimes')
month_plt.set_xlabel('Month')
month_plt.legend(loc='best')
# plt.show must be called; a bare `plt.show` only evaluates to the function object
plt.show()


In [42]:
# Bar-chart version of the monthly totals: one bar per calendar month
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
total_crimes = total_crimes_months_df['Total Crimes']

# Bar positions 0..11; the tick marks sit at exactly the same positions
x_axis = np.arange(0, len(months))
tick_locations = list(x_axis)

# Draw the bars and label each one with its month abbreviation
plt.bar(x_axis, total_crimes, facecolor='purple', alpha=0.75, align='center')
plt.xticks(tick_locations, months)

# Small horizontal margin either side of the bars; the y-axis starts at 330,000
plt.xlim(-0.75, len(months) - .25)
plt.ylim(330000, max(total_crimes) + 10000)

# Title and axis labels
plt.title('Crime Rate Statistics For Each Month Collectively (2007 - 2023)')
plt.xlabel("Month")
plt.ylabel('Total Crimes')
plt.show()