# Clean Crime Data

In [1]:
# Import the required libraries
import requests
import json
import pandas as pd


In [4]:
#Import crime by suburb data
#encodings = ['utf-8', 'utf-8-sig', 'latin-1', 'cp1252', 'utf-16']

# Specify the file path and sheet name
file_path = "Resources\Data_Tables_LGA_Criminal_Incidents_Year_Ending_March_2023.xlsx"
sheet_name = "Table 03"

# Read the Excel file, specifying the sheet name and header row
crime_df = pd.read_excel(file_path, sheet_name, header=[0])

# Define the columns to drop
columns_to_drop = ["Year ending", "Local Government Area", "Suburb/Town Name"]

# Drop the specified columns
crime_df = crime_df.drop(columns_to_drop, axis=1)

# Rename the columns
new_column_names = {
    "Postcode": "POST_CODE"
}
crime_df = crime_df.rename(columns=new_column_names)

# Display the updated DataFrame
crime_df

Unnamed: 0,Year,POST_CODE,Offence Division,Offence Subdivision,Offence Subgroup,Incidents Recorded
0,2023,3691,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,1
1,2023,3691,A Crimes against the person,Other crimes against the person,Other crimes against the person,1
2,2023,3691,B Property and deception offences,B40 Theft,B42 Steal from a motor vehicle,2
3,2023,3691,B Property and deception offences,B40 Theft,B49 Other theft,1
4,2023,3691,D Public order and security offences,D10 Weapons and explosives offences,D11 Firearms offences,1
...,...,...,...,...,...,...
330929,2014,3489,B Property and deception offences,B20 Property damage,B21 Criminal damage,1
330930,2014,3489,B Property and deception offences,B30 Burglary/Break and enter,B322 Non-residential non-aggravated burglary,1
330931,2014,3489,E Justice procedures offences,E20 Breaches of orders,E22 Breach intervention order,1
330932,2014,3491,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,1


In [5]:
#Import mapping file - use to filter to sample postcodes for further analysis
# Specify the file path and sheet name
file_path = "Resources/Melbourne Postcodes.xlsx"
sheet_name = "Mapping"

# Read the specified sheet into a DataFrame
melb_postcodes_df = pd.read_excel(file_path, sheet_name=sheet_name)
melb_postcodes_df= melb_postcodes_df.dropna(subset=['ABS_SA2_KEY'])
melb_postcodes_df= melb_postcodes_df.dropna(subset=['HOUSE_LOCALITY'])
melb_postcodes_df= melb_postcodes_df.dropna(subset=['SCHOOL_POST_CODE'])

# Display the DataFrame
melb_postcodes_df

Unnamed: 0,MUNICIPALITY,CITY_SHIRE,SUBURB_GROUP,RURAL_TOWNSHIP,POST_CODE,ABS_SA2_KEY,HOUSE_LOCALITY,SCHOOL_POST_CODE,SUBURB_NAME,COMMENTS,SUBURB_POSTCODE_COMMENTS
0,Inner City municipalities and their suburbs,City of Melbourne,Inner,,3053,206041117: Carlton,CARLTON,3053.0,Carlton,,Carlton 3053
2,Inner City municipalities and their suburbs,City of Yarra,Inner,,3054,206071140: Carlton North - Princes Hill,CARLTON NORTH,3054.0,Carlton North,Shared with City of Yarra,Carlton North 3054 (Shared with City of Yarra)
6,Northern municipalities and their suburbs,City of Moonee Valley,Mid,,3031,206031115: Flemington,FLEMINGTON,3031.0,Flemington,Shared with City of Moonee Valley,Flemington 3031 (Shared with City of Moonee Va...
7,Northern municipalities and their suburbs,City of Moonee Valley,Mid,,3031,206031115: Flemington,KENSINGTON,3031.0,Kensington,,Kensington 3031
10,Inner City municipalities and their suburbs,City of Melbourne,Inner,,3051,206041506: North Melbourne,NORTH MELBOURNE,3051.0,North Melbourne,Shared with City of Moonee Valley,North Melbourne 3051 (Shared with City of Moon...
...,...,...,...,...,...,...,...,...,...,...,...
997,Western municipalities and their suburbs,City of Wyndham,Outer,,3030,213011570: Derrimut,WERRIBEE,3030.0,Werribee,,Werribee 3030
1000,Western municipalities and their suburbs,City of Wyndham,Outer,,3030,213011570: Derrimut,WERRIBEE SOUTH,3030.0,Werribee South,,Werribee South 3030
1003,Western municipalities and their suburbs,City of Wyndham,Outer,,3024,213051579: Manor Lakes - Quandong,WYNDHAM VALE,3024.0,Wyndham Vale,,Wyndham Vale 3024
1005,Western municipalities and their suburbs,City of Melton,Outer,Rural localities,3338,213041571: Brookfield,EYNESBURY,3338.0,Eynesbury,Shared with the Shire of Melton,Eynesbury 3338 (Shared with the Shire of Melton)


In [6]:
# Concatenate SUBURB_NAME values for each POST_CODE
postcode_suburbs_df = melb_postcodes_df.groupby('POST_CODE')['SUBURB_NAME'].agg(lambda x: ', '.join(x)).reset_index()

# Display the resulting DataFrame
postcode_suburbs_df

Unnamed: 0,POST_CODE,SUBURB_NAME
0,3003,West Melbourne
1,3011,"Footscray, Seddon"
2,3012,"Brooklyn, Brooklyn, Kingsville, Maidstone, Wes..."
3,3013,"Yarraville, Aintree, Bonnie Brook"
4,3015,"Newport, Spotswood, South Kingsville"
...,...,...
188,3975,"Lynbrook, Lyndhurst"
189,3976,Hampton Park
190,3977,"Botanic Ridge, Cranbourne, Cranbourne East, Cr..."
191,3978,"Clyde, Clyde North"


In [9]:
crime_clean_1_df = pd.merge(melb_postcodes_df, crime_df, how='inner', on='POST_CODE')
columns_to_drop = ["RURAL_TOWNSHIP","ABS_SA2_KEY","HOUSE_LOCALITY","SCHOOL_POST_CODE",
                   "COMMENTS","SUBURB_NAME","SUBURB_POSTCODE_COMMENTS"]
crime_clean_1_df = crime_clean_1_df.drop(columns_to_drop, axis=1)
crime_clean_1_df = crime_clean_1_df.drop_duplicates()
crime_clean_1_df

Unnamed: 0,MUNICIPALITY,CITY_SHIRE,SUBURB_GROUP,POST_CODE,Year,Offence Division,Offence Subdivision,Offence Subgroup,Incidents Recorded
0,Inner City municipalities and their suburbs,City of Melbourne,Inner,3053,2023,A Crimes against the person,A20 Assault and related offences,A211 FV Serious assault,22
1,Inner City municipalities and their suburbs,City of Melbourne,Inner,3053,2023,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,47
2,Inner City municipalities and their suburbs,City of Melbourne,Inner,3053,2023,A Crimes against the person,A20 Assault and related offences,"A22 Assault police, emergency services or othe...",10
3,Inner City municipalities and their suburbs,City of Melbourne,Inner,3053,2023,A Crimes against the person,A20 Assault and related offences,A231 FV Common assault,37
4,Inner City municipalities and their suburbs,City of Melbourne,Inner,3053,2023,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,61
...,...,...,...,...,...,...,...,...,...
418251,Western municipalities and their suburbs,City of Wyndham,Outer,3024,2014,D Public order and security offences,D30 Public nuisance offences,D35 Improper movement on public or private space,1
418252,Western municipalities and their suburbs,City of Wyndham,Outer,3024,2014,D Public order and security offences,D30 Public nuisance offences,D36 Other public nuisance offences,1
418253,Western municipalities and their suburbs,City of Wyndham,Outer,3024,2014,E Justice procedures offences,E10 Justice procedures,E14 Pervert the course of justice or commit pe...,1
418254,Western municipalities and their suburbs,City of Wyndham,Outer,3024,2014,E Justice procedures offences,E20 Breaches of orders,E21 Breach family violence order,24


In [10]:
crime_clean_final_df = pd.merge(crime_clean_1_df, postcode_suburbs_df, how='inner', on='POST_CODE')
new_column_names = {
    "SUBURB_NAME": "SUBURB_NAMES",
    }
crime_clean_final_df = crime_clean_final_df.rename(columns=new_column_names)
crime_clean_final_df

Unnamed: 0,MUNICIPALITY,CITY_SHIRE,SUBURB_GROUP,POST_CODE,Year,Offence Division,Offence Subdivision,Offence Subgroup,Incidents Recorded,SUBURB_NAMES
0,Inner City municipalities and their suburbs,City of Melbourne,Inner,3053,2023,A Crimes against the person,A20 Assault and related offences,A211 FV Serious assault,22,Carlton
1,Inner City municipalities and their suburbs,City of Melbourne,Inner,3053,2023,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,47,Carlton
2,Inner City municipalities and their suburbs,City of Melbourne,Inner,3053,2023,A Crimes against the person,A20 Assault and related offences,"A22 Assault police, emergency services or othe...",10,Carlton
3,Inner City municipalities and their suburbs,City of Melbourne,Inner,3053,2023,A Crimes against the person,A20 Assault and related offences,A231 FV Common assault,37,Carlton
4,Inner City municipalities and their suburbs,City of Melbourne,Inner,3053,2023,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,61,Carlton
...,...,...,...,...,...,...,...,...,...,...
143722,Western municipalities and their suburbs,City of Wyndham,Outer,3024,2014,D Public order and security offences,D30 Public nuisance offences,D35 Improper movement on public or private space,1,"Manor Lakes, Wyndham Vale, Mambourin"
143723,Western municipalities and their suburbs,City of Wyndham,Outer,3024,2014,D Public order and security offences,D30 Public nuisance offences,D36 Other public nuisance offences,1,"Manor Lakes, Wyndham Vale, Mambourin"
143724,Western municipalities and their suburbs,City of Wyndham,Outer,3024,2014,E Justice procedures offences,E10 Justice procedures,E14 Pervert the course of justice or commit pe...,1,"Manor Lakes, Wyndham Vale, Mambourin"
143725,Western municipalities and their suburbs,City of Wyndham,Outer,3024,2014,E Justice procedures offences,E20 Breaches of orders,E21 Breach family violence order,24,"Manor Lakes, Wyndham Vale, Mambourin"


In [11]:
unique_count = crime_clean_final_df['POST_CODE'].nunique()

# Display the count of unique strings
print(unique_count)

193


In [15]:
# Check for NaN values in a column
has_nan = crime_clean_final_df['Incidents Recorded'].isna().any()

# Display the result - False (no NaN values in column), True (at least one NaN value in column)
print(has_nan)

False


In [16]:
crime_clean_final_df = crime_clean_final_df.drop_duplicates()
# Reset the index and make POST_CODE the new index
crime_clean_final_df = crime_clean_final_df.set_index('POST_CODE')

# Export cleaned population dataset to a CSV file
crime_clean_final_df.to_csv("Cleaned_Data/crime_clean_final.csv", index_label="POST_CODE")
crime_clean_final_df


Unnamed: 0_level_0,MUNICIPALITY,CITY_SHIRE,SUBURB_GROUP,Year,Offence Division,Offence Subdivision,Offence Subgroup,Incidents Recorded,SUBURB_NAMES
POST_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
3053,Inner City municipalities and their suburbs,City of Melbourne,Inner,2023,A Crimes against the person,A20 Assault and related offences,A211 FV Serious assault,22,Carlton
3053,Inner City municipalities and their suburbs,City of Melbourne,Inner,2023,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,47,Carlton
3053,Inner City municipalities and their suburbs,City of Melbourne,Inner,2023,A Crimes against the person,A20 Assault and related offences,"A22 Assault police, emergency services or othe...",10,Carlton
3053,Inner City municipalities and their suburbs,City of Melbourne,Inner,2023,A Crimes against the person,A20 Assault and related offences,A231 FV Common assault,37,Carlton
3053,Inner City municipalities and their suburbs,City of Melbourne,Inner,2023,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,61,Carlton
...,...,...,...,...,...,...,...,...,...
3024,Western municipalities and their suburbs,City of Wyndham,Outer,2014,D Public order and security offences,D30 Public nuisance offences,D35 Improper movement on public or private space,1,"Manor Lakes, Wyndham Vale, Mambourin"
3024,Western municipalities and their suburbs,City of Wyndham,Outer,2014,D Public order and security offences,D30 Public nuisance offences,D36 Other public nuisance offences,1,"Manor Lakes, Wyndham Vale, Mambourin"
3024,Western municipalities and their suburbs,City of Wyndham,Outer,2014,E Justice procedures offences,E10 Justice procedures,E14 Pervert the course of justice or commit pe...,1,"Manor Lakes, Wyndham Vale, Mambourin"
3024,Western municipalities and their suburbs,City of Wyndham,Outer,2014,E Justice procedures offences,E20 Breaches of orders,E21 Breach family violence order,24,"Manor Lakes, Wyndham Vale, Mambourin"
