# Toronto Crime Data Cleaning

This notebook's cleaning process is specific to the Toronto crime datasets. The `TorontoCrimeDataCleaner` class loads each crime type's dataset and makes them all conform to a single dataframe structure.

In [10]:
from cleaning_tools.dataframe_tools import TorontoCrimeDataCleaner
from pathlib import Path

In [11]:
# Pair every crime type with the CSV file that holds its raw records, then
# turn the pairs into the {crime type: Path} mapping. Insertion order follows
# the order of this list.
_dataset_locations = [
    ("assault", "../datasets/Assault_Open_Data.csv"),
    ("auto_theft", "../datasets/Auto_Theft_Open_Data.csv"),
    ("break_and_enter", "../datasets/Break_and_Enter_Open_Data.csv"),
    ("robbery", "../datasets/Robbery_Open_Data.csv"),
    ("shooting", "../datasets/Shooting_and_Firearm_Discharges_Open_Data.csv"),
    ("theft_from_motor_vehicle", "../datasets/Theft_From_Motor_Vehicle_Open_Data.csv"),
    ("theft_over", "../datasets/Theft_Over_Open_Data.csv"),
    # Disabled entry, kept for reference:
    # ("traffic_collision", "../datasets/Traffic_Collisions_(ASR-T-TBL-001).csv"),
    ("bicycle_theft", "../datasets/Bicycle_Thefts_Open_Data.csv"),
    ("homicide", "../datasets/Homicides_Open_Data_ASR_RC_TBL_002.csv"),
]
crimes_dict = {crime: Path(location) for crime, location in _dataset_locations}

# Folder argument handed to the cleaner's export step later in the notebook
export_folder = "../cleaned_data"

In [12]:
# Year bounds for the analysis; both are handed to the TorontoCrimeDataCleaner
# constructor, which is then used for every cleaning step below.
min_year, max_year = 2015, 2018

toronto_data_cleaner = TorontoCrimeDataCleaner(
    min_year=min_year,
    max_year=max_year,
)

In [13]:
# Hand the {crime type: CSV path} mapping to the cleaner so it builds one
# normalized dataframe per crime type. According to the log it prints, each
# file is checked for existence, loaded into a pandas DataFrame, and then
# cleaned: column data normalized, date fields converted to integers,
# whitespace cleared from object columns, rows filtered to the configured
# year range, a 'DATE' column created, and column names reformatted.

toronto_data_cleaner.csv_dict_to_dataframes(crimes_dict)

"..\datasets\Assault_Open_Data.csv" exists
"..\datasets\Auto_Theft_Open_Data.csv" exists
"..\datasets\Break_and_Enter_Open_Data.csv" exists
"..\datasets\Robbery_Open_Data.csv" exists
"..\datasets\Shooting_and_Firearm_Discharges_Open_Data.csv" exists
"..\datasets\Theft_From_Motor_Vehicle_Open_Data.csv" exists
"..\datasets\Theft_Over_Open_Data.csv" exists
"..\datasets\Bicycle_Thefts_Open_Data.csv" exists
"..\datasets\Homicides_Open_Data_ASR_RC_TBL_002.csv" exists
--------------------------
Adding "assault" data from filepath "..\datasets\Assault_Open_Data.csv" to new DataFrame
Successfully loaded CSV to Pandas Dataframe
Starting to clean the data...
Normalizing column data...
Converting appropriate date data to integers...
Starting to clear whitespaces from object columns...
Filtering years from 2015 to 2018...
Creating 'DATE' column out of date columns...
Reformatting column names...
Successfully cleaned data!
Successfully added "assault" data from filepath "..\datasets\Assault_Open_Dat

In [14]:
# Use the merge function from the TorontoCrimeDataCleaner class object to
# combine the per-crime dataframes into one DataFrame.
# NOTE(review): presumably requires csv_dict_to_dataframes() to have run first — confirm.
all_data_cleaned_df = toronto_data_cleaner.merge_all_dataframes()

In [15]:
# Preview the first rows of the merged data to check its columns and values
all_data_cleaned_df.head()

Unnamed: 0,event_unique_id,crime,occ_year,occ_month,occ_day,occ_dow,occ_hour,premises_type,hood_140,neighbourhood_140,long_wgs84,lat_wgs84,date
0,GO-20155154,assault,2015,January,1,Thursday,23,House,115,Mount Dennis (115),-79.504668,43.693238,2015-01-01
1,GO-20151233,assault,2015,January,1,Thursday,3,Commercial,77,Waterfront Communities-The Island (77),-79.392855,43.647315,2015-01-01
2,GO-2015862,assault,2015,January,1,Thursday,2,Commercial,77,Waterfront Communities-The Island (77),-79.3877,43.649776,2015-01-01
3,GO-2015182,assault,2015,January,1,Thursday,0,Commercial,1,West Humber-Clairville (1),-79.595562,43.686751,2015-01-01
4,GO-2015276,assault,2015,January,1,Thursday,0,Outside,77,Waterfront Communities-The Island (77),-79.400096,43.645835,2015-01-01


In [16]:
# Count how many times each event_unique_id occurs in the merged data.
# NOTE(review): presumably a duplicate check — value_counts() sorts by count,
# highest first, so a top count of 1 means every ID appears once.
all_data_cleaned_df["event_unique_id"].value_counts()

GO-20155154       1
GO-2017323658     1
GO-2017297070     1
GO-2017303305     1
GO-2017304151     1
                 ..
GO-20181286110    1
GO-20181285543    1
GO-20181285120    1
GO-20181287249    1
GO-20182242344    1
Name: event_unique_id, Length: 166512, dtype: int64

In [17]:
# Use the export function to save all the cleaned data to CSV. The log shows
# one "<source file name>_cleaned_<min_year>_<max_year>.csv" per dataset plus
# "all_data_merged_cleaned_<min_year>_<max_year>.csv" for the merged data.
# NOTE(review): the merged file presumably depends on merge_all_dataframes()
# having been called earlier, and files presumably land in export_folder — confirm.
toronto_data_cleaner.export_all_cleaned(export_folder)

Successfully saved Assault_Open_Data_cleaned_2015_2018.csv!
Successfully saved Auto_Theft_Open_Data_cleaned_2015_2018.csv!
Successfully saved Break_and_Enter_Open_Data_cleaned_2015_2018.csv!
Successfully saved Robbery_Open_Data_cleaned_2015_2018.csv!
Successfully saved Shooting_and_Firearm_Discharges_Open_Data_cleaned_2015_2018.csv!
Successfully saved Theft_From_Motor_Vehicle_Open_Data_cleaned_2015_2018.csv!
Successfully saved Theft_Over_Open_Data_cleaned_2015_2018.csv!
Successfully saved Bicycle_Thefts_Open_Data_cleaned_2015_2018.csv!
Successfully saved Homicides_Open_Data_ASR_RC_TBL_002_cleaned_2015_2018.csv!
Successfully saved all_data_merged_cleaned_2015_2018.csv!
