In [1]:
# Import dependencies

import pandas as pd
from pathlib import Path

In [2]:
# Load the cleaned 2015-2018 crime records into a pandas dataframe

crime_csv = Path("../cleaned_data_2015_2018/all_data_merged_cleaned_2015_2018.csv")
crime_df = pd.read_csv(crime_csv)
crime_df

Unnamed: 0,event_unique_id,crime,occ_year,occ_month,occ_day,occ_dow,occ_hour,premises_type,hood_140,neighbourhood_140,long_wgs84,lat_wgs84,date
0,GO-20155154,assault,2015,January,1,Thursday,23,House,115,Mount Dennis (115),-79.504668,43.693238,2015-01-01
1,GO-20151233,assault,2015,January,1,Thursday,3,Commercial,77,Waterfront Communities-The Island (77),-79.392855,43.647315,2015-01-01
2,GO-2015862,assault,2015,January,1,Thursday,2,Commercial,77,Waterfront Communities-The Island (77),-79.387700,43.649776,2015-01-01
3,GO-2015182,assault,2015,January,1,Thursday,0,Commercial,1,West Humber-Clairville (1),-79.595562,43.686751,2015-01-01
4,GO-2015276,assault,2015,January,1,Thursday,0,Outside,77,Waterfront Communities-The Island (77),-79.400096,43.645835,2015-01-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...
166507,GO-20181887731,homicide,2018,October,12,Friday,0,,75,Church-Yonge Corridor (75),-79.376699,43.656958,2018-10-12
166508,GO-20181923388,homicide,2018,October,18,Thursday,0,,120,Clairlea-Birchmount (120),-79.286292,43.699070,2018-10-18
166509,GO-20181939006,homicide,2018,October,20,Saturday,0,,124,Kennedy Park (124),-79.264551,43.732765,2018-10-20
166510,GO-20182113063,homicide,2018,November,16,Friday,0,,73,Moss Park (73),-79.369005,43.654614,2018-11-16


In [3]:
# Build a per-day table of crime counts: one row per (date, day of week),
# one column per crime type, with 0 where a crime type did not occur that day
group_keys = ["date", "occ_dow"]
crime_counts = crime_df.groupby(by=group_keys)["crime"].value_counts()
daily_crime = crime_counts.unstack(fill_value=0)
daily_crime

Unnamed: 0_level_0,crime,assault,auto_theft,bicycle_theft,break_and_enter,homicide,robbery,shooting,theft_from_motor_vehicle,theft_over
date,occ_dow,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-01-01,Thursday,158,7,5,21,0,8,0,34,5
2015-01-02,Friday,34,9,4,18,0,6,0,34,2
2015-01-03,Saturday,37,10,0,4,0,8,0,21,3
2015-01-04,Sunday,44,5,1,11,0,5,0,33,1
2015-01-05,Monday,39,6,1,24,0,5,1,31,2
...,...,...,...,...,...,...,...,...,...,...
2018-12-27,Thursday,38,7,2,21,0,7,0,30,1
2018-12-28,Friday,35,13,2,21,0,12,0,31,1
2018-12-29,Saturday,37,10,3,26,0,7,0,21,4
2018-12-30,Sunday,43,8,3,22,0,7,0,22,3


In [4]:
# Inspect the column labels produced by the unstack (one label per crime type)
daily_crime.columns

Index(['assault', 'auto_theft', 'bicycle_theft', 'break_and_enter', 'homicide',
       'robbery', 'shooting', 'theft_from_motor_vehicle', 'theft_over'],
      dtype='object', name='crime')

In [5]:
# Append "_count" to every crime column so each value reads as a daily tally

target_columns = list(map("{}_count".format, daily_crime.columns))

daily_crime.columns = target_columns
daily_crime

Unnamed: 0_level_0,Unnamed: 1_level_0,assault_count,auto_theft_count,bicycle_theft_count,break_and_enter_count,homicide_count,robbery_count,shooting_count,theft_from_motor_vehicle_count,theft_over_count
date,occ_dow,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-01-01,Thursday,158,7,5,21,0,8,0,34,5
2015-01-02,Friday,34,9,4,18,0,6,0,34,2
2015-01-03,Saturday,37,10,0,4,0,8,0,21,3
2015-01-04,Sunday,44,5,1,11,0,5,0,33,1
2015-01-05,Monday,39,6,1,24,0,5,1,31,2
...,...,...,...,...,...,...,...,...,...,...
2018-12-27,Thursday,38,7,2,21,0,7,0,30,1
2018-12-28,Friday,35,13,2,21,0,12,0,31,1
2018-12-29,Saturday,37,10,3,26,0,7,0,21,4
2018-12-30,Sunday,43,8,3,22,0,7,0,22,3


In [6]:
# Reset index to flatten the dataframe: the date and occ_dow index levels
# become regular columns (the later merge joins on "date").
# The result is assigned back instead of mutating with inplace=True, and the
# guard keeps the cell safe to re-run: a second reset_index() on the already
# flat frame would otherwise insert a spurious "index" column.

if "date" in daily_crime.index.names:
    daily_crime = daily_crime.reset_index()

In [7]:
# Confirm that date and occ_dow are now regular columns alongside the count columns
daily_crime.columns

Index(['date', 'occ_dow', 'assault_count', 'auto_theft_count',
       'bicycle_theft_count', 'break_and_enter_count', 'homicide_count',
       'robbery_count', 'shooting_count', 'theft_from_motor_vehicle_count',
       'theft_over_count'],
      dtype='object')

In [8]:
# Load the 2015-2018 daily weather observations into a pandas dataframe
# NOTE(review): the first CSV column is read as "Unnamed: 0" (looks like a saved
# row index) and is carried along with the weather columns — confirm whether it is wanted downstream

weather_csv = Path('../cleaned_data_2015_2018/toronto_daily_weather_2015_2018.csv')
daily_weather_df = pd.read_csv(weather_csv)
daily_weather_df

Unnamed: 0,date,max_temperature,min_temperature,max_relative_humidity,avg_relative_humidity,avg_pressure_sea,max_wind_speed,precipitation,rain,snow,snow_on_ground,daylight,avg_cloud_cover_8
0,2018-12-31,5.0,-2.5,94,79.5,100.78,21,13.6,13.6,0.0,0.0,8.98,4.5
1,2018-12-30,0.7,-6.6,88,78.0,102.03,24,1.2,0.0,1.6,2.0,8.97,5.0
2,2018-12-29,2.7,-7.0,80,68.0,101.86,34,0.0,0.0,0.0,0.0,8.95,4.5
3,2018-12-28,12.4,2.7,95,84.5,100.89,30,1.8,1.8,0.0,0.0,8.93,4.5
4,2018-12-27,2.8,-7.4,87,74.0,102.52,30,5.4,5.4,0.0,0.0,8.93,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1456,2015-01-05,-5.8,-14.4,83,69.5,102.38,45,0.8,0.0,0.8,1.0,9.05,3.5
1457,2015-01-04,4.3,-5.9,98,82.5,100.69,43,6.8,6.4,0.4,0.0,9.02,6.0
1458,2015-01-03,1.5,-5.8,97,82.5,101.96,29,10.8,8.0,2.4,0.0,9.00,5.0
1459,2015-01-02,-1.0,-6.1,83,66.0,102.30,36,0.6,0.0,0.4,0.0,9.00,6.0


In [9]:
# Merge both dataframes on the date column.
# Left join from the weather table: every weather day is kept, and a day that
# is absent from daily_crime would end up with NaN in the crime columns.
# validate="one_to_one" makes pandas raise a MergeError if either table has a
# repeated date, instead of silently duplicating rows in the combined table.
crime_weather_df = daily_weather_df.merge(
    daily_crime,
    how="left",
    on="date",
    validate="one_to_one",
)
crime_weather_df

Unnamed: 0,date,max_temperature,min_temperature,max_relative_humidity,avg_relative_humidity,avg_pressure_sea,max_wind_speed,precipitation,rain,snow,...,occ_dow,assault_count,auto_theft_count,bicycle_theft_count,break_and_enter_count,homicide_count,robbery_count,shooting_count,theft_from_motor_vehicle_count,theft_over_count
0,2018-12-31,5.0,-2.5,94,79.5,100.78,21,13.6,13.6,0.0,...,Monday,38,14,2,20,0,11,0,23,4
1,2018-12-30,0.7,-6.6,88,78.0,102.03,24,1.2,0.0,1.6,...,Sunday,43,8,3,22,0,7,0,22,3
2,2018-12-29,2.7,-7.0,80,68.0,101.86,34,0.0,0.0,0.0,...,Saturday,37,10,3,26,0,7,0,21,4
3,2018-12-28,12.4,2.7,95,84.5,100.89,30,1.8,1.8,0.0,...,Friday,35,13,2,21,0,12,0,31,1
4,2018-12-27,2.8,-7.4,87,74.0,102.52,30,5.4,5.4,0.0,...,Thursday,38,7,2,21,0,7,0,30,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1456,2015-01-05,-5.8,-14.4,83,69.5,102.38,45,0.8,0.0,0.8,...,Monday,39,6,1,24,0,5,1,31,2
1457,2015-01-04,4.3,-5.9,98,82.5,100.69,43,6.8,6.4,0.4,...,Sunday,44,5,1,11,0,5,0,33,1
1458,2015-01-03,1.5,-5.8,97,82.5,101.96,29,10.8,8.0,2.4,...,Saturday,37,10,0,4,0,8,0,21,3
1459,2015-01-02,-1.0,-6.1,83,66.0,102.30,36,0.6,0.0,0.4,...,Friday,34,9,4,18,0,6,0,34,2


In [10]:
# Export data to CSV
# The output folder is defined once and created with pathlib (Path is already
# imported in the first cell), so no extra mid-notebook import is needed.

output_dir = Path("../cleaned_data_2015_2018/combined_data")
output_dir.mkdir(parents=True, exist_ok=True)  # create missing parent folders; no error if it already exists
crime_weather_df.to_csv(output_dir / "crime_weather_data_2015_2018.csv", index=False)