In [12]:
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import date
# Limit rendered DataFrames to four rows so cell outputs stay compact.
pd.options.display.max_rows = 4

In [13]:
# Load the raw export, keep only the rows whose YEAR is 2015 or later,
# and renumber the remaining rows from zero.
df = (
    pd.read_csv("Vancouver.csv")
    .loc[lambda frame: frame["YEAR"] >= 2015]
    .reset_index(drop=True)
)

In [14]:
# Peek at the last five rows of the filtered frame.
df.tail(5)

Unnamed: 0,TYPE,YEAR,MONTH,DAY,HOUR,MINUTE,HUNDRED_BLOCK,NEIGHBOURHOOD,X,Y
193373,Vehicle Collision or Pedestrian Struck (with I...,2018,8,27,17,33,YUKON ST / W 4TH AVE,Mount Pleasant,491800.0,5457199.0
193374,Vehicle Collision or Pedestrian Struck (with I...,2016,4,20,19,18,YUKON ST / W 5TH AVE,Mount Pleasant,491797.0,5457103.0
...,...,...,...,...,...,...,...,...,...,...
193376,Vehicle Collision or Pedestrian Struck (with I...,2018,8,4,16,22,YUKON ST / W BROADWAY AVE,Mount Pleasant,491786.0,5456719.0
193377,Vehicle Collision or Pedestrian Struck (with I...,2016,6,20,18,11,YUKON ST / W KING EDWARD AVE,Riley Park,491786.0,5455143.0


In [15]:
# Shape the frame for the crime table: rename TYPE, discard the location
# columns that this table does not carry, and add a surrogate key.
df = (
    df
    .rename(columns={'TYPE': 'Crime-type'})
    .drop(columns=['HUNDRED_BLOCK', 'NEIGHBOURHOOD', 'X', 'Y'])
)
# Crime-key is the row index shifted by 300, so keys start at 300.
df['Crime-key'] = df.index + 300
cols = ["Crime-key", "Crime-type", "YEAR", "MONTH", 'DAY', 'HOUR', 'MINUTE']
df = df[cols]  # Keep only these columns, in this order

In [16]:
# Builds the report timestamp of every row from its date/time components.
#
# pd.to_datetime assembles a datetime column directly from a frame whose
# columns are named year/month/day/hour/minute, so the whole column is built
# in one vectorised call instead of formatting and parsing one string per row.
# HOUR is already on a 24-hour clock, so no AM/PM marker is needed; seconds
# are not present in the data and therefore default to zero.
# The result is aligned on the frame's index, so it does not depend on the
# index being 0..n-1. Invalid component combinations raise ValueError.
report_time_parts = df[["YEAR", "MONTH", "DAY", "HOUR", "MINUTE"]].rename(columns=str.lower)
df['Crime-report-time'] = pd.to_datetime(report_time_parts)  # Appends reported time to dataframe

In [17]:
# The separate date/time components are no longer needed once the report
# timestamp exists, so remove them.
df = df.drop(columns=['YEAR', 'MONTH', 'DAY', 'HOUR', 'MINUTE'])
# Start/end times are filled with the literal string 'NaN' as a placeholder
# (a string, not a missing-value marker).
df['Crime-start-time'] = 'NaN'
df['Crime-end-time'] = 'NaN'

In [18]:
# Creates the new column with the category of the crime-type
# Each category lists the crime types that belong to it.
category_mapping = {
    'Break and Enter Properties': ['Break and Enter Commercial', 'Break and Enter Residential/Other'],
    'Homicide': ['Homicide'],
    'Mischief': ['Mischief'],
    'Offence Against a Person': ['Offence Against a Person'],
    'Theft': ['Other Theft', 'Theft from Vehicle', 'Theft of Bicycle', 'Theft of Vehicle'],
    'Traffic': ['Vehicle Collision or Pedestrian Struck (with Fatality)', 'Vehicle Collision or Pedestrian Struck (with Injury)', 'Vehicle Collision or Pedestrian Struck (Fatal or not)'],
}

# Invert the mapping into a direct crime-type -> category lookup so every row
# is resolved with a single dictionary access.
category_by_type = {
    crime_type: category
    for category, member_types in category_mapping.items()
    for crime_type in member_types
}

category_col = df['Crime-type'].map(category_by_type)

# A crime type that belongs to no category would otherwise leave a hole in the
# column; fail loudly and name the offending values instead.
uncategorised = category_col.isna()
if uncategorised.any():
    unknown_types = df.loc[uncategorised, 'Crime-type'].unique()
    raise ValueError(
        "No category defined for crime type(s): "
        + ", ".join(repr(t) for t in unknown_types)
    )

df['Crime-category'] = category_col

In [19]:
# Creates the severity column
# Severity index per crime type, from 8 (highest) down to 1 (lowest).
severity_by_type = {
    'Homicide': 8,
    'Offence Against a Person': 7,
    'Vehicle Collision or Pedestrian Struck (with Fatality)': 6,
    'Vehicle Collision or Pedestrian Struck (with Injury)': 5,
    'Vehicle Collision or Pedestrian Struck (Fatal or not)': 5,
    'Mischief': 4,
    'Break and Enter Commercial': 3,
    'Break and Enter Residential/Other': 3,
    'Theft of Bicycle': 2,
    'Theft of Vehicle': 2,
    'Other Theft': 1,
    'Theft from Vehicle': 1,
}

crime_type_severity = df['Crime-type'].map(severity_by_type)

# A crime type without a severity would otherwise be skipped silently; fail
# loudly and name the offending values instead.
unrated = crime_type_severity.isna()
if unrated.any():
    unknown_types = df.loc[unrated, 'Crime-type'].unique()
    raise ValueError(
        "No severity defined for crime type(s): "
        + ", ".join(repr(t) for t in unknown_types)
    )

# Every row is rated at this point, so the column can be stored as integers.
df['Crime-type-severity-index'] = crime_type_severity.astype('int64')

In [20]:
# Final column layout of the table.
cols = [
    'Crime-key',
    'Crime-report-time',
    'Crime-start-time',
    'Crime-end-time',
    'Crime-type',
    'Crime-category',
    'Crime-type-severity-index',
]
df = df.loc[:, cols]

In [21]:
# Builds a list with one boolean per row telling whether the row's crime type
# denotes a fatal incident. The list is not added to the dataframe.
#
# The check reads the Crime-type column: Crime-key holds integer keys and can
# never equal a crime-type name.
# NOTE(review): the second entry replaces an unfinished "smtgh" placeholder;
# the "(with Fatality)" collision type is assumed to be the intended value --
# confirm.
fatal_crime_types = {
    "Homicide",
    "Vehicle Collision or Pedestrian Struck (with Fatality)",
}
is_fatal = [crime_type in fatal_crime_types for crime_type in df['Crime-type'].tolist()]

Unnamed: 0,Crime-key,Crime-report-time,Crime-start-time,Crime-end-time,Crime-type,Crime-category,Crime-type-severity-index
0,300,2019-03-07 02:06:00,,,Break and Enter Commercial,Break and Enter Properties,3
1,301,2019-08-27 04:12:00,,,Break and Enter Commercial,Break and Enter Properties,3
...,...,...,...,...,...,...,...
193376,193676,2018-08-04 16:22:00,,,Vehicle Collision or Pedestrian Struck (with I...,Traffic,5
193377,193677,2016-06-20 18:11:00,,,Vehicle Collision or Pedestrian Struck (with I...,Traffic,5


In [22]:
# Write the finished table to disk; the row index is not exported.
output_csv = "Vancouver_Crime_Final.csv"
df.to_csv(output_csv, index=False)