# Group 1 project: Crime Data in Denver for 2015-2017

In [2]:
import pandas as pd 
import matplotlib.pyplot as plt 
import numpy as np 
from datetime import datetime as dt 
import random
import matplotlib.colors
from pylab import rcParams

#dependencies for location heatmap 
from census import Census 
from config import (census_key, gkey)
import gmaps 
import requests 
import time 

# Census API client authenticated with the key from config, created with year=2015.
# NOTE(review): `c` is not referenced by any other cell visible in this notebook.
c = Census(census_key, year = 2015) 

ModuleNotFoundError: No module named 'census'

## Importing crime data and offense code csv files and merging them on offense code

In [None]:
# Load the raw incident records and the offense-code lookup table.
df_crime = pd.read_csv('crimedata.csv')
df_offense = pd.read_csv('offense_codes.csv')

# Keep a single lookup row per (OFFENSE_CODE, OFFENSE_CATEGORY_ID) pair.
df_offense = df_offense.drop_duplicates(subset=['OFFENSE_CODE', 'OFFENSE_CATEGORY_ID'])

# Inner-join the lookup onto the incidents by OFFENSE_CODE. Columns present in
# both files come back suffixed _x (incident file) and _y (lookup file).
# NOTE(review): a code that maps to several categories would duplicate incident rows.
df_crimemerge = pd.merge(df_crime, df_offense, on='OFFENSE_CODE')

# Non-null count per column of the merged frame.
df_crimemerge.count()

In [None]:
# Summary statistics for the longitude column of the merged data.
df_crimemerge['GEO_LON'].describe()

## Converting the date columns from object to datetime and refining our data

In [None]:
# Parse the three timestamp columns (loaded as plain objects) into datetimes,
# one column at a time.
date_columns = ['FIRST_OCCURRENCE_DATE', 'LAST_OCCURRENCE_DATE', 'REPORTED_DATE']
for date_column in date_columns:
    df_crimemerge[date_column] = pd.to_datetime(df_crimemerge[date_column])
df_crimemerge.head()

In [None]:
# Narrow the merged data down to the columns used by the plots below.
refined_columns = [
    "INCIDENT_ID",
    "OFFENSE_ID",
    "OFFENSE_CODE",
    "OFFENSE_TYPE_ID_x",
    "OFFENSE_CATEGORY_ID_x",
    "FIRST_OCCURRENCE_DATE",
    "REPORTED_DATE",
    "INCIDENT_ADDRESS",
    "GEO_LON",
    "GEO_LAT",
    "DISTRICT_ID",
    "PRECINCT_ID",
    "NEIGHBORHOOD_ID",
    "IS_CRIME_x",
    "IS_TRAFFIC_x",
    "OFFENSE_CATEGORY_NAME",
]
df_crimemerge_refined = df_crimemerge.loc[:, refined_columns]
df_crimemerge_refined.head()

In [None]:
# Renaming the merge-suffixed (_x) offense columns back to their plain names.
# Only a column mapping is passed: an `index=str` mapper would also convert every
# row label to a string, which nothing in this analysis needs.
# NOTE(review): IS_CRIME_x and IS_TRAFFIC_x keep their suffix; rename them too if desired.
df_crimemerge_refined = df_crimemerge_refined.rename(
    columns={
        "OFFENSE_TYPE_ID_x": "OFFENSE_TYPE_ID",
        "OFFENSE_CATEGORY_ID_x": "OFFENSE_CATEGORY_ID",
    }
)
df_crimemerge_refined.head()

In [None]:
# Dropping all rows with a missing Longitude or Latitude.
geo_columns = ["GEO_LON", "GEO_LAT"]

# Treat empty strings as missing. The result is assigned back to the frame because
# an in-place replace on a selected column (df[col].replace(..., inplace=True))
# does not update the parent frame under pandas Copy-on-Write.
df_crimemerge_refined[geo_columns] = df_crimemerge_refined[geo_columns].replace('', np.nan)

# Drop a row when either coordinate is missing.
df_crimemerge_refined = df_crimemerge_refined.dropna(subset=geo_columns)

# Non-null count per column after the clean-up.
df_crimemerge_refined.count()

In [None]:
# Preview the first rows of the cleaned data.
df_crimemerge_refined.head()

In [None]:
# df_crimemerge_refined = df_crimemerge_refined['OFFENSE_CATEGORY_ID'].astype(str)


## Creating our bins for each year 2015-2017

In [None]:
# 2015 bin: every incident whose first occurrence is before 1/1/16 0:00
before_2016 = df_crimemerge_refined['FIRST_OCCURRENCE_DATE'] < '1/1/16 0:00'
df_15 = df_crimemerge_refined.loc[before_2016]
df_15.count()
# Summary of the timestamps that landed in this bin
df_15['FIRST_OCCURRENCE_DATE'].describe()


In [None]:
# 2016 bin: incidents whose first occurrence falls in calendar year 2016.
# Selecting on the year replaces the minute-resolution string bounds: a lower bound
# of '12/31/15 23:59' also admits 23:59:01-23:59:59 of 2015, which double-counts
# those incidents with the 2015 bin.
occurred_in_2016 = df_crimemerge_refined['FIRST_OCCURRENCE_DATE'].dt.year == 2016
df_16 = df_crimemerge_refined.loc[occurred_in_2016]
# Summary of the timestamps that landed in this bin
df_16['FIRST_OCCURRENCE_DATE'].describe()

In [None]:
# 2017 bin: incidents whose first occurrence is in calendar year 2017 or later.
# Selecting on the year replaces the '12/31/16 23:59' string bound, which also
# admits the last 59 seconds of 2016 and so overlaps the 2016 bin.
occurred_from_2017 = df_crimemerge_refined['FIRST_OCCURRENCE_DATE'].dt.year >= 2017
df_17 = df_crimemerge_refined.loc[occurred_from_2017]
# Summary of the timestamps that landed in this bin
df_17['FIRST_OCCURRENCE_DATE'].describe()

## 1st Plot: Number of offenses by category

In [None]:
# Group the cleaned incidents by offense category; later cells reuse this object.
offense_categories = df_crimemerge_refined.groupby(by='OFFENSE_CATEGORY_ID')
# Show the first five rows of every category group.
offense_categories.head(5)

In [None]:
# Incidents per offense category, most frequent first.
offense_counts = df_crimemerge_refined['OFFENSE_CATEGORY_ID'].value_counts(sort=True, dropna=True)
offense_counts.head()

# Two-column table: 'Offense Type' (the category) and 'counts'.
# NOTE: this rebinds df_offense, which earlier held the offense-code lookup table.
df_offense = offense_counts.rename_axis('Offense Type').reset_index(name='counts')
df_offense['Offense Type'].count()

# One colour per bar.
colors = [
    'blue', 'red', 'green', 'yellow', 'brown', 'orange', 'grey',
    'purple', 'black', 'lightblue', 'gold', 'violet', 'pink', 'maroon',
]

# First plot: count of each offense type across all years.
# The categories go on the x-axis, with one tick per category.
x_axis = df_offense["Offense Type"]
tick_offense = list(x_axis)

plt.figure(figsize=(20, 3))
plt.bar(x_axis, df_offense['counts'], color=colors, alpha=0.9, align='center')
plt.xticks(tick_offense, df_offense['Offense Type'], rotation="vertical")


In [None]:
# Pie chart of the share of incidents in each offense category.
df_neighbor = offense_counts.rename_axis('Offense Types').reset_index(name='counts')

# Labels for the sections of our pie chart
labels = df_neighbor["Offense Types"]

# The values of each section of the pie chart
sizes = df_neighbor["counts"]

# Palette kept for reference only: plt.pie below is called with colors=None,
# so matplotlib's default colour cycle is what gets drawn.
colors = ["red", "orange", "lightcoral", "lightskyblue", "Purple"]

# Pull the 10th-13th wedges away from the centre (presumably the small ones, so
# their labels do not overlap). The tuple is built from the data length because
# plt.pie raises ValueError when explode and the values differ in length.
explode = tuple(0.6 if 9 <= position <= 12 else 0 for position in range(len(sizes)))

# The figure size must be set before the figure is created to apply to this chart.
rcParams['figure.figsize'] = 15, 15

# Creates the pie chart based upon the values above
# Automatically finds the percentages of each part of the pie chart
plt.pie(sizes, explode=explode, labels=labels, colors=None,
        autopct="%1.1f%%", shadow=False, startangle=0)

# Tells matplotlib that we want a pie chart with equal axes
plt.axis("equal")




## 2nd Plot: Number of offenses by time of day for each observed year

In [None]:
# Count incidents per time-of-day band for each observed year and plot them.


def _time_of_day_totals(hour_counts):
    """Return (morning, afternoon, evening) totals from per-hour incident counts.

    hour_counts is a Series indexed by hour of day, as produced by
    value_counts() on the hour of the timestamp. An hour with no incidents is
    absent from value_counts(), so missing hours are filled with 0 instead of
    raising KeyError on lookup.

    Bands: morning = hours 0-8, afternoon = hours 9-16, evening = hours 17-23.
    """
    by_hour = hour_counts.reindex(range(24), fill_value=0)
    # .loc slices are label-based and include both end points.
    return by_hour.loc[0:8].sum(), by_hour.loc[9:16].sum(), by_hour.loc[17:23].sum()


# Year 2015
hour_2015 = df_15['FIRST_OCCURRENCE_DATE'].dt.hour
hour_2015_count = hour_2015.value_counts()
morning_2015, afternoon_2015, evening_2015 = _time_of_day_totals(hour_2015_count)

# Year 2016
hour_2016 = df_16['FIRST_OCCURRENCE_DATE'].dt.hour
hour_2016_count = hour_2016.value_counts()
morning_2016, afternoon_2016, evening_2016 = _time_of_day_totals(hour_2016_count)

# Year 2017
hour_2017 = df_17['FIRST_OCCURRENCE_DATE'].dt.hour
hour_2017_count = hour_2017.value_counts()
morning_2017, afternoon_2017, evening_2017 = _time_of_day_totals(hour_2017_count)

# Collect the nine (year, band) totals into one table.
Labels = ['2015 Morning', '2015 Afternoon','2015 Evening','2016 Morning', '2016 Afternoon','2016 Evening','2017 Morning', '2017 Afternoon','2017 Evening']
Times = [morning_2015, afternoon_2015, evening_2015,
         morning_2016, afternoon_2016, evening_2016,
         morning_2017, afternoon_2017, evening_2017]
hour_of_crime_dict = {'Time of Day': Labels, 'Crimes': Times}
hour_of_crime_df = pd.DataFrame(hour_of_crime_dict)

# Per-hour counts of each year as DataFrames (not used by the plot below).
twenty_15 = pd.DataFrame(hour_2015_count)
twenty_16 = pd.DataFrame(hour_2016_count)
twenty_17 = pd.DataFrame(hour_2017_count)

# One x position per (year, band) entry; y is the incident total of that entry.
x_axis = np.arange(len(Times))
y_axis = hour_of_crime_df['Crimes']
plt.plot(x_axis, y_axis)
# Name each point, otherwise the x-axis only shows the positions 0-8.
plt.xticks(x_axis, Labels, rotation=45)
plt.ylabel('Crimes')

plt.show()

In [None]:
# Bar chart of the number of incidents in each offense category.
# The per-category count Series is plotted so that a single Axes comes back;
# calling .plot() on the GroupBy object itself plots every group separately and
# returns a Series of Axes, on which the label setters below do not exist.
count_chart = offense_categories['INCIDENT_ID'].count().plot(kind='bar')

# Set the xlabel and ylabel using class methods
# (the x-axis holds offense categories, the grouping key of offense_categories)
count_chart.set_xlabel("OFFENSE_CATEGORY_ID")
count_chart.set_ylabel("INCIDENT_ID")

plt.show()

In [None]:
# Split up our data into groups based upon offense codes.
# The grouping starts from the DataFrame: offense_categories is already a GroupBy
# object, and GroupBy objects have no .groupby method.
offense_codes = df_crimemerge_refined.groupby('OFFENSE_CODE')

# Find out how many offenses there are for each offense code
offense_code_count = offense_codes['OFFENSE_CODE'].count()
offense_code_count.head()



In [None]:
# Chart the per-code counts, give the chart a title, and label the axes.
# The Axes gets its own name so offense_code_count stays a Series and this cell
# can be re-run without failing.
offense_code_chart = offense_code_count.plot(kind="bar", title="Offenses By Offense Code")
offense_code_chart.set_xlabel("Offense Codes")
offense_code_chart.set_ylabel("Number of offenses")

# tight_layout has to run before show to affect the figure that is displayed.
plt.tight_layout()
plt.show()

In [None]:
# Bar chart of incident counts per offense category.
# x holds the distinct categories and y the matching counts, one bar each.
# Passing the raw column together with a single total would instead draw one
# full-height bar per incident row.
category_counts = df_crimemerge_refined["OFFENSE_CATEGORY_ID"].value_counts()
x_axis = category_counts.index
y_axis = category_counts.values

plt.bar(x_axis, y_axis)
# Category names are long; turn them so they stay readable.
plt.xticks(rotation="vertical")
plt.show()



In [None]:
#Creating plot for 2017 incident counts vs Offense Category ID
# NOTE(review): x_axis_17 / y_axis_17 are not defined in any visible cell, so they
# are derived here from the 2017 bin. The old comment said 2015 - confirm the year.
counts_17 = df_17['OFFENSE_CATEGORY_ID'].value_counts()
x_axis_17 = counts_17.index
y_axis_17 = counts_17.values
plt.bar(x_axis_17, y_axis_17, color='blue')
plt.show()

In [None]:
# Group the data by offense category and rank the categories by number of offenses.
# NOTE(review): the original referenced an undefined `df`; the cleaned frame
# df_crimemerge_refined is assumed to be the intended source - confirm.
df_offense_category = df_crimemerge_refined.groupby('OFFENSE_CATEGORY_ID')
df_offense_category_count = df_offense_category.count()
# Most frequent category first, ranked on the non-null OFFENSE_ID count.
df_offense_category_count = df_offense_category_count.sort_values(by=['OFFENSE_ID'], ascending=False)
df_offense_category_count.head()



In [None]:
# Group the data by neighborhood and rank the neighborhoods by number of incidents.
# NOTE(review): the original referenced an undefined `df`; the cleaned frame
# df_crimemerge_refined is assumed to be the intended source - confirm.
df_neighborhood = df_crimemerge_refined.groupby('NEIGHBORHOOD_ID')
df_neighborhood_count = df_neighborhood.count()
# Highest incident count first, ranked on the non-null INCIDENT_ID count.
df_neighborhood_count = df_neighborhood_count.sort_values(by=['INCIDENT_ID'], ascending=False)
df_neighborhood_count.head()

In [None]:
#create histogram for offense category
#offense_catagory = dfcrimemerge.groupby('OFFENSE_CATEGORY_ID')
#offense_count = offense_catagory['OFFENSE_ID'].count()


In [None]:
# List the column names of the merged (pre-refinement) data.
df_crimemerge.columns

In [None]:
# Summary statistics of the cleaned latitude and longitude columns.
print(df_crimemerge_refined['GEO_LAT'].describe())
print(df_crimemerge_refined['GEO_LON'].describe())

In [None]:
# Split the incidents into four quadrants around a fixed reference point.
# NOTE(review): the original cell was unfinished (it did not parse). Collecting
# INCIDENT_ID values, and counting points exactly on a dividing line as south /
# west, are assumptions - confirm the intended contents of these lists.
CENTER_LAT = 39.739802
CENTER_LON = -104.982737

# Row-wise masks: strictly above the reference latitude / strictly east of the
# reference longitude.
is_north = df_crimemerge_refined['GEO_LAT'] > CENTER_LAT
is_east = df_crimemerge_refined['GEO_LON'] > CENTER_LON

northeast_list = df_crimemerge_refined.loc[is_north & is_east, 'INCIDENT_ID'].tolist()
northwest_list = df_crimemerge_refined.loc[is_north & ~is_east, 'INCIDENT_ID'].tolist()
southeast_list = df_crimemerge_refined.loc[~is_north & is_east, 'INCIDENT_ID'].tolist()
southwest_list = df_crimemerge_refined.loc[~is_north & ~is_east, 'INCIDENT_ID'].tolist()
    
     
            
        
    

In [None]:
# Mean of every numeric column per neighborhood (this yields the mean
# GEO_LAT / GEO_LON). numeric_only=True is needed on pandas >= 2.0, where
# averaging text columns raises instead of silently skipping them.
df_neighborhood_mean = df_crimemerge_refined.groupby('NEIGHBORHOOD_ID').mean(numeric_only=True)

# Non-null count of every column per neighborhood
df_neighborhood_crime_count = df_crimemerge_refined.groupby('NEIGHBORHOOD_ID').count()

# Merge the two tables on the neighborhood; columns present in both get the
# suffix _x (from the means) or _y (from the counts).
df_neighborhood = pd.merge(df_neighborhood_mean, df_neighborhood_crime_count, on='NEIGHBORHOOD_ID', how='outer')

# Keep only the mean latitude, the mean longitude and the incident count
df_neighborhood = df_neighborhood[['GEO_LAT_x', 'GEO_LON_x', 'INCIDENT_ID_y']]


In [None]:
#df_neighborhood_crime_count.head()
# Display the current contents of df_neighborhood.
df_neighborhood

In [None]:
# Smallest per-neighborhood mean latitude and longitude.
# NOTE(review): the original indexed df_crimemerge_refined with 'GEO_LAT_x' and a
# bare undefined name. The _x columns exist on df_neighborhood, so that frame and
# the latitude/longitude pair are assumed here - confirm.
df_neighborhood[['GEO_LAT_x', 'GEO_LON_x']].min()

In [None]:
# Five most frequent offense categories of each year.
# value_counts() runs over the whole year before the top five are taken; slicing
# the column with [0:5] first would only count the first five rows of the bin.
df_15_top_5 = pd.DataFrame(df_15['OFFENSE_CATEGORY_ID'].value_counts().head(5))
df_16_top_5 = pd.DataFrame(df_16['OFFENSE_CATEGORY_ID'].value_counts().head(5))
df_17_top_5 = pd.DataFrame(df_17['OFFENSE_CATEGORY_ID'].value_counts().head(5))


In [None]:
# Location point map: one marker at each neighborhood's mean incident location.
# gmaps is configured and the figure is created here, so this cell does not rely
# on cells that only appear further down the notebook.
gmaps.configure(api_key=gkey)
locations = df_neighborhood[["GEO_LAT_x", "GEO_LON_x"]].astype(float)
fig = gmaps.figure()
markers = gmaps.marker_layer(locations)
fig.add_layer(markers)
fig

In [None]:
# List the column names of the cleaned data.
df_crimemerge_refined.columns

In [None]:
# Configure gmaps with the API key (gkey) imported from config
gmaps.configure(api_key=gkey)

In [None]:
# Store the per-neighborhood mean latitude / longitude columns as float locations
locations = df_neighborhood[["GEO_LAT_x", "GEO_LON_x"]].astype(float)

# Per-neighborhood incident count, cast to int; used as the heatmap weights below.
# NOTE(review): astype(int) raises if the column contains NaN - confirm none can occur.
crime_rate = df_neighborhood["INCIDENT_ID_y"].astype(int)


In [None]:
# Preview the first heatmap weights.
crime_rate.head()

In [None]:
# Create a crime-count heatmap layer on a 'HYBRID' base map.
fig = gmaps.figure(map_type='HYBRID')

# Display settings shared by the constructor call and the attribute updates below.
heat_settings = {"dissipating": True, "max_intensity": 2000, "point_radius": 20}
heat_layer = gmaps.heatmap_layer(locations, weights=crime_rate, **heat_settings)

# Re-apply the same settings on the layer object, as the original cell did.
for setting_name, setting_value in heat_settings.items():
    setattr(heat_layer, setting_name, setting_value)

fig.add_layer(heat_layer)

fig

In [None]:
# Preview the first rows of the cleaned data.
df_crimemerge_refined.head()

In [None]:
# Five most frequent offense categories of each year, as one-column tables.

# 2015
df_15_top_5 = df_15['OFFENSE_CATEGORY_ID'].value_counts().to_frame()
df_15_top_5 = df_15_top_5.iloc[:5]
print(df_15_top_5)

# 2016
df_16_top_5 = df_16['OFFENSE_CATEGORY_ID'].value_counts().to_frame()
df_16_top_5 = df_16_top_5.iloc[:5]
print(df_16_top_5)

# 2017
df_17_top_5 = df_17['OFFENSE_CATEGORY_ID'].value_counts().to_frame()
df_17_top_5 = df_17_top_5.iloc[:5]
print(df_17_top_5)

In [None]:
# Merge the top 5 crime data of 2015 and 2016 on the category index.
# The default inner join keeps only categories present in both tables.
df_1516_top_5 = df_15_top_5.merge(df_16_top_5, left_index=True, right_index=True)

# Add 2017 the same way to get the categories that are in the top 5 of all three years.
df_151617_top_5 = df_1516_top_5.merge(df_17_top_5, left_index=True, right_index=True)


In [None]:
# Counts per crime type, each list ordered as [2015, 2016, 2017].
# NOTE(review): the values are typed in by hand, and y2 / y3 repeat the same number
# for 2015 and 2016 - verify them against df_151617_top_5.
y1, y2, y3, y4, y5 = (
    [23305, 23738, 24242],
    [15784, 15784, 17260],
    [14161, 14161, 14649],
    [9791, 9914, 9344],
    [6262, 6568, 7333],
)

# One x position per year.
x = np.arange(len(y1))



In [None]:
# Grouped bar chart: one cluster of bars per year, one bar per crime type.

# The size is set before the first plotting call so it applies to this figure.
rcParams['figure.figsize'] = 10, 10

bar_width = 0.15
# Legend label paired with its per-year counts, in drawing order.
crime_series = [
    ('traffic-accident', y1),
    ('all-other-crimes', y2),
    ('larceny', y3),
    ('public-disorder', y4),
    ('theft-from-motor-vehicle', y5),
]
for position, (crime_type, yearly_values) in enumerate(crime_series):
    # Shift each crime type one bar width to the right within the year cluster.
    plt.bar(x + bar_width * position, yearly_values, width=bar_width, label=crime_type)

# Centre each year label under its cluster instead of under the first bar.
cluster_centre = bar_width * (len(crime_series) - 1) / 2
plt.xticks(x + cluster_centre, ('2015', '2016', '2017'))
plt.legend(title='Crime Types')
plt.xlabel('Year')
plt.ylabel('Crime Count')
plt.title('Top 5 Crime Types over Years')
plt.show()