# 5 Years of Crime Incidents
Cleaned by: Raymond Arevalo

In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import scipy.io as io
import scipy.signal as sig
import math as math
import random 
from scipy import integrate
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
plt.style.use('seaborn-colorblind')
plt.rcParams['image.cmap'] = 'RdBu'

In [2]:
import matplotlib.pyplot as plt
import h5py

# Loading files

In [3]:
# Load the three input tables from CSV files in the working directory:
# the incident records, the neighborhood code table and the area/zipcode table.
incidents_csv, codes_csv, zips_csv = 'incidents-5y.csv', 'nbrhcodes.csv', 'area_zip_dict.csv'
dfIncidents = pd.read_csv(incidents_csv)
dfcode = pd.read_csv(codes_csv)
dfzip = pd.read_csv(zips_csv)

FileNotFoundError: File b'incidents-5y.csv' does not exist

# Examining Data

In [None]:
# Preview the first five rows of the incidents table (head() defaults to 5 rows)
print(dfIncidents.head())

In [None]:
# Preview the first five rows of the neighborhood code table (head() defaults to 5 rows)
print(dfcode.head())

In [None]:
# Preview the first five rows of the area/zipcode table (head() defaults to 5 rows)
print(dfzip.head())

# Cleaning Data

In [None]:
# Grabs necessary crime data (year, type, neighborhood).
# The explicit .copy() makes the result an independent frame, so the in-place
# edits performed on dfIncidents in later cells do not raise pandas'
# SettingWithCopyWarning about modifying a slice of the loaded table.
dfIncidents = dfIncidents[['year', 'type', 'nbrhood']].copy()

In [None]:
# Keep only the code and name columns of the neighborhood code table
dfcode = dfcode.loc[:, ['code', 'name']]

In [None]:
# Replaces codes in dfIncidents['nbrhood'] with the matching names from dfcode.
# The lookup is built once and applied to the 'nbrhood' column only. Calling
# DataFrame.replace() once per code scanned every column on every iteration,
# so a code that happened to equal a 'year' or 'type' value was overwritten too,
# and a name equal to a later code could be replaced a second time.
# drop_duplicates keeps the first row for a repeated code.
name_by_code = dfcode.drop_duplicates('code').set_index('code')['name']
translated = dfIncidents['nbrhood'].map(name_by_code)
# Entries with no match in dfcode keep their original value.
dfIncidents['nbrhood'] = translated.fillna(dfIncidents['nbrhood'])

In [None]:
# Replace spaces with '_' in neighborhood names.
# Non-string entries (for example NaN from missing data) are passed through
# unchanged; calling .replace() on them directly raises AttributeError.
dfIncidents['nbrhood'] = dfIncidents['nbrhood'].map(
    lambda nbrhood: nbrhood.replace(" ", "_") if isinstance(nbrhood, str) else nbrhood
)

In [None]:
# Drop rows whose neighborhood is the placeholder string 'NONE'
has_neighborhood = dfIncidents['nbrhood'] != 'NONE'
dfIncidents = dfIncidents[has_neighborhood]

In [None]:
# Keep only the incidents whose neighborhood appears in the 'Area' column of dfzip,
# i.e. the neighborhoods for which zipcode data is available
area = dfzip['Area'].values
in_zip_table = dfIncidents['nbrhood'].isin(area)
dfIncidents = dfIncidents[in_zip_table]

In [None]:
# Renumber the rows 0..n-1 after filtering; drop=True discards the old index
# instead of keeping it as an 'index' column
dfIncidents = dfIncidents.reset_index(drop=True)

In [None]:
# Show the first ten rows of the cleaned incidents table
dfIncidents.iloc[:10]

# Create new dataframe

In [None]:
# Sort by year and neighborhood, then count the incidents in each
# (year, neighborhood) group to get the total crime count
dfIncidents = dfIncidents.sort_values(by=['year', 'nbrhood'])
dfgroup = dfIncidents.groupby(['year', 'nbrhood']).size()

# Turn the grouped counts into a flat table with columns
# year / nbrhood / crimecount and a fresh 0..n-1 index
dfsort = dfgroup.reset_index(name='crimecount')

In [None]:
# Show the per-year, per-neighborhood crime counts
print(dfsort)

# Cleaning new data frame

In [None]:
# Builds `zips`: the zip code of each dfsort row's neighborhood, in row order,
# ready to be attached to dfsort as a new column.
# A lookup keyed by area replaces the nested scan over dfzip. It always yields
# exactly one entry per dfsort row: the first listed zip code when an area
# appears more than once in dfzip, NaN when it does not appear at all. This
# keeps the list aligned with dfsort; the scan appended zero or several entries
# in those cases.
zip_by_area = dfzip.drop_duplicates('Area').set_index('Area')['Zipcode']
zips = dfsort['nbrhood'].map(zip_by_area).tolist()

In [None]:
# Attach the collected zip codes as a new 'zipcode' column, using dfsort's own index.
# NOTE(review): this relies on zips holding exactly one entry per dfsort row - confirm.
dfsort['zipcode'] = pd.Series(zips, index = dfsort.index)

In [None]:
# Remove the neighborhood column from dfsort in place
dfsort.drop('nbrhood', axis=1, inplace=True)

In [None]:
# Order rows by zip code, then by year within each zip code (both ascending),
# and print the result
dfsort = dfsort.sort_values(by=['zipcode', 'year'])
print(dfsort)

In [None]:
# Renumber the rows 0..n-1 after sorting; drop=True discards the old index
# instead of keeping it as an 'index' column
dfsort = dfsort.reset_index(drop=True)

In [None]:
# Display dfsort as the cell output
dfsort

In [None]:
# Remove space characters from the zip codes and convert them to int (currently disabled)
#dfsort['zipcode'] = [int(x.replace(" ","")) for x in dfsort['zipcode'][:]]

In [None]:
# Print dfsort once more before plotting
print(dfsort)

# Visualizing Data

In [None]:

# Distinct years present in dfsort, in ascending numeric order
year_range = dfsort['year'].value_counts()
years = len(year_range)
year_list = sorted(year_range.keys(), key=int)

# Distinct zip codes present in dfsort, in ascending numeric order
zip_range = dfsort['zipcode'].value_counts()
zip_num = len(zip_range)
zip_list = sorted(zip_range.keys(), key=int)
zip_list



In [None]:
# Creates bread for all of the years: a (years x zip_num) float array where
# crimebread[i][k] is the total crime count for year_list[i] in zip_list[k].
# The pivot sums the counts of all neighborhoods that share a zip code and
# fills (year, zipcode) pairs with no incidents with 0. Assigning the raw
# per-neighborhood rows only works when every year has exactly one row per
# zip code.
crime_table = dfsort.pivot_table(
    index='year',
    columns='zipcode',
    values='crimecount',
    aggfunc='sum',
    fill_value=0,
)
# Reindex so rows follow year_list and columns follow zip_list, as later cells expect.
crimebread = crime_table.reindex(
    index=year_list, columns=zip_list, fill_value=0
).values.astype(float)

In [None]:
# Line plot of crimebread: the x axis is the row (year) index and each
# column (zip code) is drawn as its own line
plt.xlabel('Years since 2007')
plt.ylabel('Crime Rates in San Diego Neighborhoods')
_ = plt.plot(crimebread)



In [None]:
# Slice out a single year: row 4 of crimebread, i.e. the fifth year in year_list
crimeslice = crimebread[4]

In [None]:
# Line plot of the single-year slice: the x axis is the position of each
# zip code within crimeslice
plt.xlabel('Zip codes')
plt.ylabel('Crime Rates in San Diego Neighborhoods')
_ = plt.plot(crimeslice)

In [None]:
# Areas picked out for the "selected" subset
listSelected = [
    'Oceanside', 'Vista', 'Coronado', 'National_City', 'Chula_Vista',
    'Jamul', 'Spring_Valley', 'Lemon_Grove', 'La_Mesa', 'El_Cajon',
    'Santee', 'Lakeside', 'Alpine', 'Poway', 'Ramona', 'Escondido',
    'San_Marcos', 'Valley_Center', 'Fallbrook',
]
crimezipss = []
# if foo is None
# Look up the zip code entries recorded for one of the areas
print(dfzip.loc[dfzip['Area'] == 'Oceanside', 'Zipcode'])
#crimezipss.append(dfzip[dfzip.Area == 'Oceanside'].Zipcode)
#crimezipss.append(dfzip[dfzip.Area == 'Coronado'].Zipcode)
# Nothing is appended above, so this prints an empty list
print(crimezipss)



In [None]:
# Crime counts across zip codes, one bar chart per year.
# Row j of crimebread holds the counts for year_list[j]. The per-year lists
# keep their existing names so that any later cell using them still works.
zipcrime_2008 = list(crimebread[0])
zipcrime_2009 = list(crimebread[1])
zipcrime_2010 = list(crimebread[2])
zipcrime_2011 = list(crimebread[3])
zipcrime_2012 = list(crimebread[4])
zipcrime_by_year = [
    zipcrime_2008,
    zipcrime_2009,
    zipcrime_2010,
    zipcrime_2011,
    zipcrime_2012,
]

# (figure number, subplot position) for each year. The first year sits alone
# on the right half of figure 1; the remaining years are shown two per figure.
panel_layout = [(1, 2), (2, 1), (2, 2), (3, 1), (3, 2)]

current_figure = None
for (figure_number, position), year, counts in zip(panel_layout, year_list, zipcrime_by_year):
    # Open each figure once; consecutive panels of the same figure reuse it.
    if figure_number != current_figure:
        plt.figure(figure_number, figsize=(15, 5))
        current_figure = figure_number
    axes = plt.subplot(1, 2, position)
    axes.set_title(year)
    # Label every panel; labels set before the first subplot exists end up on
    # a separate axes rather than on the charts.
    axes.set_xlabel('zipcodes')
    axes.set_ylabel('crime rates in SD neighborhoods')
    # One bar per zip code: size the x positions from the data instead of a
    # hard-coded count, so the chart follows the number of zip codes.
    axes.bar(list(range(len(counts))), counts)

In [None]:

# Crime counts over the years for a single zip code:
# column 11 of crimebread (La Jolla, per the author's note)
plt.xlabel('years since 2008')
plt.ylabel('crime rates in neighborhood')
_ = plt.plot(crimebread[:, 11])  # La Jolla



In [None]:
# Change of crime rate

# Exporting Data

In [None]:
import matplotlib.pyplot as plt
import h5py

In [104]:
#creating data
#cogs108h5 = h5py.File('cogs108.h5', 'a')
#cogs108h5.create_dataset('crime_rate_5y_all', data = crimeslice )
#cogs108h5.close()

In [None]:
#cogs108h5 = h5py.File('cogs108.h5', 'a')
#cogs108h5.create_dataset('crime_rate_5y_selected', data = crimeslice )
#cogs108h5.close()

In [102]:
#load data from h5
# Path of the HDF5 file that the reading cells below open
h5_file = 'cogs108.h5'

In [105]:
#reading data 2012
# Read the 'crime_rate_5y_all' dataset into memory with [()] while the file is
# still open. Without it, dset is only a handle into the file and can no
# longer be read once the with-block has closed the file.
with h5py.File(h5_file, 'r') as h5:
    dset = h5['crime_rate_5y_all'][()]

In [None]:
#reading data for selected zip codes in 2012
# Read the 'crime_rate_5y_selected' dataset into memory with [()] while the
# file is still open. Without it, dset is only a handle into the file and can
# no longer be read once the with-block has closed the file.
with h5py.File(h5_file, 'r') as h5:
    dset = h5['crime_rate_5y_selected'][()]