In [23]:
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt

In [292]:
# Load the incident records; the path is relative to the kernel's working directory.
raw = pd.read_csv('sanfrancisco_incidents_summer_2014.csv')
# Preview only the first rows instead of rendering the entire frame.
raw.head(10)

Unnamed: 0,IncidntNum,Category,Descript,DayOfWeek,Date,Time,PdDistrict,Resolution,Address,X,Y,Location,PdId
0,140734311,ARSON,ARSON OF A VEHICLE,Sunday,08/31/2014,23:50,BAYVIEW,NONE,LOOMIS ST / INDUSTRIAL ST,-122.405647,37.738322,"(37.7383221869053, -122.405646994567)",14073431126031
1,140736317,NON-CRIMINAL,LOST PROPERTY,Sunday,08/31/2014,23:45,MISSION,NONE,400 Block of CASTRO ST,-122.435012,37.761768,"(37.7617677182954, -122.435012093789)",14073631771000
2,146177923,LARCENY/THEFT,GRAND THEFT FROM LOCKED AUTO,Sunday,08/31/2014,23:30,SOUTHERN,NONE,1000 Block of MISSION ST,-122.409795,37.780036,"(37.7800356268394, -122.409795194505)",14617792306244
3,146177531,LARCENY/THEFT,GRAND THEFT FROM LOCKED AUTO,Sunday,08/31/2014,23:30,RICHMOND,NONE,FULTON ST / 26TH AV,-122.485263,37.772518,"(37.7725176473142, -122.485262988324)",14617753106244
4,140734220,NON-CRIMINAL,FOUND PROPERTY,Sunday,08/31/2014,23:23,RICHMOND,NONE,800 Block of LA PLAYA ST,-122.509895,37.772313,"(37.7723131976814, -122.509895418239)",14073422072000
5,140734349,DRUG/NARCOTIC,POSSESSION OF MARIJUANA,Sunday,08/31/2014,23:13,SOUTHERN,"ARREST, BOOKED",11TH ST / MINNA ST,-122.416578,37.773907,"(37.773907074489, -122.416578493475)",14073434916010
6,140734349,DRUG/NARCOTIC,POSSESSION OF CONTROLLED SUBSTANCE FOR SALE,Sunday,08/31/2014,23:13,SOUTHERN,"ARREST, BOOKED",11TH ST / MINNA ST,-122.416578,37.773907,"(37.773907074489, -122.416578493475)",14073434916662
7,140734349,DRIVING UNDER THE INFLUENCE,DRIVING WHILE UNDER THE INFLUENCE OF ALCOHOL,Sunday,08/31/2014,23:13,SOUTHERN,"ARREST, BOOKED",11TH ST / MINNA ST,-122.416578,37.773907,"(37.773907074489, -122.416578493475)",14073434965050
8,140738147,OTHER OFFENSES,EVADING A POLICE OFFICER RECKLESSLY,Sunday,08/31/2014,23:00,INGLESIDE,NONE,1400 Block of DOLORES ST,-122.424498,37.746649,"(37.7466488718366, -122.424497944857)",14073814727175
9,140734258,TRESPASS,TRESPASSING,Sunday,08/31/2014,23:00,CENTRAL,"ARREST, CITED",900 Block of MASON ST,-122.410846,37.792316,"(37.7923158747647, -122.410845624227)",14073425827195


In [295]:
# Overall pie chart: share of incidents per Category.
# Rows are counted per category and ordered from most to least frequent.
cate = raw.groupby(by = 'Category').count()
cate = cate.sort_values('IncidntNum', ascending = False)
cate_pct = 100. * cate.IncidntNum / cate.IncidntNum.sum()

# Build the palette BEFORE drawing: one colour per category, so the cell
# does not depend on a `colors` variable left over from an earlier run.
colors = plt.cm.prism(np.linspace(0., 1., len(cate.IncidntNum)))

fig = plt.figure(figsize = (20, 10))
ax = fig.add_subplot(111)
ax.axis('equal')  # equal aspect ratio so the pie is drawn as a circle

cate_pie = ax.pie(cate.IncidntNum, colors = colors)
for pie_wedge in cate_pie[0]:
    pie_wedge.set_edgecolor('white')

# Legend entries read "<category> - <percentage> %"; the wedges are passed
# explicitly so each label is tied to its own patch.
labels = ['{0} - {1:1.2f} %'.format(i, j) for i,j in zip(cate.index, cate_pct)]
ax.legend(cate_pie[0], labels, loc = 2, fontsize = 10, frameon = False)

ax.set_title("Categories of Incident in SF, 2014 Summer")
plt.savefig('pie_cate_all.png')

In [296]:
# Overall pie chart: share of incidents per Resolution.
# Rows are counted per resolution and ordered from most to least frequent.
res = raw.groupby(by = 'Resolution').count()
res = res.sort_values('IncidntNum', ascending = False)
res_pct = 100. * res.IncidntNum / res.IncidntNum.sum()

# Build the palette BEFORE drawing: one colour per resolution, so the pie
# does not reuse a `colors` array sized for a different chart.
colors = plt.cm.prism(np.linspace(0., 1., len(res.IncidntNum)))

fig = plt.figure(figsize = (20, 10))
ax = fig.add_subplot(111)
ax.axis('equal')  # equal aspect ratio so the pie is drawn as a circle

res_pie = ax.pie(res.IncidntNum, colors = colors)
for pie_wedge in res_pie[0]:
    pie_wedge.set_edgecolor('white')

# Legend entries read "<resolution> - <percentage> %"; the wedges are passed
# explicitly so each label is tied to its own patch.
labels = ['{0} - {1:1.2f} %'.format(i, j) for i,j in zip(res.index, res_pct)]
ax.legend(res_pie[0], labels, loc = 2, fontsize = 10, frameon = False)

ax.set_title("Resolutions of Incident in SF, 2014 Summer")
plt.savefig('pie_res_all.png')

In [297]:
# Overall pie chart: share of incidents per police district (PdDistrict).
# Rows are counted per district and ordered from most to least frequent.
dis = raw.groupby(by = 'PdDistrict').count()
dis = dis.sort_values('IncidntNum', ascending = False)
dis_pct = 100. * dis.IncidntNum / dis.IncidntNum.sum()

# Build the palette BEFORE drawing: one colour per district, so the pie
# does not reuse a `colors` array sized for a different chart.
colors = plt.cm.prism(np.linspace(0., 1., len(dis.IncidntNum)))

fig = plt.figure(figsize = (20, 10))
ax = fig.add_subplot(111)
ax.axis('equal')  # equal aspect ratio so the pie is drawn as a circle

dis_pie = ax.pie(dis.IncidntNum, colors = colors)
for pie_wedge in dis_pie[0]:
    pie_wedge.set_edgecolor('white')

# Legend entries read "<district> - <percentage> %"; the wedges are passed
# explicitly so each label is tied to its own patch.
labels = ['{0} - {1:1.2f} %'.format(i, j) for i,j in zip(dis.index, dis_pct)]
ax.legend(dis_pie[0], labels, loc = 2, fontsize = 10, frameon = False)

ax.set_title("Incidents by District in SF, 2014 Summer")
plt.savefig('pie_dis_all.png')

In [300]:
# Total incidents by hour of the day (counts are already discrete, so a bar
# chart is used instead of a histogram).
time = raw.groupby(by = 'Time').count()['IncidntNum']
# The Time column holds "HH:MM" strings; parse them with an explicit format
# (the `unit` argument is meant for numeric epoch offsets, not strings)
# and keep only the hour component.
time.index = pd.to_datetime(time.index, format = '%H:%M').hour
# Collapse the per-minute counts into one total per hour.
time = time.groupby(by = time.index).sum()

fig = plt.figure(figsize = (20, 10))
ax = fig.add_subplot(111)
ax.set_xlim([0, 24])
# align='edge' makes the bar for hour h start at x = h, so the hour-0 bar
# is not cut in half by the lower x-limit.
ax.bar(time.index, time, align = 'edge', label = "# incidents by hour")
ax.plot(time.index, time, color = 'red')

ax.set_title("Total Incident by Time of One Day in SF Data")
ax.set_xlabel('Hour', fontsize = 15)
ax.set_ylabel('# Incidents', fontsize = 15)
ax.legend()

plt.savefig('byday_all.png')

In [423]:
# Total incidents per calendar date.
date = raw.groupby(by = 'Date').count()['IncidntNum']
date.index = pd.to_datetime(date.index, format = '%m/%d/%Y')
# The groupby ordered the dates as strings; re-sort on the parsed datetimes
# so the line is always drawn in chronological order.
date = date.sort_index()

fig = plt.figure(figsize = (20, 10))
ax = fig.add_subplot(111)
# Axes.plot handles datetime x-values directly (plot_date is deprecated);
# marker='o' reproduces plot_date's default marker.
ax.plot(date.index, date, marker = 'o', ls = '-')

ax.set_xlabel('Date')
ax.set_ylabel('# Incidents')
ax.set_title("# Incidents by Date in SF Data")
plt.savefig("bydate_all.png")

In [422]:
# Total incidents by day of the week.
# Explicit Monday-first ordering; the position in this list is the weekday
# number (Monday = 0 ... Sunday = 6).
weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                 'Friday', 'Saturday', 'Sunday']

wkd = raw.groupby(by = 'DayOfWeek').count()['IncidntNum']
# Map each weekday name to its number directly instead of parsing the names
# as dates; an unexpected name raises ValueError rather than being mis-parsed.
widx = np.array([weekday_names.index(day) for day in wkd.index])
cnmb = pd.DataFrame(wkd)
cnmb['widx'] = widx
# sort_values returns a new frame, so the result must be assigned back.
cnmb = cnmb.sort_values('widx')

fig = plt.figure(figsize = (20, 10))
ax = fig.add_subplot(111)

ax.bar(cnmb.widx, cnmb.IncidntNum, align = 'center')
ax.set_xticks(cnmb.widx)
ax.set_xticklabels(cnmb.index)
# NOTE: the y-axis starts at 3500 rather than 0, which visually exaggerates
# the differences between days.
ax.set_ylim([3500, cnmb.IncidntNum.max() + 100])

ax.set_title("Incident by Day of Week")
ax.set_ylabel("# Incidents")
ax.set_xlabel('Day of Week')

plt.savefig('bydw_all.png')