In [1]:
import pandas as pd
import os

In [2]:
# Relative path to the NYPD historic complaint CSV read by the cells below.
nypd_ds = "../nypd_ds/NYPD_Complaint_Data_Historic.csv"

<div class="inner_cell">
<div class="text_cell_render border-box-sizing rendered_html">
<p>This is not an easy visualization to create, so below is a little guide to help you get started.</p>
<h3 id="Get-the-data">Get the data<a class="anchor-link" href="#Get-the-data">¶</a></h3><p>Using Python, R or another programming language of choice, create a <code>.csv</code> or <code>.json</code> containing the murders of 2016. Remember to extract at least the time and geoposition of each murder from the big dataset.</p>
<h3 id="Figure-out-the-map">Figure out the map<a class="anchor-link" href="#Figure-out-the-map">¶</a></h3><ul>
<li>Start by generating the map of the 5 boroughs in NYC in D3, you can use this <a href="https://github.com/dwillis/nyc-maps/blob/master/boroughs.geojson">GeoJSON file of the NYC boroughs</a>.</li>
<li>Chapter 14 in the IDV book has all the information you need for this part, so make sure to read it.</li>
<li>For this data we recommend using the <code>d3.geoMercator()</code> projection instead of the <code>d3.geoAlbersUsa()</code> projection used in the book.</li>
<li>If your map doesn't show up, check the <em>scale</em>, <em>center</em> and <em>translation</em> parameters of your <code>d3.geoMercator()</code> projection.</li>
<li>Once you have the map, add in the murder data-points using your <code>.csv</code> or <code>.json</code> data and style them to your liking.</li>
</ul>
<h3 id="Barchart-is-next">Barchart is next<a class="anchor-link" href="#Barchart-is-next">¶</a></h3><p>Set up your bar chart somewhere near your map. As shown in the video above, we want it to show the number of murders each hour of the day. So what is the total number of murders between midnight and 1am, total between 1am and 2am, etc. Don't forget a nice title and labels for the axes.</p>
<p>When you generate the data to load into D3, it is a good idea to include <em>hour of each murder</em> (in 24-hour time) in the file. That way you make generating the histogram counts in JavaScript easier.</p>
<h3 id="Brushing">Brushing<a class="anchor-link" href="#Brushing">¶</a></h3><p>Now we want to be able to select areas of interest in the map. We will do this using a d3.brush. The idea is that the bar chart should now show the temporal distribution of the selected points.</p>
<p>Start by defining your brush. Take a look at <a href="http://bl.ocks.org/feyderm/6bdbc74236c27a843db633981ad22c1b">this bl.ocks example</a></p>
<p>Your brush should be able to select points in the map, and style the points so that selected points are distinguishable from non-selected points (so the user knows which points are selected).</p>
<ul>
<li><strong>Helpful note #1</strong>: Think about how you determine if a point is within your brush using your projection.
Use the selected points to update your bar chart.</li>
<li><strong>Helpful note #2</strong>: Think about how you will get the data from the selected points. I recommend re-reading chapter 12 on Selections in IDV if you have not done so already.</li>
</ul>
<h3 id="Make-it-look-good">Make it look good<a class="anchor-link" href="#Make-it-look-good">¶</a></h3><ul>
<li>Style the map and bar chart in any way you like.</li>
<li>If you want you can add the NYC Borough name to the map by adding a text for each of the 5 features in the geojson file.
<code>.data(json.features).enter().append("text")</code></li>
</ul>

</div>
</div>

In [3]:
# Display-only preview of the raw file's column layout. Only the first rows
# are parsed (nrows=10); loading the entire complaint file just to render a
# preview is slow and memory-hungry.
pd.read_csv(nypd_ds, nrows=10)

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,CMPLNT_NUM,CMPLNT_FR_DT,CMPLNT_FR_TM,CMPLNT_TO_DT,CMPLNT_TO_TM,RPT_DT,KY_CD,OFNS_DESC,PD_CD,PD_DESC,...,ADDR_PCT_CD,LOC_OF_OCCUR_DESC,PREM_TYP_DESC,PARKS_NM,HADEVELOPT,X_COORD_CD,Y_COORD_CD,Latitude,Longitude,Lat_Lon
0,101109527,12/31/2015,23:45:00,,,12/31/2015,113,FORGERY,729.0,"FORGERY,ETC.,UNCLASSIFIED-FELO",...,44.0,INSIDE,BAR/NIGHT CLUB,,,1007314.0,241257.0,40.828848,-73.916661,"(40.828848333, -73.916661142)"
1,153401121,12/31/2015,23:36:00,,,12/31/2015,101,MURDER & NON-NEGL. MANSLAUGHTER,,,...,103.0,OUTSIDE,,,,1043991.0,193406.0,40.697338,-73.784557,"(40.697338138, -73.784556739)"
2,569369778,12/31/2015,23:30:00,,,12/31/2015,117,DANGEROUS DRUGS,503.0,"CONTROLLED SUBSTANCE,INTENT TO",...,28.0,,OTHER,,,999463.0,231690.0,40.802607,-73.945052,"(40.802606608, -73.945051911)"
3,968417082,12/31/2015,23:30:00,,,12/31/2015,344,ASSAULT 3 & RELATED OFFENSES,101.0,ASSAULT 3,...,105.0,INSIDE,RESIDENCE-HOUSE,,,1060183.0,177862.0,40.654549,-73.726339,"(40.654549444, -73.726338791)"
4,641637920,12/31/2015,23:25:00,12/31/2015,23:30:00,12/31/2015,344,ASSAULT 3 & RELATED OFFENSES,101.0,ASSAULT 3,...,13.0,FRONT OF,OTHER,,,987606.0,208148.0,40.738002,-73.987891,"(40.7380024, -73.98789129)"
5,365661343,12/31/2015,23:18:00,12/31/2015,23:25:00,12/31/2015,106,FELONY ASSAULT,109.0,"ASSAULT 2,1,UNCLASSIFIED",...,71.0,FRONT OF,DRUG STORE,,,996149.0,181562.0,40.665023,-73.957111,"(40.665022689, -73.957110763)"
6,608231454,12/31/2015,23:15:00,,,12/31/2015,235,DANGEROUS DRUGS,511.0,"CONTROLLED SUBSTANCE, POSSESSI",...,7.0,OPPOSITE OF,STREET,,,987373.0,201662.0,40.720200,-73.988735,"(40.720199996, -73.988735082)"
7,265023856,12/31/2015,23:15:00,12/31/2015,23:15:00,12/31/2015,118,DANGEROUS WEAPONS,792.0,WEAPONS POSSESSION 1 & 2,...,46.0,FRONT OF,STREET,,,1009041.0,247401.0,40.845707,-73.910398,"(40.845707148, -73.910398033)"
8,989238731,12/31/2015,23:15:00,12/31/2015,23:30:00,12/31/2015,344,ASSAULT 3 & RELATED OFFENSES,101.0,ASSAULT 3,...,48.0,INSIDE,RESIDENCE - APT. HOUSE,,,1014154.0,251416.0,40.856711,-73.891900,"(40.856711291, -73.891899956)"
9,415095955,12/31/2015,23:10:00,12/31/2015,23:10:00,12/31/2015,341,PETIT LARCENY,338.0,"LARCENY,PETIT FROM BUILDING,UN",...,19.0,INSIDE,DRUG STORE,,,994327.0,218211.0,40.765618,-73.963623,"(40.765617688, -73.96362342)"


In [15]:
# Load only the columns used for the murder export and discard incomplete rows.
# No empty 'nyc_murders.csv' placeholder is created up front: DataFrame.to_csv
# creates the file when the export is written, and a zero-byte stub would be
# left on disk if the read below failed.
print('reading csv file')
df = pd.read_csv(
    nypd_ds,
    usecols=["OFNS_DESC", "RPT_DT", "BORO_NM", "CMPLNT_FR_TM", "Lat_Lon"],
).dropna()  # a row with a NaN in any of the selected columns is dropped
print('removed NaNs')

reading csv file
removed NaNs


In [17]:
# Select complaints whose offense description contains 'MURDER' and whose
# report date string (RPT_DT) contains '2016'. Both are literal substring
# matches, so regex parsing is switched off.
is_murder = df['OFNS_DESC'].str.contains('MURDER', regex=False)
reported_2016 = df['RPT_DT'].str.contains('2016', regex=False)
murders = df[is_murder & reported_2016]

# index=False: the DataFrame index holds pandas' default row labels, not data;
# writing it would add an unnamed leading column to the exported CSV.
murders.to_csv('nyc_murders.csv', index=False)