In [1]:
import pandas as pd
import altair as alt

Assumes the data from [NYC Open Data](https://data.cityofnewyork.us/Social-Services/Rat-Sightings/3q43-55fe) on rat sightings is saved as `Rat_Sightings.csv` in the same directory. Removes columns that don't have any useful values (plus a few low-priority or redundant ones, to reduce file size).

In [9]:
# Columns to drop from the raw data, grouped by the reason for removal.
# NOTE: despite the name, these are column labels, not rows.

# Columns that carry a single value (or only NaN) across the dataset.
constant_columns = [
    'Complaint Type', 'Descriptor', 'Vehicle Type',
    'Taxi Company Borough', 'Taxi Pick Up Location',
    'Bridge Highway Name', 'Bridge Highway Direction',
    'Road Ramp', 'Bridge Highway Segment',
    'Agency', 'Facility Type', 'Park Facility Name', 'Agency Name',
]

# Not exactly useless, but low priority compared to the rest,
# and we need to shave down MBs.
low_priority_columns = [
    'Intersection Street 1', 'Intersection Street 2',
    'Cross Street 1', 'Cross Street 2',
]

# Already covered by the individual lat/lon columns.
redundant_columns = ['Location']

useless_rows = [*constant_columns, *low_priority_columns, *redundant_columns]

In [10]:
# Load the untouched NYC Open Data export; the path is relative to the working directory.
raw_csv_path = "Rat_Sightings.csv"
df_raw = pd.read_csv(raw_csv_path)

In [11]:
# Unique values of each column slated for removal, in the same order as useless_rows.
[df_raw[column_name].unique() for column_name in useless_rows]

[array(['Rodent'], dtype=object),
 array(['Rat Sighting'], dtype=object),
 array([nan]),
 array([nan]),
 array([nan]),
 array([nan]),
 array([nan]),
 array([nan]),
 array([nan]),
 array(['DOHMH'], dtype=object),
 array([nan]),
 array(['Unspecified'], dtype=object),
 array(['Department of Health and Mental Hygiene'], dtype=object),
 array(['21 AVENUE', 'AMSTERDAM AVENUE', 'SOUTHERN BOULEVARD', ...,
        'GOWANUS EXPRESSWAY EXIT 17 WB', 'PARK LANE SOUTH',
        'HEENAN AVENUE'], dtype=object),
 array(['DITMARS BOULEVARD', 'BROADWAY', 'BRUCKNER BOULEVARD BIKE PATH',
        ..., 'PRINCETON AVENUE', 'HARMON STREET', 'CHITTENDEN AVENUE'],
       dtype=object),
 array(['21 AVENUE', 'AMSTERDAM AVENUE', 'SOUTHERN BOULEVARD', ...,
        'JOURNEAY STREET', 'SCARBORO AVENUE', 'RUTHERFORD PLACE'],
       dtype=object),
 array(['DITMARS BOULEVARD', 'BROADWAY', 'BRUCKNER BOULEVARD BIKE PATH',
        ..., 'HAMILTON TERRACE', 'MAJOR DEEGAN EP NB EN E 135 ST',
        'COLD SPRING ROAD'], dtype

In [12]:
# Build the trimmed frame by removing every column named in useless_rows;
# the result is bound to a new name, so df_raw stays available for inspection.
df = df_raw.drop(columns=useless_rows)
df

Unnamed: 0,Unique Key,Created Date,Closed Date,Location Type,Incident Zip,Incident Address,Street Name,Address Type,City,Landmark,Status,Due Date,Resolution Action Updated Date,Community Board,Borough,X Coordinate (State Plane),Y Coordinate (State Plane),Park Borough,Latitude,Longitude
0,47741820,09/30/2020 05:54:51 PM,,3+ Family Mixed Use Building,11105.0,21-77 33 STREET,33 STREET,,ASTORIA,33 STREET,In Progress,,,01 QUEENS,QUEENS,1009475.0,221980.0,QUEENS,40.775924,-73.908924
1,47740411,09/30/2020 08:18:28 PM,,3+ Family Apt. Building,10031.0,517 WEST 147 STREET,WEST 147 STREET,,NEW YORK,WEST 147 STREET,In Progress,,,09 MANHATTAN,MANHATTAN,998813.0,240523.0,MANHATTAN,40.826844,-73.947379
2,47739788,09/30/2020 04:29:45 PM,,Government Building,10459.0,1035 LONGWOOD AVENUE,LONGWOOD AVENUE,,BRONX,LONGWOOD AVENUE,In Progress,,,02 BRONX,BRONX,1013037.0,236657.0,BRONX,40.816198,-73.896000
3,47738778,09/30/2020 11:13:17 AM,,Commercial Building,11211.0,496 GRAND STREET,GRAND STREET,,BROOKLYN,GRAND STREET,In Progress,,,01 BROOKLYN,BROOKLYN,997740.0,198290.0,BROOKLYN,40.710927,-73.951341
4,47739099,09/30/2020 02:31:00 PM,,1-2 Family Dwelling,11203.0,EAST 51 STREET,EAST 51 STREET,,,,In Progress,,,17 BROOKLYN,BROOKLYN,1003582.0,179979.0,BROOKLYN,40.660656,-73.930321
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153406,42039339,03/25/2019 12:00:00 AM,03/25/2019 12:00:00 AM,3+ Family Apt. Building,10014.0,47 PERRY STREET,PERRY STREET,ADDRESS,NEW YORK,,Closed,04/24/2019 12:18:05 AM,03/25/2019 12:00:00 AM,02 MANHATTAN,MANHATTAN,983421.0,207274.0,MANHATTAN,40.735596,-74.002991
153407,42041693,03/24/2019 12:00:00 AM,03/24/2019 12:00:00 AM,3+ Family Apt. Building,10009.0,153 AVENUE A,AVENUE A,LATLONG,NEW YORK,,Closed,04/23/2019 11:51:59 AM,03/24/2019 11:55:26 AM,Unspecified MANHATTAN,MANHATTAN,989045.0,204411.0,MANHATTAN,40.727739,-73.982700
153408,42041694,03/24/2019 12:00:00 AM,03/26/2019 12:00:00 AM,Commercial Building,11204.0,6215 20 AVENUE,20 AVENUE,LATLONG,BROOKLYN,,Closed,04/23/2019 08:38:47 PM,03/26/2019 12:00:00 AM,Unspecified BROOKLYN,BROOKLYN,988635.0,164344.0,BROOKLYN,40.617764,-73.984205
153409,42042459,03/24/2019 12:00:00 AM,03/24/2019 12:00:00 AM,3+ Family Apt. Building,10455.0,756 KELLY STREET,KELLY STREET,LATLONG,BRONX,,Closed,04/23/2019 10:26:37 PM,03/24/2019 12:00:00 AM,Unspecified BRONX,BRONX,1012293.0,237015.0,BRONX,40.817185,-73.898686


In [13]:
# Column labels that remain after the drop.
df.columns.tolist()

['Unique Key',
 'Created Date',
 'Closed Date',
 'Location Type',
 'Incident Zip',
 'Incident Address',
 'Street Name',
 'Address Type',
 'City',
 'Landmark',
 'Status',
 'Due Date',
 'Resolution Action Updated Date',
 'Community Board',
 'Borough',
 'X Coordinate (State Plane)',
 'Y Coordinate (State Plane)',
 'Park Borough',
 'Latitude',
 'Longitude']

In [16]:
# df_raw['Intersection Street 2'].unique()  # use df_raw here: this column is dropped from df

In [None]:
# Write the trimmed frame to disk; the path is relative to the working directory.
# NOTE(review): no index argument is passed, so pandas' default index handling applies —
# confirm downstream readers of rats.csv expect that.
output_csv_path = 'rats.csv'
df.to_csv(output_csv_path)