In [7]:
import pandas as pd
import datetime
import urllib
import plotly.express as px

In [22]:
# import the raw data
apiCall = 'https://data.cityofnewyork.us/resource/fhrw-4uyv.csv?$where=created_date between "2020-01-01T00:00:00" and "2020-03-18T00:00:00"&$limit=500000'.replace(" ","%20")
raw311 = pd.read_csv(apiCall)
raw311.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 407627 entries, 0 to 407626
Data columns (total 45 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   unique_key                      407627 non-null  int64  
 1   created_date                    407627 non-null  object 
 2   closed_date                     363870 non-null  object 
 3   agency                          407627 non-null  object 
 4   agency_name                     407627 non-null  object 
 5   complaint_type                  407627 non-null  object 
 6   descriptor                      400882 non-null  object 
 7   location_type                   282010 non-null  object 
 8   incident_zip                    394006 non-null  float64
 9   incident_address                378751 non-null  object 
 10  street_name                     378738 non-null  object 
 11  cross_street_1                  272538 non-null  object 
 12  cross_street_2  

In [23]:


# set created_date to datetime
raw311['created_date'] = pd.to_datetime(raw311['created_date']).dt.tz_localize('EST')

raw311.info()



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 407627 entries, 0 to 407626
Data columns (total 45 columns):
 #   Column                          Non-Null Count   Dtype              
---  ------                          --------------   -----              
 0   unique_key                      407627 non-null  int64              
 1   created_date                    407627 non-null  datetime64[ns, EST]
 2   closed_date                     363870 non-null  object             
 3   agency                          407627 non-null  object             
 4   agency_name                     407627 non-null  object             
 5   complaint_type                  407627 non-null  object             
 6   descriptor                      400882 non-null  object             
 7   location_type                   282010 non-null  object             
 8   incident_zip                    394006 non-null  float64            
 9   incident_address                378751 non-null  object             
 

In [77]:
consumer_complaints = raw311.loc[raw311['complaint_type'] == 'Consumer Complaint']

descriptors = consumer_complaints['descriptor'].unique()

consumer_complaints.tail(500)

# print(descriptors)

Unnamed: 0,unique_key,created_date,closed_date,agency,agency_name,complaint_type,descriptor,location_type,incident_zip,incident_address,...,bridge_highway_direction,road_ramp,bridge_highway_segment,latitude,longitude,location,location_address,location_city,location_state,location_zip
404321,45847378,2020-03-17 11:58:01-05:00,,DCA,Department of Consumer Affairs,Consumer Complaint,Retail Store,,11377.0,44-06 48 AVENUE,...,,,,40.739497,-73.921145,POINT (-73.92114508304748 40.73949664480357),,,,
404323,45847355,2020-03-17 11:58:12-05:00,,DCA,Department of Consumer Affairs,Consumer Complaint,Retail Store,,10039.0,2868 FREDERICK DOUGLAS BOULEVARD,...,,,,40.827610,-73.938417,POINT (-73.9384173553098 40.82761040558357),,,,
404326,45844547,2020-03-17 11:59:22-05:00,,DCA,Department of Consumer Affairs,Consumer Complaint,Retail Store,,11207.0,758 NEW LOTS AVENUE,...,,,,40.666401,-73.882644,POINT (-73.88264385163313 40.666400879138884),,,,
404339,45848237,2020-03-17 12:00:45-05:00,,DCA,Department of Consumer Affairs,Consumer Complaint,Retail Store,,11212.0,405 REMSEN AVENUE,...,,,,40.655750,-73.923234,POINT (-73.92323351548474 40.65574969543047),,,,
404341,45844899,2020-03-17 12:00:46-05:00,,DCA,Department of Consumer Affairs,Consumer Complaint,Tow Truck Company,Commercial Building,10464.0,162 SCHOFIELD STREET,...,,,,40.845311,-73.784785,POINT (-73.7847845050277 40.84531061384322),,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
407537,45844594,2020-03-17 23:31:23-05:00,,DCA,Department of Consumer Affairs,Consumer Complaint,Retail Store,,11412.0,201-01 LINDEN BOULEVARD,...,,,,40.694738,-73.751235,POINT (-73.7512354229416 40.69473752384905),,,,
407562,45846411,2020-03-17 23:39:40-05:00,,DCA,Department of Consumer Affairs,Consumer Complaint,Retail Store,,11354.0,MAIN STREET,...,,,,40.759570,-73.830139,POINT (-73.83013930521312 40.75956956551904),,,,
407590,45844577,2020-03-17 23:47:44-05:00,,DCA,Department of Consumer Affairs,Consumer Complaint,Retail Store,Commercial Building,11222.0,757 MANHATTAN AVENUE,...,,,,40.726646,-73.952441,POINT (-73.95244073826157 40.72664641125155),,,,
407609,45844602,2020-03-17 23:54:39-05:00,,DCA,Department of Consumer Affairs,Consumer Complaint,Retail Store,,11103.0,25-71 STEINWAY STREET,...,,,,40.767528,-73.912022,POINT (-73.91202163614363 40.767527633788944),,,,


In [71]:
# group by day and count

grouped_by_day = (pd.to_datetime(consumer_complaints['created_date'])
       .dt.floor('d')
       .value_counts()
       .rename_axis('date')
       .reset_index(name='count'))

grouped_by_day = grouped_by_day.sort_values(by='date')
print(grouped_by_day.tail(20))

<class 'pandas.core.frame.DataFrame'>
Int64Index: 77 entries, 76 to 0
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype              
---  ------  --------------  -----              
 0   date    77 non-null     datetime64[ns, EST]
 1   count   77 non-null     int64              
dtypes: datetime64[ns, EST](1), int64(1)
memory usage: 1.8 KB


In [46]:
fig = px.line(grouped_by_day, x="date", y="count", title='Daily Count of NYC 311 Requests with type "Consumer Complaint"')
fig.show()

In [32]:
# same chart with total count of complaints by day

all_grouped_by_day = (raw311['created_date']
    .dt.floor('d')
    .value_counts()
    .rename_axis('date')
    .reset_index(name='count'))

all_grouped_by_day = all_grouped_by_day.sort_values(by='date')

chart = px.line(all_grouped_by_day, x="date", y="count", title='Daily Count of NYC 311 Requests')
chart.show()

In [49]:
# what are the top-level complaint types?
complaint_types = raw311['complaint_type'].unique()
print(complaint_types)
# for complaint_type in range(len(complaint_types)): 
#     print(complaint_types[complaint_type])
    


['Food Poisoning' 'Blocked Driveway' 'Noise - Residential'
 'Noise - Helicopter' 'Noise - Commercial' 'HEAT/HOT WATER'
 'Illegal Parking' 'Street Condition' 'Rodent' 'Noise - Street/Sidewalk'
 'Illegal Fireworks' 'PLUMBING' 'DOOR/WINDOW' 'Sidewalk Condition'
 'Taxi Complaint' 'Traffic Signal Condition' 'Dirty Conditions'
 'Building/Use' 'Noise' 'Sewer' 'Drinking' 'Street Light Condition'
 'Noise - Vehicle' 'Animal-Abuse' 'Elevator' 'Derelict Vehicles'
 'Dead/Dying Tree' 'Non-Emergency Police Matter' 'Homeless Encampment'
 'WATER LEAK' 'UNSANITARY CONDITION' 'Boilers'
 'General Construction/Plumbing' 'Street Sign - Missing' 'Water System'
 'For Hire Vehicle Complaint' 'Food Establishment'
 'Special Projects Inspection Team (SPIT)' 'Lead' 'Lost Property'
 'Abandoned Vehicle' 'Hazardous Materials' 'PAINT/PLASTER' 'APPLIANCE'
 'FLOORING/STAIRS' 'Highway Condition' 'Street Sign - Damaged'
 'Consumer Complaint' 'Sanitation Condition' 'Air Quality' 'Taxi Report'
 'Missed Collection (All Mater

In [68]:
#hand-coded list of types of interest
complaint_types = ['Consumer Complaint','Blocked Driveway','Noise - Street/Sidewalk','PLUMBING','DOOR/WINDOW','Noise - Vehicle','WATER LEAK','UNSANITARY CONDITION','Boilers','PAINT/PLASTER','APPLIANCE','FLOORING/STAIRS','Air Quality','GENERAL','Noise - Park','ELECTRIC','SAFETY','Taxi Complaint','For Hire Vehicle Complaint','Lost Property','Panhandling']

for i in range(len(complaint_types)): 
    
    filtered = raw311.loc[raw311['complaint_type'] == complaint_types[i]]
    grouped_by_day = (pd.to_datetime(filtered['created_date'])
       .dt.floor('d')
       .value_counts()
       .rename_axis('date')
       .reset_index(name='count'))

    grouped_by_day = grouped_by_day.sort_values(by='date')
    
    
    chart = px.line(grouped_by_day, x="date", y="count", title=complaint_types[i], width=400, height=300)
    chart.update_yaxes(title_text='')
    chart.update_xaxes(nticks=3, tickformat = '%-d %b', title_text='')
    chart.show()
    chart.write_image("images/%s.svg" % (complaint_types[i].replace(' ', '_').replace('/','_')))
    
