In [44]:
import pandas as pd
from datetime import datetime, timedelta
import json, requests


In [45]:
def prepare_sonar_gid_data(gid_csv='/data/prod/get_it_done_311_requests_datasd.csv'):
    """Load the Get It Done 311 requests CSV and keep the fields used here.

    Args:
        gid_csv: Path or file-like object accepted by ``pd.read_csv``.
            Defaults to the production extract, so zero-argument calls
            behave exactly as before.

    Returns:
        A new DataFrame containing only the accepted fields (in the order
        listed below), with ``requested_datetime`` and ``closed_date``
        converted via ``pd.to_datetime``.

    Raises:
        KeyError: If any accepted field is missing from the CSV.
    """
    # Accepted fields, in output column order
    fields = ['service_request_id', 'requested_datetime', 'closed_date', 'source',
              'service_name', 'service_subtype', 'status_description', 'lat', 'long']
    date_fields = ['requested_datetime', 'closed_date']

    # Read CSV
    raw = pd.read_csv(gid_csv)

    # Take an explicit copy of the selected columns: assigning into the bare
    # selection would be a write to a derived slice (SettingWithCopyWarning).
    gid = raw[fields].copy()

    # Convert datetime columns
    for col in date_fields:
        gid[col] = pd.to_datetime(gid[col])

    return gid







In [46]:
# Load the prepared requests frame once; later cells filter it.
gid = prepare_sonar_gid_data()

# Distinct service names present in the data. (A bare `gid.dtypes` used to sit
# here mid-cell; only a cell's last expression is displayed, so it was dead.)
gid['service_name'].unique()

array(['Curb', 'Graffiti', 'Pothole', 'Sidewalk', 'Other',
       'Abandoned Vehicle', 'Litter/Dumping', 'Dead Animal',
       'Traffic Sign', 'Traffic Signal', 'Storm Drain', 'Street Light',
       'Tree Hazard', nan, 'Faded striping', 'Street Sweeping',
       'Damaged Guardrail', 'Street Flooded', 'Parking Meter',
       'Illegal discharge', 'Over Irrigation'], dtype=object)

In [55]:
# Reporting-window cutoffs. "now" is sampled once so both cutoffs are measured
# from the same instant.
now = datetime.now()
days_7 = now - timedelta(days=7)
days_30 = now - timedelta(days=30)

# Window applied below; assign days_7 instead for a one-week view.
range_start = days_30

# Pothole and Graffiti requests, split by status.
# NOTE(review): 'Duplicate' is treated as an open status, as in the original
# mask -- confirm that is intended.
service_mask = gid['service_name'].isin(['Pothole', 'Graffiti'])
closed_mask = service_mask & (gid['status_description'] == 'Closed')
open_mask = service_mask & gid['status_description'].isin(
    ['New', 'Acknowledged', 'Assigned', 'Duplicate'])

# Open requests (independent copy so later cells do not write into a slice of gid).
gid3 = gid[open_mask].copy()

# Closed requests whose closed_date is on/after range_start, with the time each
# one stayed open. Built in one chain so nothing is assigned onto a filtered
# slice. reset_index() keeps the original row label as an 'index' column.
gid2 = (
    gid[closed_mask & (gid['closed_date'] >= range_start)]
    .assign(open_len=lambda df: df['closed_date'] - df['requested_datetime'])
    .reset_index()
)

gid2.head(5)

Unnamed: 0,index,service_request_id,requested_datetime,closed_date,source,service_name,service_subtype,status_description,lat,long,open_len
0,39,177233,2017-04-25 09:22:00,2017-04-25 09:24:00,Crew/Self Generated,Graffiti,,Closed,32.790413,-117.173146,0 days 00:02:00
1,479,176779,2017-04-24 09:43:00,2017-04-25 00:03:00,Web,Pothole,,Closed,32.812063,-117.216127,0 days 14:20:00
2,738,176517,2017-04-23 13:54:00,2017-04-25 00:02:00,Phone,Pothole,,Closed,32.7157,-117.161093,1 days 10:08:00
3,886,176369,2017-04-23 06:27:00,2017-04-25 09:15:00,Web,Graffiti,,Closed,32.573401,-117.054985,2 days 02:48:00
4,1015,176238,2017-04-22 17:31:00,2017-04-25 00:02:00,Phone,Pothole,,Closed,32.801138,-117.140358,2 days 06:31:00


In [56]:
# Grab one open_len value (row label 4) to inspect it on its own.
delta = gid2.loc[4, 'open_len']
delta

Timedelta('2 days 06:31:00')

In [57]:
# Express the sample duration in fractional days (seconds / seconds-per-day).
delta.total_seconds() / timedelta(days=1).total_seconds()

2.2715277777777776

In [58]:
# Express open_len as fractional days.
# It is recomputed from the two timestamp columns instead of from open_len
# itself, so re-running this cell yields the same result rather than failing on
# values that were already converted to floats. The vectorized .dt accessor
# replaces the per-row Python lambda.
SECONDS_PER_DAY = 86400
gid2 = gid2.assign(
    open_len=(gid2['closed_date'] - gid2['requested_datetime']).dt.total_seconds() / SECONDS_PER_DAY
)
gid2.head()

Unnamed: 0,index,service_request_id,requested_datetime,closed_date,source,service_name,service_subtype,status_description,lat,long,open_len
0,39,177233,2017-04-25 09:22:00,2017-04-25 09:24:00,Crew/Self Generated,Graffiti,,Closed,32.790413,-117.173146,0.001389
1,479,176779,2017-04-24 09:43:00,2017-04-25 00:03:00,Web,Pothole,,Closed,32.812063,-117.216127,0.597222
2,738,176517,2017-04-23 13:54:00,2017-04-25 00:02:00,Phone,Pothole,,Closed,32.7157,-117.161093,1.422222
3,886,176369,2017-04-23 06:27:00,2017-04-25 09:15:00,Web,Graffiti,,Closed,32.573401,-117.054985,2.116667
4,1015,176238,2017-04-22 17:31:00,2017-04-25 00:02:00,Phone,Pothole,,Closed,32.801138,-117.140358,2.271528


In [59]:
# (rows, columns) of the filtered closed-request frame.
gid2.shape

(3548, 11)

In [60]:
# Avg closure time per service type, for requests closed on/after range_start.
gid2.groupby('service_name')[['open_len']].mean()

Unnamed: 0_level_0,open_len
service_name,Unnamed: 1_level_1
Graffiti,41.264613
Pothole,40.341117


In [61]:
# Number of closed requests per service type in the same window
# (counts rows with a non-null open_len).
gid2.groupby('service_name')[['open_len']].count()

Unnamed: 0_level_0,open_len
service_name,Unnamed: 1_level_1
Graffiti,1126
Pothole,2422


In [62]:
# Rebuild the open-request frame from gid and open_mask with a fresh index.
# An in-place reset on gid3 is not re-runnable: a second run adds a 'level_0'
# column and a third raises. Deriving it from the source frame keeps the cell
# idempotent. The original row label is kept as the 'index' column.
gid3 = gid[open_mask].reset_index()
gid3.head()

Unnamed: 0,index,service_request_id,requested_datetime,closed_date,source,service_name,service_subtype,status_description,lat,long
0,1,177274,2017-04-25 10:22:00,NaT,Web,Graffiti,,New,32.569986,-117.083789
1,2,177273,2017-04-25 10:21:00,NaT,Web,Pothole,,Acknowledged,32.778073,-117.140118
2,3,177272,2017-04-25 10:20:00,NaT,Web,Pothole,,New,32.89416,-117.119412
3,9,177266,2017-04-25 10:12:00,NaT,Mobile,Graffiti,,New,32.732021,-117.108138
4,11,177264,2017-04-25 10:10:00,NaT,Web,Graffiti,,New,32.579685,-117.097321


In [63]:
# Number of open requests per service type.
gid3.groupby('service_name')[['service_request_id']].count()

Unnamed: 0_level_0,service_request_id
service_name,Unnamed: 1_level_1
Graffiti,2890
Pothole,6446
