In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter 
from scipy import stats
import geopandas as gpd
from shapely.geometry import Point
import folium
import re

In [3]:
hub = pd.read_excel("data/Trash Hauler Report - Nov 2017 - Nov 2019 Final.xlsx", skiprows=1)

In [4]:
# #rename the columns
hub.columns = [y.lower() for y in [x.strip(' ') if x[-1:] == " " else re.sub(r'\W', '_', x) for x in hub.columns]]

In [5]:
metro = pd.read_csv("data/hubNashville__311__Service_Requests.csv", low_memory=False)

In [6]:
metro.columns = ['request_number' if "#" in x else re.sub(r'\W+', '_', x).lower() for x in metro.columns]

In [7]:
#This fuction works but it is time consuming 
# inProj = Proj(init='epsg:3857')
# outProj = Proj(init='epsg:4326')
# [transform(x, y) for x in hub.state_plan_x for y in hub.state_plan_y]

In [8]:
hub_geo = pd.merge(hub, metro[['request_number', 'latitude', 'longitude']], how='inner', on='request_number')
hub_geo = hub_geo.drop(['state_plan_x', 'state_plan_y'], axis=1)
hub_geo.loc[:, ('geometry')] = hub_geo.apply(lambda x: Point(x.longitude, x.latitude), axis=1)
hub_geo = gpd.GeoDataFrame(hub_geo, crs = {'init': 'epsg:4326'}, geometry = hub_geo['geometry'])

In [9]:
hub_geo.shape

(20226, 12)

**Filter the misses**

In [10]:
hub_geo['description'] = hub_geo['description'].astype(str)
hub_geo['request'] = hub_geo['request'].astype(str)

In [11]:
hub_geo['request_description'] = hub_geo.request + hub_geo.description

In [17]:
hub_miss_geo = hub_geo[hub_geo['request_description'].str.contains('[Mm]iss', regex=True, na=False)]

**standardize the address column**

In [18]:
hub_miss_geo.loc[:,('incident_address')] = hub_miss_geo['incident_address'].astype(str)

In [19]:
hub_miss_geo.loc[:,('incident_address')] = hub_miss_geo['incident_address'].replace(to_replace= r",.+", value='', regex=True).str.strip()

In [20]:
hub_miss_geo

Unnamed: 0,request_number,date_opened,request,description,incident_address,zip_code,trash_hauler,trash_route,council_district,latitude,longitude,geometry,request_description
1,25274,2017-11-01 07:42:00,Trash - Curbside/Alley Missed Pickup,Curb/Trash miss Tuesday.,4028 Clarksville Pike,37218.0,RED RIVER,4202,1.0,36.213480,-86.838110,POINT (-86.83811 36.21348),Trash - Curbside/Alley Missed PickupCurb/Trash...
2,25276,2017-11-01 07:53:00,Trash - Curbside/Alley Missed Pickup,Curb/trash miss Tuesday.,6528 Thunderbird Dr,37209.0,RED RIVER,4205,20.0,36.142930,-86.885570,POINT (-86.88557 36.14293),Trash - Curbside/Alley Missed PickupCurb/trash...
3,25307,2017-11-01 08:28:00,Trash - Curbside/Alley Missed Pickup,missed,2603 old matthews rd,37207.0,WASTE IND,2206,2.0,36.212660,-86.789180,POINT (-86.78918 36.21266),Trash - Curbside/Alley Missed Pickupmissed
4,25312,2017-11-01 08:55:00,Trash - Curbside/Alley Missed Pickup,Missed the even side of the road.,604 croley dr,37209.0,RED RIVER,4203,20.0,36.154870,-86.875000,POINT (-86.875 36.15487),Trash - Curbside/Alley Missed PickupMissed the...
7,25327,2017-11-01 09:32:00,Trash Collection Complaint,"Trash out on time, miss again Tuesday. ALLEY",1816 Jo Johnston Ave,37203.0,METRO,9208,21.0,36.160340,-86.802990,POINT (-86.80299000000001 36.16034000000001),"Trash Collection ComplaintTrash out on time, m..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
20221,267125,2019-11-01 16:03:00,Trash - Curbside/Alley Missed Pickup,MISSED...NEIGHBORS MISSED,2731 Murfreesboro Pike,37013.0,RED RIVER,4502,32.0,36.069130,-86.633970,POINT (-86.63396999999999 36.06913),Trash - Curbside/Alley Missed PickupMISSED...N...
20222,267126,2019-11-01 16:08:00,Trash - Curbside/Alley Missed Pickup,entire alley,1621 Long Ave,37206.0,METRO,9508,6.0,36.169455,-86.741186,POINT (-86.74118559999999 36.1694553),Trash - Curbside/Alley Missed Pickupentire alley
20223,267130,2019-11-01 16:13:00,Trash - Curbside/Alley Missed Pickup,missed several,2943 Windemere Cir,37214.0,RED RIVER,1502,15.0,36.185644,-86.671752,POINT (-86.67175220000001 36.18564399999999),Trash - Curbside/Alley Missed Pickupmissed sev...
20224,267134,2019-11-01 16:15:00,Trash - Curbside/Alley Missed Pickup,Caller stated trash was missed & were only pic...,3325 Murfreesboro Pike,37013.0,RED RIVER,4502,32.0,36.054583,-86.619823,POINT (-86.619823 36.054583),Trash - Curbside/Alley Missed PickupCaller sta...
