In [165]:
import pandas as pd
import numpy as np
import re

In [166]:
# Load the raw trash-hauler service-request report (path is relative to the
# notebook's working directory).
trash_hauler = pd.read_csv('trash_hauler_report.csv')
# Preview the first five rows to eyeball the columns and value formats.
trash_hauler.head()

Unnamed: 0,Request Number,Date Opened,Request,Description,Incident Address,Zip Code,Trash Hauler,Trash Route,Council District,State Plan X,State Plan Y
0,25270,11/01/17,Trash - Backdoor,"house with the wheel chair ramp, they share dr...",3817 Crouch Dr,37207.0,RED RIVER,3205,2.0,1727970.0,686779.478089
1,25274,11/01/17,Trash - Curbside/Alley Missed Pickup,Curb/Trash miss Tuesday.,4028 Clarksville Pike,37218.0,RED RIVER,4202,1.0,1721259.0,685444.799565
2,25276,11/01/17,Trash - Curbside/Alley Missed Pickup,Curb/trash miss Tuesday.,6528 Thunderbird Dr,37209.0,RED RIVER,4205,20.0,1707027.0,659887.471571
3,25307,11/01/17,Trash - Curbside/Alley Missed Pickup,missed,2603 old matthews rd,37207.0,WASTE IND,2206,2.0,1735692.0,685027.245923
4,25312,11/01/17,Trash - Curbside/Alley Missed Pickup,Missed the even side of the road.,604 croley dr,37209.0,RED RIVER,4203,20.0,1710186.0,664205.101066


In [167]:
# Summarize dtypes and non-null counts per column to spot missing data and
# columns that need a type conversion.
trash_hauler.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20226 entries, 0 to 20225
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Request Number    20226 non-null  int64  
 1   Date Opened       20226 non-null  object 
 2   Request           20226 non-null  object 
 3   Description       20195 non-null  object 
 4   Incident Address  20217 non-null  object 
 5   Zip Code          20151 non-null  float64
 6   Trash Hauler      19325 non-null  object 
 7   Trash Route       19279 non-null  object 
 8   Council District  20177 non-null  float64
 9   State Plan X      20198 non-null  float64
 10  State Plan Y      20198 non-null  float64
dtypes: float64(4), int64(1), object(6)
memory usage: 1.7+ MB


In [168]:
# (rows, columns) of the frame as loaded.
trash_hauler.shape

(20226, 11)

In [169]:
# Count missing values in each column.
nulls = trash_hauler.isna().sum()
nulls

Request Number        0
Date Opened           0
Request               0
Description          31
Incident Address      9
Zip Code             75
Trash Hauler        901
Trash Route         947
Council District     49
State Plan X         28
State Plan Y         28
dtype: int64

#### Clean Column Headers

In [170]:
# Inspect the raw header names. Note the trailing space in 'Request ':
# a lookup with the un-padded name 'Request' would not match this header.
trash_hauler.columns

Index(['Request Number', 'Date Opened', 'Request ', 'Description',
       'Incident Address', 'Zip Code', 'Trash Hauler', 'Trash Route',
       'Council District', 'State Plan X', 'State Plan Y'],
      dtype='object')

In [171]:
# Trim leading/trailing whitespace from every column header
# (e.g. 'Request ' becomes 'Request').
trash_hauler.columns = trash_hauler.columns.map(str.strip)

#### Convert Zip Code to a zero-padded string, Council District to a nullable integer, and Date Opened to datetime.

In [172]:
# Zip Code is loaded as float64 (e.g. 37207.0). Convert it in three steps:
#   1. nullable Int64 drops the fractional ".0" while keeping missing values,
#   2. 'string' turns the code into a label rather than a number,
#   3. zfill(5) left-pads with zeros to five characters.
zip_as_int = trash_hauler['Zip Code'].astype('Int64')
zip_as_text = zip_as_int.astype('string')
trash_hauler['Zip Code'] = zip_as_text.str.zfill(5)
trash_hauler.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20226 entries, 0 to 20225
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Request Number    20226 non-null  int64  
 1   Date Opened       20226 non-null  object 
 2   Request           20226 non-null  object 
 3   Description       20195 non-null  object 
 4   Incident Address  20217 non-null  object 
 5   Zip Code          20151 non-null  string 
 6   Trash Hauler      19325 non-null  object 
 7   Trash Route       19279 non-null  object 
 8   Council District  20177 non-null  float64
 9   State Plan X      20198 non-null  float64
 10  State Plan Y      20198 non-null  float64
dtypes: float64(3), int64(1), object(6), string(1)
memory usage: 1.7+ MB


In [173]:
# Cast Council District from float64 to pandas' nullable Int64 so districts
# display as whole numbers while missing entries are preserved.
trash_hauler['Council District'] = trash_hauler['Council District'].astype('Int64')
# Re-check dtypes and non-null counts after the cast.
trash_hauler.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20226 entries, 0 to 20225
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Request Number    20226 non-null  int64  
 1   Date Opened       20226 non-null  object 
 2   Request           20226 non-null  object 
 3   Description       20195 non-null  object 
 4   Incident Address  20217 non-null  object 
 5   Zip Code          20151 non-null  string 
 6   Trash Hauler      19325 non-null  object 
 7   Trash Route       19279 non-null  object 
 8   Council District  20177 non-null  Int64  
 9   State Plan X      20198 non-null  float64
 10  State Plan Y      20198 non-null  float64
dtypes: Int64(1), float64(2), int64(1), object(6), string(1)
memory usage: 1.7+ MB


In [174]:
# Peek at the raw date strings before parsing. Only a cell's last expression
# is displayed, so a single seeded random sample (reproducible via
# random_state) is all this cell needs.
trash_hauler['Date Opened'].sample(10, random_state=1)

7969     12/14/18
13881    06/14/19
19048    10/09/19
17090    08/17/19
13127    05/29/19
16010    08/02/19
8106     12/20/18
13457    06/06/19
3516     05/15/18
3854     06/01/18
Name: Date Opened, dtype: object

In [175]:
# Parse 'Date Opened' with an explicit month/day/two-digit-year format.
# The sampled raw values (e.g. '12/14/18') follow MM/DD/YY. Passing `format`
# avoids the format-inference warning, removes any day/month ambiguity, and
# raises on a value that does not conform instead of guessing.
trash_hauler['Date Opened'] = pd.to_datetime(trash_hauler['Date Opened'], format='%m/%d/%y')
trash_hauler.head()

  trash_hauler['Date Opened'] = pd.to_datetime(trash_hauler['Date Opened'])


Unnamed: 0,Request Number,Date Opened,Request,Description,Incident Address,Zip Code,Trash Hauler,Trash Route,Council District,State Plan X,State Plan Y
0,25270,2017-11-01,Trash - Backdoor,"house with the wheel chair ramp, they share dr...",3817 Crouch Dr,37207,RED RIVER,3205,2,1727970.0,686779.478089
1,25274,2017-11-01,Trash - Curbside/Alley Missed Pickup,Curb/Trash miss Tuesday.,4028 Clarksville Pike,37218,RED RIVER,4202,1,1721259.0,685444.799565
2,25276,2017-11-01,Trash - Curbside/Alley Missed Pickup,Curb/trash miss Tuesday.,6528 Thunderbird Dr,37209,RED RIVER,4205,20,1707027.0,659887.471571
3,25307,2017-11-01,Trash - Curbside/Alley Missed Pickup,missed,2603 old matthews rd,37207,WASTE IND,2206,2,1735692.0,685027.245923
4,25312,2017-11-01,Trash - Curbside/Alley Missed Pickup,Missed the even side of the road.,604 croley dr,37209,RED RIVER,4203,20,1710186.0,664205.101066


In [176]:
# Re-draw the same seeded sample after parsing so the rows can be compared
# one-to-one with the raw strings. Only the last expression of a cell is
# displayed, so a single call suffices.
trash_hauler['Date Opened'].sample(10, random_state=1)

7969    2018-12-14
13881   2019-06-14
19048   2019-10-09
17090   2019-08-17
13127   2019-05-29
16010   2019-08-02
8106    2018-12-20
13457   2019-06-06
3516    2018-05-15
3854    2018-06-01
Name: Date Opened, dtype: datetime64[ns]

#### Sanity check to ensure the year, month, and day are parsed correctly

In [177]:
# Pull the year component through the .dt accessor to confirm the parsed
# dates carry the expected years.
trash_hauler['Date Opened'].dt.year

0        2017
1        2017
2        2017
3        2017
4        2017
         ... 
20221    2019
20222    2019
20223    2019
20224    2019
20225    2019
Name: Date Opened, Length: 20226, dtype: int32

In [178]:
# Case-insensitive regex for free-text descriptions that report a missed
# pickup: any word starting with "miss", or the phrases "not emptied" /
# "not picked up".
pattern = r"\bmiss\w*\b|not\s+emptied|not\s+picked\s+up"

# Build each condition as a named boolean mask; na=False treats missing text
# as "no match" so the masks contain no NA values.
hauled_by_red_river = trash_hauler['Trash Hauler'].str.contains("Red River", case=False, na=False)
request_is_missed_pickup = trash_hauler['Request'].str.contains("Missed Pickup", case=False, na=False)
description_mentions_miss = trash_hauler['Description'].str.contains(pattern, case=False, na=False)

# Red River requests flagged as missed either by request type or by description.
missed_red_river = trash_hauler[
    hauled_by_red_river & (request_is_missed_pickup | description_mentions_miss)
]

In [182]:
# Keep only the first row for each (address, date) pair so repeated reports
# for the same address on the same day are counted once.
missed_red_river = missed_red_river.drop_duplicates(
    subset=['Incident Address', 'Date Opened'],
    keep='first',
)

In [183]:
# Display the filtered, de-duplicated Red River missed-pickup requests.
# NOTE(review): the saved output includes an `opened_day` column that no
# visible cell creates, and the execution counts jump from 178 to 182.
# Confirm the notebook still reproduces under Restart Kernel -> Run All.
missed_red_river

Unnamed: 0,Request Number,Date Opened,Request,Description,Incident Address,Zip Code,Trash Hauler,Trash Route,Council District,State Plan X,State Plan Y,opened_day
1,25274,2017-11-01,Trash - Curbside/Alley Missed Pickup,Curb/Trash miss Tuesday.,4028 Clarksville Pike,37218,RED RIVER,4202,1,1.721259e+06,685444.799565,2017-11-01
2,25276,2017-11-01,Trash - Curbside/Alley Missed Pickup,Curb/trash miss Tuesday.,6528 Thunderbird Dr,37209,RED RIVER,4205,20,1.707027e+06,659887.471571,2017-11-01
4,25312,2017-11-01,Trash - Curbside/Alley Missed Pickup,Missed the even side of the road.,604 croley dr,37209,RED RIVER,4203,20,1.710186e+06,664205.101066,2017-11-01
8,25330,2017-11-01,Trash - Curbside/Alley Missed Pickup,Missed.,4484 Lavergne Couchville Pike,37013,RED RIVER,4210,33,1.794534e+06,618749.342732,2017-11-01
10,25341,2017-11-01,Trash - Curbside/Alley Missed Pickup,Missed trash pickup - resident has at curb for...,3113 HYDES FERRY RD,37218,RED RIVER,4204,2,1.721578e+06,676018.399900,2017-11-01
...,...,...,...,...,...,...,...,...,...,...,...,...
20217,267106,2019-11-01,Trash - Curbside/Alley Missed Pickup,Caller stated trash was missed & all her neigh...,"3318 Anderson Rd, Nashville, TN 37013, United ...",37013,RED RIVER,3501,29,1.789259e+06,637053.276336,2019-11-01
20220,267121,2019-11-01,Trash - Curbside/Alley Missed Pickup,missed,"2709 Crestdale Dr, Nashville, TN 37214, United...",37214,RED RIVER,1502,15,1.770240e+06,676334.399319,2019-11-01
20221,267125,2019-11-01,Trash - Curbside/Alley Missed Pickup,MISSED...NEIGHBORS MISSED,2731 Murfreesboro Pike,37013,RED RIVER,4502,32,1.781137e+06,632448.551144,2019-11-01
20223,267130,2019-11-01,Trash - Curbside/Alley Missed Pickup,missed several,"2943 Windemere Cir, Nashville, TN 37214, Unite...",37214,RED RIVER,1502,15,1.770293e+06,674936.303809,2019-11-01


In [185]:
# Rank addresses by number of missed-pickup requests and show the top 20.
# NOTE(review): the output mixes address formats (bare street vs. full
# "..., Nashville, TN ..., United States", and mixed letter case), so counts
# for one location may be split across variants. Consider normalizing first.
missed_red_river['Incident Address'].value_counts().head(20)

Incident Address
12546 Old Hickory Blvd, Nashville, TN 37013, United States    19
5135 Hickory Hollow Pkwy                                      18
3710 N NATCHEZ CT                                             17
802 Crescent Rd, Nashville, TN 37205, United States           16
607 Estes Rd, Nashville, TN 37215, United States              15
6007 Obrien Ave, Nashville, TN 37209, United States           14
116 Margaret St, Nashville, TN 37115, United States           12
617 kinsey blvd                                               12
14881 Old Hickory Blvd, Nashville, TN 37013, United States    11
111 Barton Ln, Nashville, TN 37214, United States             11
1537 Harding Pl, Nashville, TN 37215, United States           11
209 Page Rd, Nashville, TN 37205, United States               11
320 Old Hickory Blvd, Nashville, TN 37221, United States      11
2731 Murfreesboro Pike                                        10
3929 Stewarts Ln                                              10
115 Bart