In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re

In [2]:
# Load the trash-hauler service-request report (the CSV that includes lat/lng columns).
trash_report_path = '../data/trash_hauler_report_with_lat_lng.csv'
df_trash = pd.read_csv(trash_report_path)

In [3]:
# Explore the data: print each column's dtype and non-null count, which shows
# where values are missing before any filtering is done.
df_trash.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20226 entries, 0 to 20225
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Request Number    20226 non-null  int64  
 1   Date Opened       20226 non-null  object 
 2   Request           20226 non-null  object 
 3   Description       20195 non-null  object 
 4   Incident Address  20217 non-null  object 
 5   Zip Code          20151 non-null  float64
 6   Trash Hauler      19325 non-null  object 
 7   Trash Route       19279 non-null  object 
 8   Council District  20177 non-null  float64
 9   State Plan X      20198 non-null  float64
 10  State Plan Y      20198 non-null  float64
 11  LONGITUDE         20198 non-null  float64
 12  LATITUDE          20198 non-null  float64
dtypes: float64(6), int64(1), object(6)
memory usage: 2.0+ MB


In [4]:
# Enumerate the distinct values found in the "Request" column, in order of
# first appearance.
unique_requests = pd.unique(df_trash["Request"])
print(unique_requests)

['Trash - Backdoor' 'Trash - Curbside/Alley Missed Pickup'
 'Trash Collection Complaint' 'Damage to Property']


In [5]:
# Tally how many rows fall under each request category (most frequent first).
request_column = df_trash["Request"]
request_category_counts = request_column.value_counts()
print(request_category_counts)

Request
Trash - Curbside/Alley Missed Pickup    15028
Trash - Backdoor                         2629
Trash Collection Complaint               2312
Damage to Property                        257
Name: count, dtype: int64


In [6]:
# Normalize column labels to snake_case: lowercase, with spaces turned into
# underscores (e.g. "Incident Address" -> "incident_address").
df_trash.columns = df_trash.columns.str.lower().str.replace(" ", "_", regex=False)

In [7]:
# Preview the first five rows to confirm the renamed columns.
df_trash.head(n=5)

Unnamed: 0,request_number,date_opened,request,description,incident_address,zip_code,trash_hauler,trash_route,council_district,state_plan_x,state_plan_y,longitude,latitude
0,25270,11/1/2017,Trash - Backdoor,"house with the wheel chair ramp, they share dr...",3817 Crouch Dr,37207.0,RED RIVER,3205,2.0,1727970.412,686779.4781,-86.815392,36.217292
1,25274,11/1/2017,Trash - Curbside/Alley Missed Pickup,Curb/Trash miss Tuesday.,4028 Clarksville Pike,37218.0,RED RIVER,4202,1.0,1721259.366,685444.7996,-86.838103,36.21347
2,25276,11/1/2017,Trash - Curbside/Alley Missed Pickup,Curb/trash miss Tuesday.,6528 Thunderbird Dr,37209.0,RED RIVER,4205,20.0,1707026.753,659887.4716,-86.885562,36.142923
3,25307,11/1/2017,Trash - Curbside/Alley Missed Pickup,missed,2603 old matthews rd,37207.0,WASTE IND,2206,2.0,1735691.771,685027.2459,-86.78917,36.212652
4,25312,11/1/2017,Trash - Curbside/Alley Missed Pickup,Missed the even side of the road.,604 croley dr,37209.0,RED RIVER,4203,20.0,1710185.772,664205.1011,-86.874995,36.154861


In [8]:
# Preview the last five rows of the report.
df_trash.tail(n=5)

Unnamed: 0,request_number,date_opened,request,description,incident_address,zip_code,trash_hauler,trash_route,council_district,state_plan_x,state_plan_y,longitude,latitude
20221,267125,11/1/2019,Trash - Curbside/Alley Missed Pickup,MISSED...NEIGHBORS MISSED,2731 Murfreesboro Pike,37013.0,RED RIVER,4502,32.0,1781137.263,632448.5511,-86.63397,36.06913
20222,267126,11/1/2019,Trash - Curbside/Alley Missed Pickup,entire alley,"1621 Long Ave, Nashville, TN 37206, United States",37206.0,METRO,9508,6.0,1749711.399,669201.6016,-86.741242,36.169482
20223,267130,11/1/2019,Trash - Curbside/Alley Missed Pickup,missed several,"2943 Windemere Cir, Nashville, TN 37214, Unite...",37214.0,RED RIVER,1502,15.0,1770293.388,674936.3038,-86.671647,36.185643
20224,267134,11/1/2019,Trash - Curbside/Alley Missed Pickup,Caller stated trash was missed & were only pic...,"3325 Murfreesboro Pike, Nashville, TN 37013, U...",37013.0,RED RIVER,4502,32.0,1785224.998,627146.4002,-86.620025,36.054637
20225,267137,11/1/2019,Trash - Curbside/Alley Missed Pickup,possibly others missed as well,"604 Somerset Ct, Nashville, TN 37217, United S...",37217.0,RED RIVER,2505,29.0,1781360.323,637742.0068,-86.633331,36.083675


Missed Trash Pickups

In this data question you will be working with data on service requests related to missed trash pickups from hubNashville, 
Metro Nashville government's comprehensive customer service system (https://hub.nashville.gov).

As part of Metro's contract with Red River Waste Solutions, failure to remedy an action or inaction will result in liquidated damages. 
One category of liquidated damages is related to chronic problems in any category of service at the same premises. 
A chronic problem is defined as more than one missed pickup for any address. 
The first missed pickup will not result in a fine; however, every subsequent missed pickup will result in a $200 fine.

Your job is to determine the total amount of damages due to missed pickups. Note that not all rows that you have been provided 
correspond to missed pickups and that you will need to ensure that you are only counting missed pickups.

After determining the total amount of damages, you can look at other questions:
⦁	What other types of complaints are there?
⦁	Are there any geospatial analyses you can do? Which visualizations can you create?
⦁	How do metro crews compare to the contractor's performance?
⦁	How much does each trash hauler owe?
⦁	What were the total missed pickups by route?

In [9]:
# Count the null entries in every column and print them under a heading.
missing_values = df_trash.isna().sum()
print("Missing Values:", missing_values, sep="\n")

Missing Values:
request_number        0
date_opened           0
request               0
description          31
incident_address      9
zip_code             75
trash_hauler        901
trash_route         947
council_district     49
state_plan_x         28
state_plan_y         28
longitude            28
latitude             28
dtype: int64


In [10]:
# Keep only the request categories that this analysis treats as missed pickups.
# NOTE(review): this retains every category except 'Damage to Property';
# 'Trash Collection Complaint' rows may not all be missed pickups -- confirm
# whether they should count toward fines.
missed_pickup_requests = [
    'Trash - Curbside/Alley Missed Pickup',
    'Trash - Backdoor',
    'Trash Collection Complaint',
]
is_missed_pickup = df_trash['request'].isin(missed_pickup_requests)
missed_pickups = df_trash.loc[is_missed_pickup]
print(f"Number of missed pickups: {len(missed_pickups)}")

Number of missed pickups: 19969


In [11]:
# Liquidated-damages parameters: dollars charged per fined miss, and the number
# of misses at one address that carry no fine.
fine_amount, misses_allowed = 200, 1

In [12]:
# Count missed-pickup requests per incident address, all haulers combined.
# NOTE(review): addresses are compared as raw strings, so differently formatted
# spellings of the same premises (e.g. with/without ", Nashville, TN ...") would
# be counted separately -- confirm whether normalization is needed.
incident_addresses = missed_pickups['incident_address']
address_count = incident_addresses.value_counts()

In [13]:
# Display the per-address counts (value_counts orders them most-frequent first).
address_count

incident_address
5135 Hickory Hollow Pkwy                                      21
3710 N NATCHEZ CT                                             20
12546 Old Hickory Blvd, Nashville, TN 37013, United States    19
6007 Obrien Ave, Nashville, TN 37209, United States           19
802 Crescent Rd, Nashville, TN 37205, United States           18
                                                              ..
1816 Glade St, Nashville, TN 37207, United States              1
720 Blackstone Ave, Nashville, TN 37115, United States         1
605 Lischey Ave, Nashville, TN 37207, United States            1
100 Desoto Dr, Nashville, TN 37210, United States              1
2410a Inga St, Nashville, TN 37206, United States              1
Name: count, Length: 13936, dtype: int64

In [14]:
# Restrict the missed pickups to rows whose hauler is exactly 'RED RIVER'.
is_red_river = missed_pickups['trash_hauler'].eq('RED RIVER')
red_river = missed_pickups.loc[is_red_river]
red_river

Unnamed: 0,request_number,date_opened,request,description,incident_address,zip_code,trash_hauler,trash_route,council_district,state_plan_x,state_plan_y,longitude,latitude
0,25270,11/1/2017,Trash - Backdoor,"house with the wheel chair ramp, they share dr...",3817 Crouch Dr,37207.0,RED RIVER,3205,2.0,1727970.412,686779.4781,-86.815392,36.217292
1,25274,11/1/2017,Trash - Curbside/Alley Missed Pickup,Curb/Trash miss Tuesday.,4028 Clarksville Pike,37218.0,RED RIVER,4202,1.0,1721259.366,685444.7996,-86.838103,36.213470
2,25276,11/1/2017,Trash - Curbside/Alley Missed Pickup,Curb/trash miss Tuesday.,6528 Thunderbird Dr,37209.0,RED RIVER,4205,20.0,1707026.753,659887.4716,-86.885562,36.142923
4,25312,11/1/2017,Trash - Curbside/Alley Missed Pickup,Missed the even side of the road.,604 croley dr,37209.0,RED RIVER,4203,20.0,1710185.772,664205.1011,-86.874995,36.154861
5,25317,11/1/2017,Trash Collection Complaint,left trash cart in middle of driveway instead ...,3602 floral dr,37211.0,RED RIVER,4304,16.0,1751660.164,643215.2011,-86.733980,36.098140
...,...,...,...,...,...,...,...,...,...,...,...,...,...
20220,267121,11/1/2019,Trash - Curbside/Alley Missed Pickup,missed,"2709 Crestdale Dr, Nashville, TN 37214, United...",37214.0,RED RIVER,1502,15.0,1770240.199,676334.3993,-86.671860,36.189483
20221,267125,11/1/2019,Trash - Curbside/Alley Missed Pickup,MISSED...NEIGHBORS MISSED,2731 Murfreesboro Pike,37013.0,RED RIVER,4502,32.0,1781137.263,632448.5511,-86.633970,36.069130
20223,267130,11/1/2019,Trash - Curbside/Alley Missed Pickup,missed several,"2943 Windemere Cir, Nashville, TN 37214, Unite...",37214.0,RED RIVER,1502,15.0,1770293.388,674936.3038,-86.671647,36.185643
20224,267134,11/1/2019,Trash - Curbside/Alley Missed Pickup,Caller stated trash was missed & were only pic...,"3325 Murfreesboro Pike, Nashville, TN 37013, U...",37013.0,RED RIVER,4502,32.0,1785224.998,627146.4002,-86.620025,36.054637


In [15]:
# Number of Red River missed pickups recorded at each incident address.
red_river_addresses = red_river['incident_address']
red_river_counts = red_river_addresses.value_counts()
red_river_counts

incident_address
12546 Old Hickory Blvd, Nashville, TN 37013, United States    19
3710 N NATCHEZ CT                                             19
6007 Obrien Ave, Nashville, TN 37209, United States           18
5135 Hickory Hollow Pkwy                                      18
802 Crescent Rd, Nashville, TN 37205, United States           17
                                                              ..
604 croley dr                                                  1
3602 floral dr                                                 1
4484 Lavergne Couchville Pike                                  1
3113 HYDES FERRY RD                                            1
6543 Thunderbird Dr                                            1
Name: count, Length: 9975, dtype: int64

In [16]:
# Calculate fines across all haulers.
# Each address gets `misses_allowed` unfined misses; every miss beyond that
# costs `fine_amount`. The mask is driven by `misses_allowed` rather than a
# literal 1 so the threshold and the subtraction cannot drift apart if the
# constant is changed.
chargeable_misses = address_count[address_count > misses_allowed] - misses_allowed
all_fines = (chargeable_misses * fine_amount).sum()
all_fines

np.int64(1204800)

In [17]:
# Red River fines.
# Each address gets `misses_allowed` unfined misses; every miss beyond that
# costs `fine_amount`. The mask is driven by `misses_allowed` rather than a
# literal 1 so the threshold and the subtraction stay consistent.
red_river_chargeable = red_river_counts[red_river_counts > misses_allowed] - misses_allowed
red_river_fines = (red_river_chargeable * fine_amount).sum()
red_river_fines

np.int64(844400)

After determining the total amount of damages, you can look at other questions: 
⦁ What other types of complaints are there? 
⦁ Are there any geospatial analyses you can do? Which visualizations can you create? 
⦁ How do metro crews compare to the contractor's performance? 
⦁ How much does each trash hauler owe? 
⦁ What were the total missed pickups by route?

In [20]:
# What other types of complaints are there? List every distinct request category.
pd.unique(df_trash['request'])

array(['Trash - Backdoor', 'Trash - Curbside/Alley Missed Pickup',
       'Trash Collection Complaint', 'Damage to Property'], dtype=object)

In [21]:
# How much does each trash hauler owe? Start by listing the hauler labels that
# occur in the data, so each one can be filtered on separately.
pd.unique(df_trash['trash_hauler'])

array(['RED RIVER', 'WASTE IND', 'METRO', nan, 'Metro'], dtype=object)

In [26]:
# Restrict the missed pickups to rows whose hauler is exactly 'WASTE IND'.
is_waste_ind = missed_pickups['trash_hauler'].eq('WASTE IND')
waste_ind = missed_pickups.loc[is_waste_ind]

In [24]:
# Number of 'WASTE IND' missed pickups recorded at each incident address.
waste_ind_addresses = waste_ind['incident_address']
waste_ind_counts = waste_ind_addresses.value_counts()
waste_ind_counts

incident_address
613 Troy Ct, Nashville, TN 37207, United States          9
3406 Batavia St, Nashville, TN 37209, United States      9
518 monroe st                                            8
15 Hermitage Ave                                         7
80 Lyle Ln, Nashville, TN 37210, United States           7
                                                        ..
907 11th Ave N, Nashville, TN 37208, United States       1
4001 Nebraska Ave, Nashville, TN 37209, United States    1
301 33rd Ave N, Nashville, TN 37209, United States       1
403 Merritt Ave, Nashville, TN 37203, USA                1
2208 Sadler Ave, Nashville, Tennessee, 37210             1
Name: count, Length: 987, dtype: int64

In [25]:
# 'WASTE IND' fines.
# Each address gets `misses_allowed` unfined misses; every miss beyond that
# costs `fine_amount`. The mask is driven by `misses_allowed` rather than a
# literal 1 so the threshold and the subtraction stay consistent.
waste_ind_chargeable = waste_ind_counts[waste_ind_counts > misses_allowed] - misses_allowed
waste_ind_fines = (waste_ind_chargeable * fine_amount).sum()
waste_ind_fines

np.int64(69400)

In [33]:
# Metro fines.
# The hauler column contains both 'METRO' and 'Metro', so compare on the
# upper-cased label to capture both spellings.
metro_pickups = missed_pickups[
    missed_pickups["trash_hauler"].str.upper() == "METRO"
]
# Missed pickups per incident address for Metro crews.
metro_counts = metro_pickups['incident_address'].value_counts()
# `metro` stays bound to the per-address counts so existing references keep
# working; the DataFrame now has its own name instead of sharing this one.
metro = metro_counts
# Each address gets `misses_allowed` unfined misses; every miss beyond that
# costs `fine_amount`. The mask uses `misses_allowed` rather than a literal 1.
metro_chargeable = metro_counts[metro_counts > misses_allowed] - misses_allowed
metro_fines = (metro_chargeable * fine_amount).sum()
metro_fines

np.int64(195600)

Metro: $195,600
Waste Ind: $69,400
Red River: $844,400