In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df_trash = pd.read_csv('../data/trash_hauler_report_with_lat_lng.csv')

In [3]:
df_trash.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20226 entries, 0 to 20225
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Request Number    20226 non-null  int64  
 1   Date Opened       20226 non-null  object 
 2   Request           20226 non-null  object 
 3   Description       20195 non-null  object 
 4   Incident Address  20217 non-null  object 
 5   Zip Code          20151 non-null  float64
 6   Trash Hauler      19325 non-null  object 
 7   Trash Route       19279 non-null  object 
 8   Council District  20177 non-null  float64
 9   State Plan X      20198 non-null  float64
 10  State Plan Y      20198 non-null  float64
 11  LONGITUDE         20198 non-null  float64
 12  LATITUDE          20198 non-null  float64
dtypes: float64(6), int64(1), object(6)
memory usage: 2.0+ MB


In [4]:
#In this data question you will be working data of service request related to missed trash pickups from hubNashville, Metro Nashville government's comprehensive customer service system 
#(https://hub.nashville.gov).

#As part of Metro's contract with Red River Waste Solutions, failure to remedy an action or inaction will result in liquidated damages. 
#One category of liquidated damages is related to chronic problems in any category of service at the same premises. 
#A chronic problem is defined as more than one missed pickup for any address. The first missed pickup will not result in a fine; however, 
#every subsequent missed pickup will result in a $200 fine.

#Your job is to determine the total amount of damages due to missed pickups. Note that not all rows that you have been provided correspond 
#to missed pickups and that you will need to ensure that you are only counting missed pickups.

#After determining the total amount of damages, you can look at other questions:

#⦁	What other types of complaints are there?
#⦁	Are there any geospatial analysis you can do?  Which visualizations can you create?
#⦁	How do metro crews compare to the contractor's performance?
#⦁	How much does each trash hauler owe?
#⦁	What were to total missed pickup by route?

In [5]:
#Find the total amount of damages for more than one missed pickup for any address. Fine is $200 for more than one missed pickup.

In [6]:
df_trash.describe()

Unnamed: 0,Request Number,Zip Code,Council District,State Plan X,State Plan Y,LONGITUDE,LATITUDE
count,20226.0,20151.0,20177.0,20198.0,20198.0,20198.0,20198.0
mean,149178.986354,37180.621805,18.0791,1763012.0,659506.9,-86.709972,36.12073
std,71837.306889,66.310919,9.917632,775495.1,43554.05,2.02563,0.845846
min,25270.0,37013.0,1.0,1663490.0,-271910.6,-87.031386,0.000322
25%,86052.75,37205.0,8.0,1727831.0,640654.2,-86.81529,36.090817
50%,145223.5,37209.0,19.0,1745845.0,656485.8,-86.754014,36.133888
75%,217162.75,37214.0,26.0,1759491.0,675673.6,-86.707973,36.186992
max,267137.0,37228.0,35.0,34968920.0,2204382.0,-0.000798,36.466974


In [7]:
# Check for missing values in the DataFrame
missing_values = df_trash.isnull().sum()
print("Missing Values:")
print(missing_values)

#these data strings were inspired by this website https://www.kdnuggets.com/7-essential-data-quality-checks-with-pandas

Missing Values:
Request Number        0
Date Opened           0
Request               0
Description          31
Incident Address      9
Zip Code             75
Trash Hauler        901
Trash Route         947
Council District     49
State Plan X         28
State Plan Y         28
LONGITUDE            28
LATITUDE             28
dtype: int64


In [8]:
missed_pickups = df_trash[df_trash['Request'].isin(['Trash - Curbside/Alley Missed Pickup', 'Trash - Backdoor', 'Trash Collection Complaint'])]
print(f"Number of missed pickups: {len(missed_pickups)}")

Number of missed pickups: 19969


In [9]:
fine_amount = 200

misses_allowed = 1

In [10]:
#All missed per address
address_count = missed_pickups['Incident Address'].value_counts()

In [11]:
address_count

Incident Address
5135 Hickory Hollow Pkwy                                       21
3710 N NATCHEZ CT                                              20
6007 Obrien Ave, Nashville, TN 37209, United States            19
12546 Old Hickory Blvd, Nashville, TN 37013, United States     19
802 Crescent Rd, Nashville, TN 37205, United States            18
                                                               ..
1326 Coreland Dr, Nashville, TN 37115, United States            1
353 Huntington Ridge Dr, Nashville, TN 37211, United States     1
2926 Primrose Cir, Nashville, TN 37212, United States           1
1502 55th Ave n                                                 1
1812 Cedar Ln, Nashville, TN 37212, United States               1
Name: count, Length: 13936, dtype: int64

In [12]:
#missed pickups for just Red River
red_river = missed_pickups[missed_pickups['Trash Hauler'] == 'RED RIVER']

In [13]:
red_river_counts = red_river['Incident Address'].value_counts()

In [14]:
# fines
all_fines = ((address_count[address_count > 1] == misses_allowed) * fine_amount).sum()

In [15]:
print(all_fines)

0


In [16]:
#fines just for Red River
red_river_fines = ((red_river_counts[red_river_counts >1] == misses_allowed) * fine_amount).sum()

In [17]:
red_river_fines

np.int64(0)