In [30]:
import pandas as pd
import numpy as np
import re

In [31]:
# Load the hauler report export into a DataFrame and preview its first rows.
trash = pd.read_csv(filepath_or_buffer='trash_hauler_report.csv')
trash.head(n=5)

Unnamed: 0,Request Number,Date Opened,Request,Description,Incident Address,Zip Code,Trash Hauler,Trash Route,Council District,State Plan X,State Plan Y
0,25270,11/01/17,Trash - Backdoor,"house with the wheel chair ramp, they share dr...",3817 Crouch Dr,37207.0,RED RIVER,3205,2.0,1727970.0,686779.478089
1,25274,11/01/17,Trash - Curbside/Alley Missed Pickup,Curb/Trash miss Tuesday.,4028 Clarksville Pike,37218.0,RED RIVER,4202,1.0,1721259.0,685444.799565
2,25276,11/01/17,Trash - Curbside/Alley Missed Pickup,Curb/trash miss Tuesday.,6528 Thunderbird Dr,37209.0,RED RIVER,4205,20.0,1707027.0,659887.471571
3,25307,11/01/17,Trash - Curbside/Alley Missed Pickup,missed,2603 old matthews rd,37207.0,WASTE IND,2206,2.0,1735692.0,685027.245923
4,25312,11/01/17,Trash - Curbside/Alley Missed Pickup,Missed the even side of the road.,604 croley dr,37209.0,RED RIVER,4203,20.0,1710186.0,664205.101066


In [32]:
# Tidy the raw frame: strip stray whitespace from the column headers.
trash.columns = trash.columns.str.strip()

# Render zip codes as zero-padded, 5-character strings. Rows without a zip stay
# missing: casting the nullable integers straight to str would turn them into
# the literal "<NA>", which zfill(5) then pads to "0<NA>".
zip_int = trash['Zip Code'].astype('Int64')
trash['Zip Code'] = zip_int.astype(str).str.zfill(5).where(zip_int.notna())

# Property-damage requests are excluded from everything that follows.
trash = trash[trash['Request'] != 'Damage to Property']

# Request categories that count as a missed pickup.
missed_categories = ['Trash - Backdoor', 'Trash - Curbside/Alley Missed Pickup']

# Free-text cues in the description: any word starting with "miss"
# (miss, missed, missing, ...), "not emptied", "not picked up", "no pick up".
pattern = r"\bmiss\w*\b|not\s+emptied|not\s+picked\s+up|no\s+pick\s+up"

# The category names are literal text, so escape them before joining them into
# a regex alternation for str.contains.
missed_pattern = '|'.join(re.escape(category) for category in missed_categories)

# Row masks: hauler is Metro, and the complaint is a missed pickup either by
# request category or by the wording of the description.
is_metro = trash['Trash Hauler'].str.contains("Metro", case=False, na=False)
in_missed_category = trash['Request'].str.contains(missed_pattern, case=False, na=False)
mentions_miss = trash['Description'].str.contains(pattern, case=False, na=False)

# Rows without an address cannot be grouped by address later, so drop them.
# .copy() makes mmissed an independent frame, so adding columns to it later
# does not trigger SettingWithCopyWarning.
mmissed = (
    trash[is_metro & (in_missed_category | mentions_miss)]
    .dropna(subset=['Incident Address'])
    .copy()
)

# Same-address / same-day duplicates are removed further down, once the
# addresses have been normalised.

def normalize_address(s: str) -> str:
    """Return a canonical, upper-case street address used as a grouping key.

    Missing values (anything ``pd.isna`` reports as missing) are returned
    unchanged. Otherwise only the text before the first comma is kept, it is
    upper-cased, periods become spaces, and runs of whitespace collapse to a
    single space with no leading or trailing blanks.
    """
    if pd.isna(s):
        return s
    # Everything after the first comma (city, state, zip, country) is dropped.
    street, _, _ = s.partition(",")
    without_periods = street.upper().replace(".", " ")
    # split() with no arguments discards leading/trailing whitespace and
    # treats any whitespace run as a single separator.
    # (direction / street-type replacements would go here)
    return " ".join(without_periods.split())

# Rules used below, named so they can be tuned in one place.
MISSED_EVENT_GAP_DAYS = 6     # a complaint more than this many days after the previous one opens a new event
FINE_PER_REPEAT_EVENT = 200   # amount charged for every event after an address's first

# Work on an independent copy: mmissed was produced by filtering `trash`, and
# adding columns to such a slice raises SettingWithCopyWarning.
mmissed = mmissed.copy()

mmissed["address_norm"] = mmissed["Incident Address"].apply(normalize_address)

# Unparseable dates become NaT rather than raising.
# NOTE(review): NaT rows sort last within their address and open their own
# event; they are never fined (NaT never equals event_date) but still count as
# an event downstream. Consider dropping them if the data ever contains any.
mmissed["Date Opened"] = pd.to_datetime(mmissed["Date Opened"], errors="coerce")
mmissed["date_only"] = mmissed["Date Opened"].dt.date

# One complaint per address per calendar day (the earliest is kept).
mmissed_dedup = (
    mmissed.sort_values(['address_norm', 'date_only', 'Date Opened'])
    .drop_duplicates(subset=['address_norm', 'date_only'], keep='first')
    .reset_index(drop=True)
)

# Whole days since the previous complaint at the same address; NaN for the
# first complaint at an address.
gap = mmissed_dedup.groupby("address_norm")["Date Opened"].diff().dt.days
mmissed_dedup["new_event"] = gap.isna() | (gap > MISSED_EVENT_GAP_DAYS)

# Running count of event starts per address gives a 1-based event number.
mmissed_dedup["event_id"] = mmissed_dedup.groupby("address_norm")["new_event"].cumsum()

# Date of the earliest complaint within each (address, event).
grp = mmissed_dedup.groupby(['address_norm', "event_id"])['Date Opened']
mmissed_dedup["event_date"] = grp.transform("min")

mmissed_dedup["event_seq"] = mmissed_dedup["event_id"].astype("Int64")

# The fine is charged once per repeat event, on the row that opens the event.
# Columns are created in the same order as before so exported CSVs keep their layout.
opens_event = mmissed_dedup['Date Opened'] == mmissed_dedup["event_date"]
is_repeat_event = mmissed_dedup["event_seq"] > 1
mmissed_dedup["fine_event"] = (is_repeat_event & opens_event).astype(int) * FINE_PER_REPEAT_EVENT
mmissed_dedup["first_event"] = opens_event

  mmissed["Date Opened"] = pd.to_datetime(mmissed["Date Opened"], errors="coerce")


In [5]:
# Inspect the de-duplicated complaints together with the derived event and fine columns.
mmissed_dedup

Unnamed: 0,Request Number,Date Opened,Request,Description,Incident Address,Zip Code,Trash Hauler,Trash Route,Council District,State Plan X,State Plan Y,address_norm,date_only,new_event,event_id,event_date,event_seq,fine_event,first_event
0,79395,2018-06-29,Trash - Curbside/Alley Missed Pickup,the entire street was missed,1 BELLE FORREST AVE C,37206,METRO,9502,7.0,1.751942e+06,677895.310490,1 BELLE FORREST AVE C,2018-06-29,True,1,2018-06-29,1,0,True
1,79884,2018-07-02,Trash - Curbside/Alley Missed Pickup,Missed entire street- carts are curbside in fr...,10 Belle Forrest Ave,37206,METRO,9502,7.0,1.751718e+06,678077.934103,10 BELLE FORREST AVE,2018-07-02,True,1,2018-07-02,1,0,True
2,257588,2019-10-10,Trash - Curbside/Alley Missed Pickup,miss. driveway is on 14th,"100 Marshall Ct, Nashville, TN 37212, United S...",37212,METRO,9407,19.0,1.735543e+06,660440.744955,100 MARSHALL CT,2019-10-10,True,1,2019-10-10,1,0,True
3,266788,2019-11-01,Trash - Curbside/Alley Missed Pickup,missed 102 marshall court as well,"100 Marshall Ct, Nashville, TN 37212, United S...",37212,METRO,9407,19.0,1.735543e+06,660440.744955,100 MARSHALL CT,2019-11-01,True,2,2019-11-01,2,200,True
4,253133,2019-09-30,Trash - Curbside/Alley Missed Pickup,MISSED- ALLEY,"1000 Gilmore Ave, Nashville, TN 37204, United ...",37204,METRO,9403,17.0,1.736212e+06,652995.000671,1000 GILMORE AVE,2019-09-30,True,1,2019-09-30,1,0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3041,149959,2019-03-04,Trash - Curbside/Alley Missed Pickup,entire street was missed,"961 16th Ave N, Nashville, TN 37208, United St...",37208,METRO,9208,19.0,1.731877e+06,668380.001408,961 16TH AVE N,2019-03-04,True,1,2019-03-04,1,0,True
3042,224867,2019-08-02,Trash - Curbside/Alley Missed Pickup,ENTIRE STREET AND SURROUNDING STREETS SUCH AS ...,"965 9th Ave S, Nashville, TN 37203, United States",37203,METRO,9407,17.0,1.737999e+06,660504.848803,965 9TH AVE S,2019-08-02,True,1,2019-08-02,1,0,True
3043,226241,2019-08-06,Trash - Curbside/Alley Missed Pickup,I called 311 on 2 AUgust and filed a missed ga...,"965 9th Ave S, Nashville, TN 37203, United States",37203,METRO,9407,17.0,1.737999e+06,660504.848803,965 9TH AVE S,2019-08-06,False,1,2019-08-02,1,0,False
3044,128671,2019-01-07,Trash - Curbside/Alley Missed Pickup,Missed...along with neighbors.,"994 Dozier Pl, Nashville, TN 37216, United States",37216,METRO,9507,5.0,1.748996e+06,681411.999519,994 DOZIER PL,2019-01-07,True,1,2019-01-07,1,0,True


In [33]:
# Keep only the rows flagged as opening an event, then show the result.
mmissed_final = mmissed_dedup.loc[mmissed_dedup['new_event'].eq(True)]
mmissed_final

Unnamed: 0,Request Number,Date Opened,Request,Description,Incident Address,Zip Code,Trash Hauler,Trash Route,Council District,State Plan X,State Plan Y,address_norm,date_only,new_event,event_id,event_date,event_seq,fine_event,first_event
0,79395,2018-06-29,Trash - Curbside/Alley Missed Pickup,the entire street was missed,1 BELLE FORREST AVE C,37206,METRO,9502,7.0,1.751942e+06,677895.310490,1 BELLE FORREST AVE C,2018-06-29,True,1,2018-06-29,1,0,True
1,79884,2018-07-02,Trash - Curbside/Alley Missed Pickup,Missed entire street- carts are curbside in fr...,10 Belle Forrest Ave,37206,METRO,9502,7.0,1.751718e+06,678077.934103,10 BELLE FORREST AVE,2018-07-02,True,1,2018-07-02,1,0,True
2,257588,2019-10-10,Trash - Curbside/Alley Missed Pickup,miss. driveway is on 14th,"100 Marshall Ct, Nashville, TN 37212, United S...",37212,METRO,9407,19.0,1.735543e+06,660440.744955,100 MARSHALL CT,2019-10-10,True,1,2019-10-10,1,0,True
3,266788,2019-11-01,Trash - Curbside/Alley Missed Pickup,missed 102 marshall court as well,"100 Marshall Ct, Nashville, TN 37212, United S...",37212,METRO,9407,19.0,1.735543e+06,660440.744955,100 MARSHALL CT,2019-11-01,True,2,2019-11-01,2,200,True
4,253133,2019-09-30,Trash - Curbside/Alley Missed Pickup,MISSED- ALLEY,"1000 Gilmore Ave, Nashville, TN 37204, United ...",37204,METRO,9403,17.0,1.736212e+06,652995.000671,1000 GILMORE AVE,2019-09-30,True,1,2019-09-30,1,0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3040,40981,2018-01-22,Trash - Curbside/Alley Missed Pickup,missed pick up,950 strouse ave a,37206,METRO,9503,5.0,1.747772e+06,678308.902742,950 STROUSE AVE A,2018-01-22,True,1,2018-01-22,1,0,True
3041,149959,2019-03-04,Trash - Curbside/Alley Missed Pickup,entire street was missed,"961 16th Ave N, Nashville, TN 37208, United St...",37208,METRO,9208,19.0,1.731877e+06,668380.001408,961 16TH AVE N,2019-03-04,True,1,2019-03-04,1,0,True
3042,224867,2019-08-02,Trash - Curbside/Alley Missed Pickup,ENTIRE STREET AND SURROUNDING STREETS SUCH AS ...,"965 9th Ave S, Nashville, TN 37203, United States",37203,METRO,9407,17.0,1.737999e+06,660504.848803,965 9TH AVE S,2019-08-02,True,1,2019-08-02,1,0,True
3044,128671,2019-01-07,Trash - Curbside/Alley Missed Pickup,Missed...along with neighbors.,"994 Dozier Pl, Nashville, TN 37216, United States",37216,METRO,9507,5.0,1.748996e+06,681411.999519,994 DOZIER PL,2019-01-07,True,1,2019-01-07,1,0,True


In [34]:
# Export the event-level rows; the positional index is not written.
mmissed_final.to_csv(path_or_buf='mmissed_final.csv', index=False)

In [40]:
# Sum of the per-event fines. This previously read from `mmissed_filtered`,
# a name that is never defined in this notebook, so the cell failed on a
# fresh kernel; `mmissed_final` is the event-level frame built above.
total_fines = int(mmissed_final["fine_event"].sum())

# Number of distinct (address, event) pairs other than each address's first event.
events_fined = int(
    mmissed_final[mmissed_final['event_id'] != 1].drop_duplicates(['address_norm', "event_id"]).shape[0]
)

In [41]:
# Show the summed fine amount.
total_fines

197400

In [42]:
# Show how many (address, event) pairs fall after an address's first event.
events_fined

987

In [None]:
# Writes the same frame to the same file as the earlier export cell.
mmissed_final.to_csv(path_or_buf='mmissed_final.csv', index=False)

In [47]:
# Subset of event rows that carry a non-zero fine, displayed for inspection.
mmissed_final_fined = mmissed_final.loc[mmissed_final['fine_event'].ne(0)]
mmissed_final_fined

Unnamed: 0,Request Number,Date Opened,Request,Description,Incident Address,Zip Code,Trash Hauler,Trash Route,Council District,State Plan X,State Plan Y,address_norm,date_only,new_event,event_id,event_date,event_seq,fine_event,first_event
3,266788,2019-11-01,Trash - Curbside/Alley Missed Pickup,missed 102 marshall court as well,"100 Marshall Ct, Nashville, TN 37212, United S...",37212,METRO,9407,19.0,1.735543e+06,660440.744955,100 MARSHALL CT,2019-11-01,True,2,2019-11-01,2,200,True
22,80210,2018-07-03,Trash Collection Complaint,Missed- trash 3 weeks - caller said missed all...,1005 N 6th St,37207,METRO,9203,5.0,1.743884e+06,676228.900673,1005 N 6TH ST,2018-07-03,True,2,2018-07-03,2,200,True
27,209281,2019-07-02,Trash - Curbside/Alley Missed Pickup,Alley/trash missed {Tuesday} seen truck but di...,"1006 Pennock Ave, Nashville, TN 37207, United ...",37207,METRO,9204,5.0,1.742195e+06,676283.198953,1006 PENNOCK AVE,2019-07-02,True,2,2019-07-02,2,200,True
42,158241,2019-03-22,Trash - Curbside/Alley Missed Pickup,i watched them drive by my house when i was ou...,"1009 14TH AVE S, 37212",37212,METRO,9407,19.0,1.735463e+06,660708.662868,1009 14TH AVE S,2019-03-22,True,2,2019-03-22,2,200,True
44,252698,2019-09-27,Trash - Curbside/Alley Missed Pickup,"missed trash pick up yesterday (thursday, sept...","1009 14th Ave S, Nashville, TN 37212, United S...",37212,METRO,9407,19.0,1.735463e+06,660708.662868,1009 14TH AVE S,2019-09-27,True,3,2019-09-27,3,200,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3026,88097,2018-08-06,Trash - Backdoor,Missed backdoor pickup has missed for 2 weeks....,936 Cahal Ave,37206,METRO,9507,5.0,1.748204e+06,679278.132847,936 CAHAL AVE,2018-08-06,True,5,2018-08-06,5,200,True
3027,158554,2019-03-22,Trash - Backdoor,Missed backdoor Trash/pickup has missed for 2 ...,936 Cahal Ave,37206,METRO,9507,5.0,1.748206e+06,679274.398565,936 CAHAL AVE,2019-03-22,True,6,2019-03-22,6,200,True
3028,165477,2019-04-05,Trash - Backdoor,backdoor miss,"936 Cahal Ave, Nashville, TN 37206, United States",37206,METRO,9507,5.0,1.748206e+06,679274.398565,936 CAHAL AVE,2019-04-05,True,7,2019-04-05,7,200,True
3029,211729,2019-07-08,Trash - Backdoor,backdoor miss. 2 week,"936 Cahal Ave, Nashville, TN 37206, United States",37206,METRO,9507,5.0,1.748206e+06,679274.398565,936 CAHAL AVE,2019-07-08,True,8,2019-07-08,8,200,True


In [48]:
# Export the fined events; the positional index is not written.
mmissed_final_fined.to_csv(path_or_buf='mmissed_final_fined.csv', index=False)