In [198]:
import pandas as pd
import numpy as np
import re

In [199]:
# Read the hauler complaint report from disk and preview the first rows.
report_path = 'trash_hauler_report.csv'
trash = pd.read_csv(report_path)
trash.head()

Unnamed: 0,Request Number,Date Opened,Request,Description,Incident Address,Zip Code,Trash Hauler,Trash Route,Council District,State Plan X,State Plan Y
0,25270,11/01/17,Trash - Backdoor,"house with the wheel chair ramp, they share dr...",3817 Crouch Dr,37207.0,RED RIVER,3205,2.0,1727970.0,686779.478089
1,25274,11/01/17,Trash - Curbside/Alley Missed Pickup,Curb/Trash miss Tuesday.,4028 Clarksville Pike,37218.0,RED RIVER,4202,1.0,1721259.0,685444.799565
2,25276,11/01/17,Trash - Curbside/Alley Missed Pickup,Curb/trash miss Tuesday.,6528 Thunderbird Dr,37209.0,RED RIVER,4205,20.0,1707027.0,659887.471571
3,25307,11/01/17,Trash - Curbside/Alley Missed Pickup,missed,2603 old matthews rd,37207.0,WASTE IND,2206,2.0,1735692.0,685027.245923
4,25312,11/01/17,Trash - Curbside/Alley Missed Pickup,Missed the even side of the road.,604 croley dr,37209.0,RED RIVER,4203,20.0,1710186.0,664205.101066


In [200]:
# --- Clean the raw report and select Red River missed-pickup complaints ---

trash.columns = trash.columns.str.strip()

# Zero-pad ZIP codes to 5 characters while keeping missing ZIPs missing.
# Casting the nullable ints straight to str turns <NA> into the literal text
# "<NA>", which zfill(5) would then pad to "0<NA>"; .where() restores NaN there.
# pd.to_numeric lets this cell be re-run after the column already holds strings.
zip_int = pd.to_numeric(trash['Zip Code']).astype('Int64')
trash['Zip Code'] = zip_int.astype(str).str.zfill(5).where(zip_int.notna())

# Property-damage requests are excluded from the analysis.
trash = trash[trash['Request'] != 'Damage to Property']

missed_categories = ['Trash - Backdoor', 'Trash - Curbside/Alley Missed Pickup']

pattern = r"\bmiss\w*\b|not\s+emptied|not\s+picked\s+up|no\s+pick\s+up" #miss/missed, not emptied, not picked up, no pick up

# A list of category names is matched literally (escaped so characters such as
# '(' or '+' in a future category cannot be read as regex syntax); a plain
# string is passed through unchanged and treated as a ready-made pattern.
if isinstance(missed_categories, list):
    missed_pattern = '|'.join(re.escape(category) for category in missed_categories)
else:
    missed_pattern = missed_categories

is_red_river = trash['Trash Hauler'].str.contains("Red River", case=False, na=False)
is_missed_request = trash['Request'].str.contains(missed_pattern, case=False, na=False)
mentions_miss = trash['Description'].str.contains(pattern, case=False, na=False)

# Keep Red River rows that are either filed under a missed-pickup category or
# whose description mentions a miss, and that have an address to group on.
# .copy() makes rrmissed an independent frame, so columns added to it later are
# not written into a filtered selection of `trash`.
rrmissed = (
    trash[is_red_river & (is_missed_request | mentions_miss)]
    .dropna(subset=['Incident Address'])  # drop known missing address
    .copy()
)

# Same-address/same-day duplicates are not removed here: that happens further
# down, once addresses have been normalized.

def normalize_address(s: str) -> str:
    """Return a canonical, upper-cased street portion of an address.

    Everything from the first comma onward is discarded, periods are turned
    into spaces, and runs of whitespace are collapsed to single spaces with
    no leading or trailing blanks. Missing values (anything ``pd.isna``
    reports as missing) are returned unchanged rather than converted.

    Direction / street-type abbreviation replacements are not applied here.
    """
    if pd.isna(s):
        return s
    # Only the text ahead of the first comma is kept.
    street_part, _, _ = s.partition(",")
    without_periods = street_part.upper().replace(".", " ")
    # split() with no argument drops leading/trailing whitespace and collapses
    # interior runs, so re-joining yields single-space separation.
    return " ".join(without_periods.split())

# --- Group complaints into per-address events and assess fines ---

EVENT_GAP_DAYS = 6     # a complaint more than this many days after the previous one opens a new event
FINE_PER_EVENT = 200   # amount assessed for every event after an address's first

# Work on an independent frame so the new columns are not written onto a
# filtered selection of another DataFrame.
rrmissed = rrmissed.copy()
rrmissed["address_norm"] = rrmissed["Incident Address"].apply(normalize_address)
rrmissed["Date Opened"] = pd.to_datetime(rrmissed["Date Opened"], errors="coerce")
rrmissed["date_only"] = rrmissed["Date Opened"].dt.date

# One row per normalized address per day, in chronological order per address.
# Rows whose date could not be parsed (NaT from errors="coerce") are excluded:
# they would get a NaN gap, be flagged as a new event and counted as one, yet
# could never carry a fine because NaT never equals the event's start date.
rrmissed_dedup = (
    rrmissed.dropna(subset=["Date Opened"])
    .sort_values(['address_norm', 'date_only', 'Date Opened'])
    .drop_duplicates(subset=['address_norm', 'date_only'], keep='first')
    .reset_index(drop=True)
)

# Days since the previous complaint at the same address (NaN for the first one).
gap = rrmissed_dedup.groupby("address_norm")["Date Opened"].diff().dt.days
rrmissed_dedup["new_event"] = gap.isna() | (gap > EVENT_GAP_DAYS)
# Running count of event starts gives a 1-based event number per address.
rrmissed_dedup["event_id"] = rrmissed_dedup.groupby("address_norm")["new_event"].cumsum()

# Earliest complaint date inside each (address, event) group.
grp = rrmissed_dedup.groupby(['address_norm', "event_id"])['Date Opened']
rrmissed_dedup["event_date"] = grp.transform("min")

rrmissed_dedup["event_seq"] = rrmissed_dedup["event_id"].astype("Int64")

# The fine is attached only to the row that opens an event, and only for the
# second and later events at an address; every other row carries 0.
opens_event = rrmissed_dedup['Date Opened'] == rrmissed_dedup["event_date"]
is_repeat_event = rrmissed_dedup["event_seq"] > 1
rrmissed_dedup["fine_event"] = (is_repeat_event & opens_event).astype(int) * FINE_PER_EVENT
rrmissed_dedup["first_event"] = opens_event

  rrmissed["Date Opened"] = pd.to_datetime(rrmissed["Date Opened"], errors="coerce")


In [201]:
# Display the deduplicated complaints together with their event columns.
rrmissed_dedup

Unnamed: 0,Request Number,Date Opened,Request,Description,Incident Address,Zip Code,Trash Hauler,Trash Route,Council District,State Plan X,State Plan Y,address_norm,date_only,new_event,event_id,event_date,event_seq,fine_event,first_event
0,155122,2019-03-15,Trash - Curbside/Alley Missed Pickup,MISS,"100 Bluefield Square, Nashville, TN 37214, Uni...",37214,RED RIVER,1505,15.0,1.770431e+06,666861.601362,100 BLUEFIELD SQUARE,2019-03-15,True,1,2019-03-15,1,0,True
1,52252,2018-03-07,Trash - Curbside/Alley Missed Pickup,Missed- trash,100 Braxton Hill Ct,37204,RED RIVER,3302S,25.0,1.733781e+06,640909.303557,100 BRAXTON HILL CT,2018-03-07,True,1,2018-03-07,1,0,True
2,121431,2018-12-05,Trash - Curbside/Alley Missed Pickup,Missed- trash,100 Brook Hollow Rd,37205,RED RIVER,1303,23.0,1.708043e+06,642454.642918,100 BROOK HOLLOW RD,2018-12-05,True,1,2018-12-05,1,0,True
3,30424,2017-11-28,Trash - Curbside/Alley Missed Pickup,missed pick up,100 cedarmont ct,37211,RED RIVER,4403,31.0,1.760265e+06,622441.107959,100 CEDARMONT CT,2017-11-28,True,1,2017-11-28,1,0,True
4,142247,2019-02-14,Trash - Curbside/Alley Missed Pickup,Trash was not collected from any customers on ...,"100 Clydelan Ct, Nashville, TN 37205, United S...",37205,RED RIVER,1302,34.0,1.710255e+06,640390.616149,100 CLYDELAN CT,2019-02-14,True,1,2019-02-14,1,0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13125,192335,2019-05-31,Trash - Curbside/Alley Missed Pickup,odd # side of street was not picked up.,"Old Tusculum Rd, Antioch, Tennessee, 37013",37013,RED RIVER,4404,30.0,1.763332e+06,626852.263728,OLD TUSCULUM RD,2019-05-31,True,1,2019-05-31,1,0,True
13126,133573,2019-01-23,Trash - Curbside/Alley Missed Pickup,got only one side of street,"Queens Ln, Nashville, TN 37218, United States",37218,RED RIVER,3203,1.0,1.714591e+06,686127.550777,QUEENS LN,2019-01-23,True,1,2019-01-23,1,0,True
13127,25809,2017-11-02,Trash - Curbside/Alley Missed Pickup,Robin Springs Road missed!,Robin Springs Rd,37220,RED RIVER,3302S,34.0,1.732450e+06,639784.397978,ROBIN SPRINGS RD,2017-11-02,True,1,2017-11-02,1,0,True
13128,256189,2019-10-07,Trash - Curbside/Alley Missed Pickup,this street missed,"Trevino Pl, Nashville, TN 37013, United States",37013,RED RIVER,3505,29.0,1.785940e+06,636624.255256,TREVINO PL,2019-10-07,True,1,2019-10-07,1,0,True


In [202]:
# Keep only the rows flagged as the start of an event.
starts_event = rrmissed_dedup['new_event']
rrmissed_final = rrmissed_dedup.loc[starts_event]

In [203]:
# Display the event-start rows selected from rrmissed_dedup.
rrmissed_final

Unnamed: 0,Request Number,Date Opened,Request,Description,Incident Address,Zip Code,Trash Hauler,Trash Route,Council District,State Plan X,State Plan Y,address_norm,date_only,new_event,event_id,event_date,event_seq,fine_event,first_event
0,155122,2019-03-15,Trash - Curbside/Alley Missed Pickup,MISS,"100 Bluefield Square, Nashville, TN 37214, Uni...",37214,RED RIVER,1505,15.0,1.770431e+06,666861.601362,100 BLUEFIELD SQUARE,2019-03-15,True,1,2019-03-15,1,0,True
1,52252,2018-03-07,Trash - Curbside/Alley Missed Pickup,Missed- trash,100 Braxton Hill Ct,37204,RED RIVER,3302S,25.0,1.733781e+06,640909.303557,100 BRAXTON HILL CT,2018-03-07,True,1,2018-03-07,1,0,True
2,121431,2018-12-05,Trash - Curbside/Alley Missed Pickup,Missed- trash,100 Brook Hollow Rd,37205,RED RIVER,1303,23.0,1.708043e+06,642454.642918,100 BROOK HOLLOW RD,2018-12-05,True,1,2018-12-05,1,0,True
3,30424,2017-11-28,Trash - Curbside/Alley Missed Pickup,missed pick up,100 cedarmont ct,37211,RED RIVER,4403,31.0,1.760265e+06,622441.107959,100 CEDARMONT CT,2017-11-28,True,1,2017-11-28,1,0,True
4,142247,2019-02-14,Trash - Curbside/Alley Missed Pickup,Trash was not collected from any customers on ...,"100 Clydelan Ct, Nashville, TN 37205, United S...",37205,RED RIVER,1302,34.0,1.710255e+06,640390.616149,100 CLYDELAN CT,2019-02-14,True,1,2019-02-14,1,0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13125,192335,2019-05-31,Trash - Curbside/Alley Missed Pickup,odd # side of street was not picked up.,"Old Tusculum Rd, Antioch, Tennessee, 37013",37013,RED RIVER,4404,30.0,1.763332e+06,626852.263728,OLD TUSCULUM RD,2019-05-31,True,1,2019-05-31,1,0,True
13126,133573,2019-01-23,Trash - Curbside/Alley Missed Pickup,got only one side of street,"Queens Ln, Nashville, TN 37218, United States",37218,RED RIVER,3203,1.0,1.714591e+06,686127.550777,QUEENS LN,2019-01-23,True,1,2019-01-23,1,0,True
13127,25809,2017-11-02,Trash - Curbside/Alley Missed Pickup,Robin Springs Road missed!,Robin Springs Rd,37220,RED RIVER,3302S,34.0,1.732450e+06,639784.397978,ROBIN SPRINGS RD,2017-11-02,True,1,2017-11-02,1,0,True
13128,256189,2019-10-07,Trash - Curbside/Alley Missed Pickup,this street missed,"Trevino Pl, Nashville, TN 37013, United States",37013,RED RIVER,3505,29.0,1.785940e+06,636624.255256,TREVINO PL,2019-10-07,True,1,2019-10-07,1,0,True


In [204]:
# Sum of the fine_event column over all event-start rows.
total_fines = int(rrmissed_final["fine_event"].sum())

# Number of distinct (address, event) pairs whose event number is not 1.
repeat_events = rrmissed_final.loc[rrmissed_final['event_id'].ne(1)]
distinct_repeat_events = repeat_events.drop_duplicates(subset=['address_norm', "event_id"])
unique_events = int(len(distinct_repeat_events))

In [205]:
# Show the summed fine_event values from rrmissed_final.
total_fines

827400

In [206]:
# Show the count of (address, event) pairs with event_id other than 1.
unique_events

4137

In [207]:
# Write the event-start rows to CSV, leaving out the DataFrame index.
output_path = 'rrmissed_final.csv'
rrmissed_final.to_csv(output_path, index=False)