# L.A. Trash Collection

In [11]:
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import geojson
import json
import jenkspy
import numpy as np
from altair import datum
import altair as alt
import altair_latimes as lat
# Make the L.A. Times Altair theme available under the name 'latimes' and
# switch it on for every chart rendered below.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

# Raise the pandas display limits so wide/long frames are not truncated.
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 1000)

In [12]:
# Dataset page:
# https://data.lacity.org/A-Livable-and-Sustainable-City/LASAN-Solid-Resources-Tonnages-Bulky-Item-E-Waste-/qwh3-ax8z
# The path is split across two adjacent string literals, which Python joins
# into a single string at compile time.
src = pd.read_csv(
    '/Users/mhustiles/data/data/'
    'LA/LASAN___Solid_Resources_Tonnages_-_Bulky_Item__E-Waste__and_White_Goods.csv'
)

In [13]:
# Work on a copy: the cells below rename columns and add derived columns on
# `trash`, and a plain `trash = src` alias would apply those changes to `src`
# as well, leaving no untouched version of the raw CSV data in memory.
trash = src.copy()

In [14]:
# Normalize the column names: trim surrounding whitespace, lowercase,
# turn spaces into underscores and drop parentheses.
# regex=False forces each replacement to be a literal substring match, so
# '(' and ')' are never read as regex metacharacters, whatever default the
# installed pandas version uses for `regex`.
trash.columns = (
    trash.columns.str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)

In [15]:
# Check the dtype pandas inferred for each column before any type conversion.
trash.dtypes

district             object
route                object
datetime             object
site                 object
ticketnumber         object
tons                float64
commoditycode        object
generalcommodity     object
dtype: object

In [16]:
# Parse the MM/DD/YYYY strings into timestamps. normalize() pins every value
# to midnight, which is what the previous strftime('%Y-%m-%d') -> to_datetime
# round trip achieved, without formatting and re-parsing every row.
trash['datetime'] = pd.to_datetime(trash['datetime'], format='%m/%d/%Y').dt.normalize()

# Calendar parts used as grouping keys in the cells below.
trash['year'] = trash['datetime'].dt.year
trash['quarter'] = trash['datetime'].dt.quarter
trash['day'] = trash['datetime'].dt.day
trash['month'] = trash['datetime'].dt.month
# `.dt.weekday_name` was removed in pandas 1.0; `.dt.day_name()` returns the
# same day-name strings and mirrors the `.dt.month_name()` call that follows.
trash['weekday'] = trash['datetime'].dt.day_name()
trash['monthname'] = trash['datetime'].dt.month_name()

In [17]:
# Restrict the working frame to rows dated strictly after 1 Jan 2016
# (the comparison is `>`, so rows stamped exactly at that midnight are dropped).
after_2016_start = trash['datetime'] > '01/01/2016'
trash = trash[after_2016_start]

# Separate subset holding only rows dated strictly after 1 Jan 2020.
after_2020_start = trash['datetime'] > '01/01/2020'
trash20 = trash[after_2020_start]

In [18]:
# Preview the first rows of the date-filtered frame, including the derived date columns.
trash.head()

Unnamed: 0,district,route,datetime,site,ticketnumber,tons,commoditycode,generalcommodity,year,quarter,day,month,weekday,monthname
0,CSLA DCT,LH1,2019-10-01,Republic Sunshine Canyon Landfill,,2.0,H,Bulky,2019,4,1,10,Tuesday,October
1,CSLA DCT,LI6,2019-10-01,Republic Sunshine Canyon Landfill,,0.0,I,Bulky,2019,4,1,10,Tuesday,October
2,CSLA DCT,LI6,2019-10-01,Republic Sunshine Canyon Landfill,,0.0,I,Bulky,2019,4,1,10,Tuesday,October
3,CSLA DCT,LI6,2019-10-01,Republic Sunshine Canyon Landfill,,0.0,I,Bulky,2019,4,1,10,Tuesday,October
4,CSLA DCT,LI6,2019-10-01,Republic Sunshine Canyon Landfill,,0.0,I,Bulky,2019,4,1,10,Tuesday,October


In [None]:
# Total tons per (year, route, commodity), largest totals first.
# The aggregation is named with the string 'sum' instead of np.sum: pandas
# already routes np.sum to its own groupby sum, and passing the callable is
# deprecated (FutureWarning since pandas 2.1).
route_commodity_years = (
    trash.groupby(['year', 'route', 'generalcommodity'])
    .agg({'tons': 'sum'})
    .reset_index()
    .sort_values(by='tons', ascending=False)
)

In [21]:
# The frame is sorted by tons descending, so head() shows the largest year/route/commodity totals.
route_commodity_years.head()

Unnamed: 0,year,route,generalcommodity,tons
694,2017,AA7,Refuse,28188.98
679,2017,AA4,Refuse,27780.68
619,2017,AA2,Refuse,27734.02
1769,2019,AA2,Refuse,27380.53
1801,2019,AA3,Refuse,27138.83


In [24]:
# Total tons per (year, month, commodity), most recent years first.
# 'sum' is passed as a string because handing np.sum to .agg() is deprecated
# (FutureWarning since pandas 2.1); the result is the same groupby sum.
commodity_months = (
    trash.groupby(['year', 'month', 'generalcommodity'])
    .agg({'tons': 'sum'})
    .reset_index()
    .sort_values(by='year', ascending=False)
)

In [12]:
# Total tons per (district, commodity), largest totals first.
# 'sum' is passed as a string because handing np.sum to .agg() is deprecated
# (FutureWarning since pandas 2.1); the result is the same groupby sum.
district_commodity = (
    trash.groupby(['district', 'generalcommodity'])
    .agg({'tons': 'sum'})
    .reset_index()
    .sort_values(by='tons', ascending=False)
)

In [27]:
# Total tons per calendar day and commodity, heaviest days first. `weekday`
# is included as a grouping key so it is carried through as a column.
# 'sum' is passed as a string because handing np.sum to .agg() is deprecated
# (FutureWarning since pandas 2.1); the result is the same groupby sum.
daily = (
    trash.groupby(['datetime', 'generalcommodity', 'weekday'])
    .agg({'tons': 'sum'})
    .reset_index()
    .sort_values(by='tons', ascending=False)
)

In [28]:
# Daily totals for the 'Refuse' commodity only, dated strictly after
# 1 Jan 2016, in chronological order.
refuse_since_2016 = (daily.generalcommodity == 'Refuse') & (daily.datetime > '2016/01/01')

# 'sum' is passed as a string because handing np.sum to .agg() is deprecated
# (FutureWarning since pandas 2.1); the result is the same groupby sum.
daily_trash = (
    daily[refuse_since_2016]
    .groupby(['weekday', 'generalcommodity', 'datetime'])
    .agg({'tons': 'sum'})
    .reset_index()
    .sort_values(by='datetime', ascending=True)
)

In [29]:
# Round the daily tonnage to whole tons (round() defaults to 0 decimals).
daily_trash = daily_trash.assign(tons=lambda frame: frame['tons'].round())

In [30]:
# Export the rounded daily refuse totals. to_csv() keeps its default
# index=True here, so the row index is written as the first column.
daily_trash_csv = '/Users/mhustiles/data/github/notebooks/la-sanitation/output/daily_trash.csv'
daily_trash.to_csv(daily_trash_csv)

In [33]:
# Display the days ranked from heaviest to lightest refuse tonnage
# (returns a sorted view for display; daily_trash itself is not reordered).
daily_trash.sort_values('tons', ascending=False)

Unnamed: 0,weekday,generalcommodity,datetime,tons
1099,Thursday,Refuse,2020-01-02,4299.0
348,Monday,Refuse,2018-06-04,4093.0
430,Monday,Refuse,2019-12-30,4061.0
400,Monday,Refuse,2019-06-03,4038.0
1165,Tuesday,Refuse,2016-12-27,4033.0
...,...,...,...,...
776,Sunday,Refuse,2018-01-28,11.0
678,Sunday,Refuse,2016-03-13,9.0
890,Sunday,Refuse,2020-04-05,5.0
889,Sunday,Refuse,2020-03-29,5.0


In [35]:
# Thin bars (2px wide), one per day: date on the x axis, tons on the y axis.
daily_bars = alt.Chart(daily_trash).mark_bar(size=2)
daily_bars.encode(
    alt.X('datetime'),
    alt.Y('tons'),
)

In [36]:
# Year x month heatmap of refuse tonnage: one rectangle per (year, month),
# shaded by the summed tons for that cell.
heat_color = alt.Color('sum(tons):Q', scale=alt.Scale(scheme="greens"))

# Hover details shown for each rectangle.
heat_tooltip = [
    alt.Tooltip('month(datetime):T', title='Date'),
    alt.Tooltip('sum(tons):Q', title='Tons'),
]

heat_base = alt.Chart(daily_trash, title="Trash").mark_rect()
heat_base.encode(
    x='year(datetime):O',
    y='month(datetime):O',
    color=heat_color,
    tooltip=heat_tooltip,
).properties(width=750)