In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf
import datetime

#  Import and read the csv.
import pandas as pd 
waste_df = pd.read_csv("./resources/austin_waste_and_diversion.csv", error_bad_lines=False, delimiter=',', skip_blank_lines=True)
waste_df.head()


Unnamed: 0,dropoff_site,load_id,load_time,load_type,load_weight,report_date,route_number,route_type
0,MRF,328118,5/27/2009 12:55,RECYCLING - SINGLE STREAM,4060.0,5/27/2009,DW1,RECYCLING - SINGLE STREAM
1,BRAKER SITE,308336,12/16/2008 10:55,SWEEPING,,12/16/2008,NW10-R,STREET CLEANING-RESIDENTIAL
2,TDS - MRF,541514,9/6/2012 13:56,RECYCLING - SINGLE STREAM,5060.0,9/6/2012,RHBU20,RECYCLING - SINGLE STREAM
3,TDS LANDFILL,689059,3/14/2016 7:56,SWEEPING,3720.0,3/14/2016,DSS04,SWEEPER DUMPSITES
4,HORNSBY BEND,108272,12/2/2004 15:29,YARD TRIMMING,13840.0,12/2/2004,YH08,YARD TRIMMINGS


In [2]:
routes_df = pd.read_csv("./resources/GarbageRecycle2015RR.csv", error_bad_lines=False, delimiter=',', skip_blank_lines=True)
routes_df.head()

Unnamed: 0,LANDFILL,the_geom,GARB_RT,GARB_DAY,GARB_SUP,SUPER_NUM,OP_TYPE,RT_OLD
0,TDS,MULTIPOLYGON (((-97.70618263474258 30.36741600...,PAH83,Thursday,Gilbert,697.0,Auto,
1,TDS,MULTIPOLYGON (((-97.83236579358113 30.16212865...,PAM54,Monday,Castillo,500.0,Auto,PAM84
2,TDS,MULTIPOLYGON (((-97.72977104057442 30.19009265...,PAM71,Monday,Carnline,596.0,Auto,
3,TDS,MULTIPOLYGON (((-97.65374103416144 30.26269210...,PAH54,Thursday,Castillo,500.0,Auto,PAH75
4,TDS,MULTIPOLYGON (((-97.64683377149026 30.38640132...,PAW71,Wednesday,Carnline,596.0,Auto,


In [3]:
waste_df.dtypes

dropoff_site     object
load_id           int64
load_time        object
load_type        object
load_weight     float64
report_date      object
route_number     object
route_type       object
dtype: object

In [4]:
waste_df.describe()

Unnamed: 0,load_id,load_weight
count,535181.0,478804.0
mean,421736.00694,11580.39
std,198963.512162,7800.556
min,101222.0,-4480.0
25%,245010.0,5440.0
50%,387215.0,10820.0
75%,608054.0,16560.0
max,750483.0,1562821.0


In [5]:
waste_df.nunique()

dropoff_site        33
load_id         535143
load_time       423013
load_type           18
load_weight       5116
report_date       4342
route_number      1787
route_type          37
dtype: int64

In [6]:
route_total = waste_df.groupby(["route_number"]).count()["route_type"]
route_total

route_number
0BM00            10
ABR01             3
AFD-FIREWISE      9
BKR-BR            9
BKR-YT           74
               ... 
YW12            223
YW13             59
YWNON1           28
YWNON5            3
ZILKER           51
Name: route_type, Length: 1787, dtype: int64

In [7]:
load_counts = waste_df.groupby(["load_type"]).count()["load_id"]
load_counts

load_type
BAGGED LITTER                      40
BRUSH                           30043
BULK                            29373
DEAD ANIMAL                      5109
GARBAGE COLLECTIONS            188255
LITTER                           1187
MATTRESS                            3
MIXED LITTER                     1223
MULCH                            1276
RECYCLED METAL                    613
RECYCLING - COMINGLE            29533
RECYCLING - PAPER               30490
RECYCLING - PLASTIC BAGS           38
RECYCLING - SINGLE STREAM       94336
SWEEPING                        68599
TIRES                            2237
YARD TRIMMING                   52814
YARD TRIMMING - X-MAS TREES        12
Name: load_id, dtype: int64

In [20]:
# Convert load_time to datetime 
waste_df["load_time"] = pd.to_datetime(waste_df["load_time"])
waste_df.dtypes

dropoff_site            object
load_id                  int64
load_time       datetime64[ns]
load_type               object
load_weight            float64
report_date             object
route_number            object
route_type              object
dtype: object

In [21]:
# Extract year from load_time to new load_year column
waste_df["load_year"] = waste_df["load_time"].apply(lambda x: x.year)
waste_df.head(5)

Unnamed: 0,dropoff_site,load_id,load_time,load_type,load_weight,report_date,route_number,route_type,load_year
0,MRF,328118,2009-05-27 12:55:00,RECYCLING - SINGLE STREAM,4060.0,5/27/2009,DW1,RECYCLING - SINGLE STREAM,2009
1,BRAKER SITE,308336,2008-12-16 10:55:00,SWEEPING,,12/16/2008,NW10-R,STREET CLEANING-RESIDENTIAL,2008
2,TDS - MRF,541514,2012-09-06 13:56:00,RECYCLING - SINGLE STREAM,5060.0,9/6/2012,RHBU20,RECYCLING - SINGLE STREAM,2012
3,TDS LANDFILL,689059,2016-03-14 07:56:00,SWEEPING,3720.0,3/14/2016,DSS04,SWEEPER DUMPSITES,2016
4,HORNSBY BEND,108272,2004-12-02 15:29:00,YARD TRIMMING,13840.0,12/2/2004,YH08,YARD TRIMMINGS,2004


In [42]:
# Filter by 2017
df_2017 = waste_df[waste_df.load_year == 2017]
df_2017.head()

Unnamed: 0,dropoff_site,load_id,load_time,load_type,load_weight,report_date,route_number,route_type,load_year
10,TDS LANDFILL,732809,2017-03-22 07:40:00,GARBAGE COLLECTIONS,15840.0,3/22/2017,SXSW2017,SPECIAL EVENTS,2017
56,TDS LANDFILL,736509,2017-04-19 17:35:00,GARBAGE COLLECTIONS,20680.0,4/19/2017,PAW50,GARBAGE COLLECTION,2017
62,TDS LANDFILL,724256,2017-01-12 13:29:00,GARBAGE COLLECTIONS,21660.0,1/9/2017,PM41,GARBAGE COLLECTION,2017
90,HORNSBY BEND,747204,2017-07-20 12:36:00,YARD TRIMMING,5720.0,7/20/2017,YH12,YARD TRIMMINGS,2017
92,TDS LANDFILL,740763,2017-05-24 00:00:00,BULK,5580.0,5/24/2017,BU07,BULK,2017


In [43]:
df_2017.nunique()

dropoff_site       20
load_id         26045
load_time       18951
load_type          16
load_weight      2037
report_date       253
route_number      977
route_type         26
load_year           1
dtype: int64

In [45]:
df_2017 = df_2017.drop(["load_id","load_time"], axis = 1)
df_2017.head()

Unnamed: 0,dropoff_site,load_type,load_weight,report_date,route_number,route_type,load_year
10,TDS LANDFILL,GARBAGE COLLECTIONS,15840.0,3/22/2017,SXSW2017,SPECIAL EVENTS,2017
56,TDS LANDFILL,GARBAGE COLLECTIONS,20680.0,4/19/2017,PAW50,GARBAGE COLLECTION,2017
62,TDS LANDFILL,GARBAGE COLLECTIONS,21660.0,1/9/2017,PM41,GARBAGE COLLECTION,2017
90,HORNSBY BEND,YARD TRIMMING,5720.0,7/20/2017,YH12,YARD TRIMMINGS,2017
92,TDS LANDFILL,BULK,5580.0,5/24/2017,BU07,BULK,2017


In [55]:
# weight of trash by year by route
df_2017 = df_2017.groupby(["load_year","route_number"]).sum("load_weight")
df_2017_new = pd.DataFrame(df_2017)
df_2017_new.head(20)

AttributeError: 'function' object has no attribute 'groupby'

In [49]:
df_2017.nunique

<bound method DataFrame.nunique of                         load_weight
load_year route_number             
2017      0BM00            117420.0
          BLN-01                0.0
          BR01             525100.0
          BR02             388520.0
          BR03             346910.0
...                             ...
          YW09             975760.0
          YW10             466320.0
          YW12             241580.0
          YWNON1           261480.0
          YWNON5            22600.0

[977 rows x 1 columns]>

In [53]:
df_2017 = df_2017.loc[df_2017["load_weight"] == 0]

AttributeError: 'function' object has no attribute 'loc'

In [40]:
#waste_by_year_by_route_df = waste_by_year_by_route_df[waste_by_year_by_route_df.load_year == 2017]
#waste_by_year_by_route_df.head()
waste_by_year_by_route_df.columns

Index(['load_id', 'load_weight'], dtype='object')

In [None]:
# merge 

In [None]:
# drop na

In [None]:
# convert load time to date, drop report date because report date could vary from load time

In [None]:
# figure out range of dates

In [4]:
import zipcodes
from pprint import pprint
pprint(zipcodes.similar_to('78', zips=zipcodes.filter_by(active=True, city='Austin')))

y': 'Austin',
  'country': 'US',
  'county': 'Travis County',
  'lat': '30.2919',
  'long': '-97.7165',
  'state': 'TX',
  'timezone': 'America/Chicago',
  'unacceptable_cities': [],
  'world_region': 'NA',
  'zip_code': '78722',
  'zip_code_type': 'STANDARD'},
 {'acceptable_cities': [],
  'active': True,
  'area_codes': ['512'],
  'city': 'Austin',
  'country': 'US',
  'county': 'Travis County',
  'lat': '30.3041',
  'long': '-97.6875',
  'state': 'TX',
  'timezone': 'America/Chicago',
  'unacceptable_cities': [],
  'world_region': 'NA',
  'zip_code': '78723',
  'zip_code_type': 'STANDARD'},
 {'acceptable_cities': [],
  'active': True,
  'area_codes': ['512'],
  'city': 'Austin',
  'country': 'US',
  'county': 'Travis County',
  'lat': '30.2951',
  'long': '-97.6137',
  'state': 'TX',
  'timezone': 'America/Chicago',
  'unacceptable_cities': [],
  'world_region': 'NA',
  'zip_code': '78724',
  'zip_code_type': 'STANDARD'},
 {'acceptable_cities': [],
  'active': True,
  'area_codes': [

In [2]:
pip install zipcodes

Collecting zipcodes
  Using cached zipcodes-1.1.2-py2.py3-none-any.whl (717 kB)
Installing collected packages: zipcodes
Successfully installed zipcodes-1.1.2
Note: you may need to restart the kernel to use updated packages.
