# Explore TRI facility releases over time

I want to explore trends over time. I downloaded each TRI basic data file for the US for all years available, 1987-2018 to retrieve facility location information and release amounts. RSEI scores and modeled hazards were downloaded for years available, 2007-2018.

In [1]:
# import packages
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from shapely.geometry import Polygon
from shapely.geometry import mapping
from functools import reduce
import os
import re
import numpy as np

import warnings
warnings.simplefilter(action='ignore')

# supress scientific notation
pd.set_option('display.float_format', lambda x: '%.5f' % x)

### Wrangle TRI facility location information

First I want to combine all the TRI data files for each year into one dataframe.

In [2]:
# define folder containing all tri files
folder = "../data/tri-data"

# create empty list to hold dataframes
fileList = list()

# add each file to the dataframe list
for file in os.listdir(folder):
    df = pd.read_csv(os.path.join(folder, file))
    fileList.append(df)
    
# concatenate all list of dataframes
combined = pd.concat(fileList, axis=0, ignore_index=True)
combined

Unnamed: 0,1. YEAR,2. TRIFD,3. FRS ID,4. FACILITY NAME,5. STREET ADDRESS,6. CITY,7. COUNTY,8. ST,9. ZIP,10. BIA,...,107. 8.3 - ENERGY RECOVER OF,108. 8.4 - RECYCLING ON SITE,109. 8.5 - RECYCLING OFF SIT,110. 8.6 - TREATMENT ON SITE,111. 8.7 - TREATMENT OFF SITE,112. PRODUCTION WSTE (8.1-8.7),113. 8.8 - ONE-TIME RELEASE,114. PROD_RATIO_OR_ ACTIVITY,115. 8.9 - PRODUCTION RATIO,Unnamed: 115
0,1987,94304QLTYT3400H,110000609887,QUALITY TECH,3400 HILLVIEW AVE,PALO ALTO,SANTA CLARA,CA,94304,,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,,,,
1,1987,45377NLNDD480ND,110000394467,DELPHI ENERGY & CHASSIS VANDALIA OPERATIONS,480 N DIXIE DR,VANDALIA,MONTGOMERY,OH,45377,,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,,,,
2,1987,27215PRKVN1821N,110043582101,GLEN RAVEN TECHNICAL FABRICS LLC,1821 N PARK AVE,BURLINGTON,ALAMANCE,NC,27217,,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,,,,
3,1987,70058VNSCP1255P,110000448757,GREIF USA (DBA GREIF HARVEY DCS HARVEY),1255 PETERS RD,HARVEY,JEFFERSON PARISH,LA,70058,,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,,,,
4,1987,55425VTCNC2401E,110002123158,VTC INC RIVER RIDGE,2401 E 28TH ST,BLOOMINGTON,HENNEPIN,MN,55425,,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2739472,2018,42366PRMML5680O,110000380971,HINES PRECISION INC,5680 OLD KY 54,PHILPOT,DAVIESS,KY,42366,,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,,,,
2739473,2018,37066STVGL1499S,110000493136,US TVA GALLATIN FOSSIL PLANT,1499 STEAM PLANT RD,GALLATIN,SUMNER,TN,37066,,...,0.00000,0.00000,0.00000,0.00000,0.00000,9.40000,,PRODUCTION,0.98000,
2739474,2018,6416WPNNYS861NE,110020834276,PENNYS CONCRETE,8601 NE 38TH STREET,KANSAS CITY,CLAY,MO,64161,,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,,ACTIVITY,1.00000,
2739475,2018,46368BTSTL6500S,110000398212,NLMK INDIANA,6500 S BOUNDARY RD,PORTAGE,PORTER,IN,46368,,...,0.00000,0.00000,53429.00000,0.00000,0.00000,53752.12000,,PRODUCTION,1.00000,


In [3]:
# create copy to revert back to
tri = combined.copy()

In [4]:
# remove leading numbers, periods, and space and from column names
tri.columns = tri.columns.str.lstrip('0123456789.- ')

# replace spaces in column names with underscores
tri.columns = tri.columns.str.replace(' ', '_')

# replace dashes in column names with underscores
tri.columns = tri.columns.str.replace('-', '_')

#convert column names to uppercase
tri.columns = map(str.upper, tri.columns)

tri.head()

Unnamed: 0,YEAR,TRIFD,FRS_ID,FACILITY_NAME,STREET_ADDRESS,CITY,COUNTY,ST,ZIP,BIA,...,ENERGY_RECOVER_OF,RECYCLING_ON_SITE,RECYCLING_OFF_SIT,TREATMENT_ON_SITE,TREATMENT_OFF_SITE,PRODUCTION_WSTE_(8.1_8.7),ONE_TIME_RELEASE,PROD_RATIO_OR__ACTIVITY,PRODUCTION_RATIO,UNNAMED:_115
0,1987,94304QLTYT3400H,110000609887,QUALITY TECH,3400 HILLVIEW AVE,PALO ALTO,SANTA CLARA,CA,94304,,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,
1,1987,45377NLNDD480ND,110000394467,DELPHI ENERGY & CHASSIS VANDALIA OPERATIONS,480 N DIXIE DR,VANDALIA,MONTGOMERY,OH,45377,,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,
2,1987,27215PRKVN1821N,110043582101,GLEN RAVEN TECHNICAL FABRICS LLC,1821 N PARK AVE,BURLINGTON,ALAMANCE,NC,27217,,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,
3,1987,70058VNSCP1255P,110000448757,GREIF USA (DBA GREIF HARVEY DCS HARVEY),1255 PETERS RD,HARVEY,JEFFERSON PARISH,LA,70058,,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,
4,1987,55425VTCNC2401E,110002123158,VTC INC RIVER RIDGE,2401 E 28TH ST,BLOOMINGTON,HENNEPIN,MN,55425,,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,


In [5]:
# replace spaces in column names with underscores
tri.columns = tri.columns.str.replace(' ', '_')

# replace dashes in column names with underscores
tri.columns = tri.columns.str.replace('-', '_')

#convert column names to uppercase
tri.columns = map(str.upper, tri.columns)

tri.head()

Unnamed: 0,YEAR,TRIFD,FRS_ID,FACILITY_NAME,STREET_ADDRESS,CITY,COUNTY,ST,ZIP,BIA,...,ENERGY_RECOVER_OF,RECYCLING_ON_SITE,RECYCLING_OFF_SIT,TREATMENT_ON_SITE,TREATMENT_OFF_SITE,PRODUCTION_WSTE_(8.1_8.7),ONE_TIME_RELEASE,PROD_RATIO_OR__ACTIVITY,PRODUCTION_RATIO,UNNAMED:_115
0,1987,94304QLTYT3400H,110000609887,QUALITY TECH,3400 HILLVIEW AVE,PALO ALTO,SANTA CLARA,CA,94304,,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,
1,1987,45377NLNDD480ND,110000394467,DELPHI ENERGY & CHASSIS VANDALIA OPERATIONS,480 N DIXIE DR,VANDALIA,MONTGOMERY,OH,45377,,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,
2,1987,27215PRKVN1821N,110043582101,GLEN RAVEN TECHNICAL FABRICS LLC,1821 N PARK AVE,BURLINGTON,ALAMANCE,NC,27217,,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,
3,1987,70058VNSCP1255P,110000448757,GREIF USA (DBA GREIF HARVEY DCS HARVEY),1255 PETERS RD,HARVEY,JEFFERSON PARISH,LA,70058,,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,
4,1987,55425VTCNC2401E,110002123158,VTC INC RIVER RIDGE,2401 E 28TH ST,BLOOMINGTON,HENNEPIN,MN,55425,,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,


In [6]:
list(tri.columns)

['YEAR',
 'TRIFD',
 'FRS_ID',
 'FACILITY_NAME',
 'STREET_ADDRESS',
 'CITY',
 'COUNTY',
 'ST',
 'ZIP',
 'BIA',
 'TRIBE',
 'LATITUDE',
 'LONGITUDE',
 'PARENT_CO_NAME',
 'PARENT_CO_DB_NUM',
 'STANDARD_PARENT_CO_NAME',
 'FEDERAL_FACILITY',
 'INDUSTRY_SECTOR_CODE',
 'INDUSTRY_SECTOR',
 'PRIMARY_SIC',
 'SIC_2',
 'SIC_3',
 'SIC_4',
 'SIC_5',
 'SIC_6',
 'PRIMARY_NAICS',
 'NAICS_2',
 'NAICS_3',
 'NAICS_4',
 'NAICS_5',
 'NAICS_6',
 'DOC_CTRL_NUM',
 'CHEMICAL',
 'ELEMENTAL_METAL_INCLUDED',
 'CAS_#/COMPOUND_ID',
 'SRS_ID',
 'CLEAN_AIR_ACT_CHEMICAL',
 'CLASSIFICATION',
 'METAL',
 'METAL_CATEGORY',
 'CARCINOGEN',
 'FORM_TYPE',
 'UNIT_OF_MEASURE',
 'FUGITIVE_AIR',
 'STACK_AIR',
 'WATER',
 'UNDERGROUND',
 'UNDERGROUND_CL_I',
 'UNDERGROUND_C_II_V',
 'LANDFILLS',
 'A___RCRA_C_LANDFILL',
 'B___OTHER_LANDFILLS',
 'LAND_TREATMENT',
 'SURFACE_IMPNDMNT',
 'A___RCRA_SURFACE_IM',
 'B___OTHER_SURFACE_I',
 'OTHER_DISPOSAL',
 'ON_SITE_RELEASE_TOTAL',
 'POTW___TRNS_RLSE',
 'POTW___TRNS_TRT',
 'POTW___TOTAL_TRANSFE

[TRI basic data files documentation](https://www.epa.gov/sites/production/files/2019-08/documents/basic_data_files_documentation_aug_2019_v2.pdf)

On-site release total = Total quantity of the toxic chemical released to air, water and
land on-site at the facility.

Off-site release total = Total quantity of the toxic chemical reported as transferred to
off-site locations for release or disposal. 

There are several columns for various types of land release, and they vary by year as reporting changed in 1996 and 2003. Water and air release columns are consistent for each year, so for now I will just stick with on-site release totals and air and water releases. I will not look at off-site releases, because waste can be double counted if counted once by facility as off-site release, and then again by the receiving facility as on-site release.

Ultimately I want one record per facility, with columns for on-site release totals, plus release totals for water and air release, for each year. There is only one column for water release, and air releases are reported by fugitive and stack. Most chemicals are reported in pounds, except for dioxides which are reported in grams. So before any aggregating I also need to convert records measured in grams into pounds.

I will also sum RSEI score and RSEI modeled hazards to join to the TRI facilities.

In [7]:
# create new column summing fugitive air and stack air release
tri['AIR'] = tri['FUGITIVE_AIR'] + tri['STACK_AIR']

In [141]:
# filter columns to keep
tri_filter = tri.filter(['YEAR','TRIFD','FACILITY_NAME','STREET_ADDRESS','CITY','COUNTY','ST','ZIP','BIA','TRIBE','LATITUDE','LONGITUDE','INDUSTRY_SECTOR','PARENT_CO_NAME', 'CHEMICAL', 'WATER','ON_SITE_RELEASE_TOTAL','AIR','UNIT_OF_MEASURE'], axis=1)
tri_filter.head()

Unnamed: 0,YEAR,TRIFD,FACILITY_NAME,STREET_ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,LONGITUDE,INDUSTRY_SECTOR,PARENT_CO_NAME,CHEMICAL,WATER,ON_SITE_RELEASE_TOTAL,AIR,UNIT_OF_MEASURE
0,1987,94304QLTYT3400H,QUALITY TECH,3400 HILLVIEW AVE,PALO ALTO,SANTA CLARA,CA,94304,,,37.40348,-122.146,Computers and Electronic Products,QUALITY TECH,PHOSPHORIC ACID,0.0,250.0,250.0,Pounds
1,1987,45377NLNDD480ND,DELPHI ENERGY & CHASSIS VANDALIA OPERATIONS,480 N DIXIE DR,VANDALIA,MONTGOMERY,OH,45377,,,39.9004,-84.19882,Transportation Equipment,DELPHI AUTOMOTIVE SYSTEMS LLC,"SULFURIC ACID (1994 AND AFTER ACID AEROSOLS"" O...",0.0,0.0,0.0,Pounds
2,1987,27215PRKVN1821N,GLEN RAVEN TECHNICAL FABRICS LLC,1821 N PARK AVE,BURLINGTON,ALAMANCE,NC,27217,,,36.10932,-79.46178,Textiles,GLEN RAVEN TECHNICAL FABRICS LLC,AMMONIUM SULFATE (SOLUTION),0.0,0.0,0.0,Pounds
3,1987,70058VNSCP1255P,GREIF USA (DBA GREIF HARVEY DCS HARVEY),1255 PETERS RD,HARVEY,JEFFERSON PARISH,LA,70058,,,29.88758,-90.07416,Fabricated Metals,GREIF USA,N-NITROSODI-N-PROPYLAMINE,0.0,2700.0,2700.0,Pounds
4,1987,55425VTCNC2401E,VTC INC RIVER RIDGE,2401 E 28TH ST,BLOOMINGTON,HENNEPIN,MN,55425,,,44.95979,-93.23143,Computers and Electronic Products,CONTROL DATA CORP,"SULFURIC ACID (1994 AND AFTER ACID AEROSOLS"" O...",0.0,0.0,0.0,Pounds


In [9]:
# inspect the different units of measure
tri_filter['UNIT_OF_MEASURE'].unique()

array(['Pounds', 'Grams'], dtype=object)

In [10]:
# # convert any gram measurements to pounds
# for index, row in tri_filter.iterrows(): # loop through each row
#     if row['UNIT_OF_MEASURE'] == 'Grams':
#         tri_filter.loc[index, 'ON_SITE_RELEASE_TOTAL'] = row['ON_SITE_RELEASE_TOTAL']/454
#         tri_filter.loc[index, 'WATER'] = row['WATER']/454
#         tri_filter.loc[index, 'AIR'] = row['AIR']/454

In [11]:
# filter all records measured in grams
grams = tri_filter.loc[tri_filter['UNIT_OF_MEASURE'] == 'Grams']

# convert gram measure to pounds
grams['ON_SITE_RELEASE_TOTAL'] = grams['ON_SITE_RELEASE_TOTAL']/454
grams['WATER'] = grams['WATER']/454
grams['AIR'] = grams['AIR']/454

# change unit of measure to pounds
grams['UNIT_OF_MEASURE'] = 'Pounds'

# check results
grams

Unnamed: 0,YEAR,TRIFD,FACILITY_NAME,STREET_ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,LONGITUDE,INDUSTRY_SECTOR,PARENT_CO_NAME,WATER,ON_SITE_RELEASE_TOTAL,AIR,UNIT_OF_MEASURE
960756,1998,95017RMCPC700HW,RMC PACIFIC MATERIALS DBA CEMEX,700 HIGHWAY 1,DAVENPORT,SANTA CRUZ,CA,95017,,,37.01563,-122.19902,Nonmetallic Mineral Product,CEMEX,0.00000,0.00047,0.00047,Pounds
1009938,1999,95017RMCPC700HW,RMC PACIFIC MATERIALS DBA CEMEX,700 HIGHWAY 1,DAVENPORT,SANTA CRUZ,CA,95017,,,37.01563,-122.19902,Nonmetallic Mineral Product,CEMEX,0.00000,0.00050,0.00050,Pounds
1058622,1999,46302XDCRP2601W,EXIDE TECHNOLOGIES,2601 W MOUNT PLEASANT BLVD,MUNCIE,DELAWARE,IN,47302,,,40.15696,-85.41742,Primary Metals,EXIDE TECHNOLOGIES,0.00000,0.00220,0.00220,Pounds
1086371,2000,71411WLLMTHIGHW,INTERNATIONAL PAPER / RED RIVER MILL,4537 HWY 480,CAMPTI,NATCHITOCHES PARISH,LA,71411,,,31.90533,-93.17364,Paper,INTERNATIONAL PAPER CO,0.00000,0.00137,0.00126,Pounds
1086534,2000,16531GNRLL2901E,GE TRANSPORTATION - ERIE,2901 E LAKE RD,ERIE,ERIE,PA,16531,,,42.14545,-80.02544,Transportation Equipment,WABTEC US RAIL INC.,0.00000,0.00070,0.00070,Pounds
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2739119,2018,37716STVBL1265E,US TVA BULL RUN FOSSIL PLANT,1265 EDGEMOOR RD,CLINTON,ANDERSON,TN,37716,,,36.02280,-84.15180,Electric Utilities,US TENNESSEE VALLEY AUTHORITY,0.00000,0.00062,0.00062,Pounds
2739221,2018,36548CHRLSCARSO,CHARLES R LOWMAN POWER PLANT,4392 CARSON RD,LEROY,WASHINGTON,AL,36548,,,31.48870,-87.91030,Electric Utilities,POWERSOUTH ENERGY COOPERATIVE,0.00000,0.00053,0.00053,Pounds
2739320,2018,84116CHVRN2351N,CHEVRON PRODUCTS CO - SALT LAKE REFINERY,2351 NORTH 1100 WEST,SALT LAKE CITY,SALT LAKE,UT,84116,,,40.82490,-111.92380,Petroleum,CHEVRON CORP,0.00002,0.00002,0.00000,Pounds
2739400,2018,9852WSPBRD31HAG,SPI ABERDEEN-GRAYS HARBOR,301 HAGARA ST,ABERDEEN,GRAYS HARBOR,WA,98520,,,46.97306,-123.77805,Wood Products,SIERRA PACIFIC INDUSTRIES,0.00000,0.00163,0.00163,Pounds


In [12]:
# append converted records back to tri dataframe
tri_filter.append(grams)

# redefine tri_filter dataframe as records measured in pounds
tri_filter = tri_filter[tri_filter['UNIT_OF_MEASURE'] == 'Pounds']

tri_filter

Unnamed: 0,YEAR,TRIFD,FACILITY_NAME,STREET_ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,LONGITUDE,INDUSTRY_SECTOR,PARENT_CO_NAME,WATER,ON_SITE_RELEASE_TOTAL,AIR,UNIT_OF_MEASURE
0,1987,94304QLTYT3400H,QUALITY TECH,3400 HILLVIEW AVE,PALO ALTO,SANTA CLARA,CA,94304,,,37.40348,-122.14600,Computers and Electronic Products,QUALITY TECH,0.00000,250.00000,250.00000,Pounds
1,1987,45377NLNDD480ND,DELPHI ENERGY & CHASSIS VANDALIA OPERATIONS,480 N DIXIE DR,VANDALIA,MONTGOMERY,OH,45377,,,39.90040,-84.19882,Transportation Equipment,DELPHI AUTOMOTIVE SYSTEMS LLC,0.00000,0.00000,0.00000,Pounds
2,1987,27215PRKVN1821N,GLEN RAVEN TECHNICAL FABRICS LLC,1821 N PARK AVE,BURLINGTON,ALAMANCE,NC,27217,,,36.10932,-79.46178,Textiles,GLEN RAVEN TECHNICAL FABRICS LLC,0.00000,0.00000,0.00000,Pounds
3,1987,70058VNSCP1255P,GREIF USA (DBA GREIF HARVEY DCS HARVEY),1255 PETERS RD,HARVEY,JEFFERSON PARISH,LA,70058,,,29.88758,-90.07416,Fabricated Metals,GREIF USA,0.00000,2700.00000,2700.00000,Pounds
4,1987,55425VTCNC2401E,VTC INC RIVER RIDGE,2401 E 28TH ST,BLOOMINGTON,HENNEPIN,MN,55425,,,44.95979,-93.23143,Computers and Electronic Products,CONTROL DATA CORP,0.00000,0.00000,0.00000,Pounds
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2739472,2018,42366PRMML5680O,HINES PRECISION INC,5680 OLD KY 54,PHILPOT,DAVIESS,KY,42366,,,37.73491,-87.00282,Fabricated Metals,,0.00000,0.00000,0.00000,Pounds
2739473,2018,37066STVGL1499S,US TVA GALLATIN FOSSIL PLANT,1499 STEAM PLANT RD,GALLATIN,SUMNER,TN,37066,,,36.32460,-86.40260,Electric Utilities,US TENNESSEE VALLEY AUTHORITY,0.00000,9.40000,3.70000,Pounds
2739474,2018,6416WPNNYS861NE,PENNYS CONCRETE,8601 NE 38TH STREET,KANSAS CITY,CLAY,MO,64161,,,39.16218,-94.48128,Nonmetallic Mineral Product,PENNYS CONCRETE,0.00000,0.00000,0.00000,Pounds
2739475,2018,46368BTSTL6500S,NLMK INDIANA,6500 S BOUNDARY RD,PORTAGE,PORTER,IN,46368,,,41.62152,-87.16148,Primary Metals,,5.92000,6.02000,0.10000,Pounds


In [None]:
# now reduce filtered dataframe to find one record per facility
# use group by TRIFID field, and only keep the identifying fields about each facility
# will use this to join to after performing pivot table operations
all_tri = tri_filter.fillna('').groupby('TRIFD').agg({'FACILITY_NAME':'first',
                             'STREET_ADDRESS':'first',
                             'CITY':'first',
                             'COUNTY':'first',
                             'ST':'first',
                             'ZIP':'first',
                             'BIA':'first',
                             'TRIBE':'first',
                             'LATITUDE':'first',
                             'LONGITUDE':'first',
                             'INDUSTRY_SECTOR':'first',
                             'PARENT_CO_NAME': 'first'
                                               }).reset_index()

all_tri

In [13]:
# # filter columns to keep
# tri_filter = tri.filter(['YEAR','TRIFD','FACILITY_NAME','STREET_ADDRESS','CITY','COUNTY','ST','ZIP','BIA','TRIBE','LATITUDE','LONGITUDE','INDUSTRY_SECTOR','PARENT_CO_NAME','WATER','ON_SITE_RELEASE_TOTAL','AIR_TOTAL'], axis=1)
# tri_filter.head()

In [45]:
# create pivot table summing totals to create records per facility per year, with total releases
pivot = pd.pivot_table(tri_filter,index=['YEAR', 'TRIFD'], values=['ON_SITE_RELEASE_TOTAL'],aggfunc=np.sum)
pivot

Unnamed: 0_level_0,Unnamed: 1_level_0,ON_SITE_RELEASE_TOTAL
YEAR,TRIFD,Unnamed: 2_level_1
1987,00602BXTRHRD115,0.00000
1987,00602DSCFPPOBOX,21862.00000
1987,00602GRNGRAGUAD,14723.00000
1987,00605BRNQNPR467,0.00000
1987,00605VNMRBMONTA,19147.00000
...,...,...
2018,99801CRLSK331CL,2212952.20000
2018,99801KNNCT13401,55589597.00000
2018,9980WCRWLY176JA,2185.28000
2018,99901SCSTG1300S,0.80000


In [46]:
# rename columns to simplify names
pivot = pivot.rename(columns={'ON_SITE_RELEASE_TOTAL':'TOTAL'})
pivot

Unnamed: 0_level_0,Unnamed: 1_level_0,TOTAL
YEAR,TRIFD,Unnamed: 2_level_1
1987,00602BXTRHRD115,0.00000
1987,00602DSCFPPOBOX,21862.00000
1987,00602GRNGRAGUAD,14723.00000
1987,00605BRNQNPR467,0.00000
1987,00605VNMRBMONTA,19147.00000
...,...,...
2018,99801CRLSK331CL,2212952.20000
2018,99801KNNCT13401,55589597.00000
2018,9980WCRWLY176JA,2185.28000
2018,99901SCSTG1300S,0.80000


In [54]:
# unstack pivot table to create columns for release type for each year
amountsUnstacked = pivot.unstack(level=0)
amountsUnstacked

Unnamed: 0_level_0,TOTAL,TOTAL,TOTAL,TOTAL,TOTAL,TOTAL,TOTAL,TOTAL,TOTAL,TOTAL,TOTAL,TOTAL,TOTAL,TOTAL,TOTAL,TOTAL,TOTAL,TOTAL,TOTAL,TOTAL,TOTAL
YEAR,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
TRIFD,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
00602BXTRF111CO,,11700.00000,12000.00000,,,,,,,,...,,,,,,,,,,
00602BXTRHRD115,0.00000,250.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,,,,,,,,,,
00602DSCFPPOBOA,,,17447.00000,4086.00000,,,,,,,...,,,,,,,,,,
00602DSCFPPOBOX,21862.00000,,17717.00000,14730.00000,,,,,,,...,,,,,,,,,,
00602GRNGRAGUAD,14723.00000,,22349.00000,25133.00000,4900.00000,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99901LSKSH381TN,,,,,,,,,,,...,,,9362.00000,1534.00000,,8043.31460,,,,
99901SCSTG1300S,,,,,,,,,,,...,0.00000,0.00000,0.00000,0.00000,0.49320,0.29320,0.10520,0.14120,0.11410,0.80000
99901TSRLS1010S,,,,,,,,,,,...,,,,,,,,,,
9990WCRWLY9STED,,,,,,,,,,,...,,,,,,1769.95000,1825.74000,1474.49000,1412.03000,1397.34000


In [55]:
# rename columns to reflect year and release type
amountsUnstacked.columns = [''.join(str(s).strip() for s in col if s) for col in amountsUnstacked.columns]
amountsUnstacked

Unnamed: 0_level_0,TOTAL1987,TOTAL1988,TOTAL1989,TOTAL1990,TOTAL1991,TOTAL1992,TOTAL1993,TOTAL1994,TOTAL1995,TOTAL1996,...,TOTAL2009,TOTAL2010,TOTAL2011,TOTAL2012,TOTAL2013,TOTAL2014,TOTAL2015,TOTAL2016,TOTAL2017,TOTAL2018
TRIFD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00602BXTRF111CO,,11700.00000,12000.00000,,,,,,,,...,,,,,,,,,,
00602BXTRHRD115,0.00000,250.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,,,,,,,,,,
00602DSCFPPOBOA,,,17447.00000,4086.00000,,,,,,,...,,,,,,,,,,
00602DSCFPPOBOX,21862.00000,,17717.00000,14730.00000,,,,,,,...,,,,,,,,,,
00602GRNGRAGUAD,14723.00000,,22349.00000,25133.00000,4900.00000,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99901LSKSH381TN,,,,,,,,,,,...,,,9362.00000,1534.00000,,8043.31460,,,,
99901SCSTG1300S,,,,,,,,,,,...,0.00000,0.00000,0.00000,0.00000,0.49320,0.29320,0.10520,0.14120,0.11410,0.80000
99901TSRLS1010S,,,,,,,,,,,...,,,,,,,,,,
9990WCRWLY9STED,,,,,,,,,,,...,,,,,,1769.95000,1825.74000,1474.49000,1412.03000,1397.34000


In [103]:
amountsUnstacked.fillna(0)
amountsUnstacked.loc['TOTAL_RELEASE']= amountsUnstacked.sum()
amountsUnstacked

Unnamed: 0_level_0,TOTAL1987,TOTAL1988,TOTAL1989,TOTAL1990,TOTAL1991,TOTAL1992,TOTAL1993,TOTAL1994,TOTAL1995,TOTAL1996,...,TOTAL2009,TOTAL2010,TOTAL2011,TOTAL2012,TOTAL2013,TOTAL2014,TOTAL2015,TOTAL2016,TOTAL2017,TOTAL2018
TRIFD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00602BXTRF111CO,,11700.00000,12000.00000,,,,,,,,...,,,,,,,,,,
00602BXTRHRD115,0.00000,250.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,,,,,,,,,,
00602DSCFPPOBOA,,,17447.00000,4086.00000,,,,,,,...,,,,,,,,,,
00602DSCFPPOBOX,21862.00000,,17717.00000,14730.00000,,,,,,,...,,,,,,,,,,
00602GRNGRAGUAD,14723.00000,,22349.00000,25133.00000,4900.00000,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99901SCSTG1300S,,,,,,,,,,,...,0.00000,0.00000,0.00000,0.00000,0.49320,0.29320,0.10520,0.14120,0.11410,0.80000
99901TSRLS1010S,,,,,,,,,,,...,,,,,,,,,,
9990WCRWLY9STED,,,,,,,,,,,...,,,,,,1769.95000,1825.74000,1474.49000,1412.03000,1397.34000
99929TRDNT641SH,,,,,,,,,,,...,0.00000,,,,,,,,,


In [108]:
amountsUnstacked.to_json("../data/total-releases.json")

In [114]:
# now find top values for each column
# declare empty dataframe to hold top ordered rows
topRows = pd.DataFrame()

# create list of column names
columns = list(amountsUnstacked.columns)

# loop through each column
# finding the 100 largest values in each
# and appending to empty topRows dataframe
for column in columns:
    topValues = amountsUnstacked.nlargest(100, column)
    topRows = topRows.append(topValues)

topRows

Unnamed: 0_level_0,TOTAL1987,TOTAL1988,TOTAL1989,TOTAL1990,TOTAL1991,TOTAL1992,TOTAL1993,TOTAL1994,TOTAL1995,TOTAL1996,...,TOTAL2009,TOTAL2010,TOTAL2011,TOTAL2012,TOTAL2013,TOTAL2014,TOTAL2015,TOTAL2016,TOTAL2017,TOTAL2018
TRIFD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
TOTAL_RELEASE,17495362548.00000,5680148075.00000,4452091324.00000,3753465237.00000,3496917822.96000,3318950504.64000,2909276729.72000,2365597221.36000,2387160808.09000,2318409398.65000,...,3038737468.64420,3401823195.53792,3701652530.05003,3232491920.86028,3727353563.30422,3544714287.81079,2972866487.44393,3134064227.29282,3541607790.97415,3367991070.30157
93562KRRMC13200,5229012615.00000,1300574.00000,1078748.00000,1275504.00000,1039515.00000,634025.00000,556795.00000,415897.00000,320000.00000,517953.00000,...,,,,,,,,,,
77978LMNMCSTATE,468874750.00000,10700650.00000,30250.00000,19605.00000,10855.00000,8513.00000,2086.00000,458.00000,1046.00000,961.00000,...,104538.00000,178771.10000,199796.00000,188280.10000,215479.80000,229453.30000,235630.60000,31152.40000,,
76567LMNMCSANDO,329112650.00000,4683150.00000,1448233.00000,1440839.00000,1214510.00000,978813.00000,780927.00000,1177274.00000,914029.00000,1908875.00000,...,180854.27000,264000.00000,353000.00000,159422.00000,133398.00000,62416.00000,,,,
70094MRCNC10800,217298702.00000,176386400.00000,192340500.00000,162030982.00000,142009076.00000,147088590.00000,120149724.00000,20514100.00000,29534568.00000,28561121.00000,...,14033632.00000,15611971.00000,12514975.00000,13033799.00000,9981778.00000,11626902.87000,12079217.00000,12144943.00000,11604350.00000,14758762.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46392NRTHR2723E,,,,,,,,,,,...,4287714.00000,4556925.00000,3397391.00000,2345310.00000,3741248.00000,4281160.00000,2885054.00000,2886632.00000,3142912.00000,3749726.00000
37050STVCM815CU,,,,,,,,,,,...,5382106.50000,4715736.90000,4363220.40000,5097929.20000,4722749.70000,4433526.80000,4774354.40000,4607203.50000,3406534.70000,3725472.70000
70669KRNSL3300B,,,,,,13583.00000,1198990.00000,1705506.00000,2523776.00000,2821002.00000,...,3066783.00000,3298216.00000,3321821.00000,3592649.00000,3183305.00000,3284455.00000,3382155.00000,3492605.00000,3389960.00000,3725055.00000
48161DTRTD3500E,,,,,,,,,,,...,14618971.97000,9587098.76000,10049596.03000,9017157.56000,7705028.59000,3535352.53000,3450607.93000,3577156.57000,3608092.43000,3721469.62000


In [116]:
topRows = topRows.drop_duplicates()
topRows

Unnamed: 0_level_0,TOTAL1987,TOTAL1988,TOTAL1989,TOTAL1990,TOTAL1991,TOTAL1992,TOTAL1993,TOTAL1994,TOTAL1995,TOTAL1996,...,TOTAL2009,TOTAL2010,TOTAL2011,TOTAL2012,TOTAL2013,TOTAL2014,TOTAL2015,TOTAL2016,TOTAL2017,TOTAL2018
TRIFD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
TOTAL_RELEASE,17495362548.00000,5680148075.00000,4452091324.00000,3753465237.00000,3496917822.96000,3318950504.64000,2909276729.72000,2365597221.36000,2387160808.09000,2318409398.65000,...,3038737468.64420,3401823195.53792,3701652530.05003,3232491920.86028,3727353563.30422,3544714287.81079,2972866487.44393,3134064227.29282,3541607790.97415,3367991070.30157
93562KRRMC13200,5229012615.00000,1300574.00000,1078748.00000,1275504.00000,1039515.00000,634025.00000,556795.00000,415897.00000,320000.00000,517953.00000,...,,,,,,,,,,
77978LMNMCSTATE,468874750.00000,10700650.00000,30250.00000,19605.00000,10855.00000,8513.00000,2086.00000,458.00000,1046.00000,961.00000,...,104538.00000,178771.10000,199796.00000,188280.10000,215479.80000,229453.30000,235630.60000,31152.40000,,
76567LMNMCSANDO,329112650.00000,4683150.00000,1448233.00000,1440839.00000,1214510.00000,978813.00000,780927.00000,1177274.00000,914029.00000,1908875.00000,...,180854.27000,264000.00000,353000.00000,159422.00000,133398.00000,62416.00000,,,,
70094MRCNC10800,217298702.00000,176386400.00000,192340500.00000,162030982.00000,142009076.00000,147088590.00000,120149724.00000,20514100.00000,29534568.00000,28561121.00000,...,14033632.00000,15611971.00000,12514975.00000,13033799.00000,9981778.00000,11626902.87000,12079217.00000,12144943.00000,11604350.00000,14758762.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89015TTNMMPOBOX,250.00000,500.00000,1540.00000,173117.00000,158451.00000,125775.00000,122155.00000,49500.00000,65100.00000,53100.00000,...,6315.00000,3657.00000,2572.00000,8126.00000,7797.00000,12651.00000,9600.00000,3216860.00000,4801870.00000,1891730.00000
19706TXCDL2000W,7362990.00000,911024.00000,507911.00000,567124.00000,465922.00000,408037.00000,344549.00000,292758.00000,195207.00000,162642.00000,...,1596283.00000,432302.50000,1145901.70000,3689999.50000,3043548.10000,3157776.60000,3728165.76000,2698200.38000,3995973.11600,5665306.55000
38563SVGZN120ZI,,,,,,,,,,,...,68628.71000,130402.60000,209584.00000,1628325.70000,1716833.55000,2327303.72700,2804096.16000,86003.21000,2849557.00000,4357062.23812
0073WLLDWSBARAM,,,,,,,,,,,...,,,,,,520700.00000,954568.00000,216040.00000,3020524.00000,3990680.00000


In [117]:
# merge topRows dataframe to reduced dataframe with one record per facility, with lat/long values
merged = all_tri.merge(topRows, on='TRIFD')
merged

Unnamed: 0,TRIFD,FACILITY_NAME,STREET_ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,...,TOTAL2009,TOTAL2010,TOTAL2011,TOTAL2012,TOTAL2013,TOTAL2014,TOTAL2015,TOTAL2016,TOTAL2017,TOTAL2018
0,0073WLLDWSBARAM,ALLIED WASTE OF PONCE,BARAMAYA AVENUE - PR 500 - COTORRA WARD,PONCE,PONCE MUNICIPIO,PR,732,,,18.00226,...,,,,,,520700.00000,954568.00000,216040.00000,3020524.00000,3990680.00000
1,03570JMSRV650MA,FRASER N.H. LLC - BERLIN PULP MILL,650 MAIN ST,BERLIN,COOS,NH,3570,,,44.47194,...,,,,,,,,,,
2,04239NTRNTRILEY,PIXELLE SPECIALTY SOLUTIONS,300 RILEY RD - ANDROSCOGGIN MILL,JAY,FRANKLIN,ME,4239,,,44.50633,...,1798082.10000,1898321.30000,2114151.09000,1616165.37000,1887060.64000,1396386.86000,1083316.18000,768153.61000,3061296.84000,3007123.32000
3,04276BSCSCROUTE,ND PAPER INC - RUMFORD DIV,35 HARTFORD ST,RUMFORD,OXFORD,ME,4276,,,44.55080,...,450201.80000,578810.30000,418143.86000,422078.45000,433966.31400,423637.04500,402934.33900,406028.30000,518495.59200,489577.92900
4,04694GRGPCMILLA,WOODLAND PULP LLC,144 MAIN ST,BAILEYVILLE,WASHINGTON,ME,4694,,,45.15689,...,701134.71520,796244.08590,780170.44930,1138512.10840,1177567.17000,1116741.02000,1195719.24000,1178097.64500,1270418.46500,1086406.59300
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
456,99707FRTKN1FORA,FORT KNOX MINE,1 FORT KNOX RD,FAIRBANKS,FAIRBANKS NORTH STAR BOROUGH,AK,99712,,,64.99953,...,3995500.50651,2443891.65163,2611142.20000,2278446.20000,2724241.00000,2468292.90000,19600834.70000,23292802.60000,20823727.50000,19530448.10000
457,99737PGMNX38MIL,POGO MINE,38 MILES NE OF DELTA JUNCTION,DELTA JUNCTION,FAIRBANKS NORTH STAR BOROUGH,AK,99737,,,64.44972,...,9546678.00000,7770795.60000,8402917.40000,9442627.70000,7719192.80000,12062935.10000,8638067.40000,9281139.20000,9205581.50000,7922913.80000
458,99752RDDGP90MIL,RED DOG OPERATIONS,90 MILES N OF KOTZEBUE,KOTZEBUE,NORTHWEST ARCTIC BOROUGH,AK,99752,,,68.06151,...,637521196.30000,777395652.60300,989392511.30300,814003310.83000,906385793.10200,1124384868.58200,561222144.59200,776973928.58100,1082414482.97500,885682874.24200
459,99801KNNCT13401,HECLA GREENS CREEK MINING CO,13401 GLACIER HWY,JUNEAU,JUNEAU BOROUGH,AK,99801,,,58.08180,...,47242868.62000,47185465.74000,47043841.64000,48754931.74000,50615872.74000,41557030.50000,29951062.50000,41780361.00000,58696349.90000,55589597.00000


In [118]:
# cast lat/long columns to float
merged['LATITUDE'] = merged['LATITUDE'].astype(float)
merged['LONGITUDE'] = merged['LONGITUDE'].astype(float)

# create geodataframe using Latitude and Longitude columns
gdf = gpd.GeoDataFrame(merged, geometry=gpd.points_from_xy(merged.LONGITUDE, merged.LATITUDE))

# define crs for geodataframe
gdf.crs = {'init' :'epsg:4326'}

gdf.head()

Unnamed: 0,TRIFD,FACILITY_NAME,STREET_ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,...,TOTAL2010,TOTAL2011,TOTAL2012,TOTAL2013,TOTAL2014,TOTAL2015,TOTAL2016,TOTAL2017,TOTAL2018,geometry
0,0073WLLDWSBARAM,ALLIED WASTE OF PONCE,BARAMAYA AVENUE - PR 500 - COTORRA WARD,PONCE,PONCE MUNICIPIO,PR,732,,,18.00226,...,,,,,520700.0,954568.0,216040.0,3020524.0,3990680.0,POINT (-66.64132 18.00226)
1,03570JMSRV650MA,FRASER N.H. LLC - BERLIN PULP MILL,650 MAIN ST,BERLIN,COOS,NH,3570,,,44.47194,...,,,,,,,,,,POINT (-71.16944 44.47194)
2,04239NTRNTRILEY,PIXELLE SPECIALTY SOLUTIONS,300 RILEY RD - ANDROSCOGGIN MILL,JAY,FRANKLIN,ME,4239,,,44.50633,...,1898321.3,2114151.09,1616165.37,1887060.64,1396386.86,1083316.18,768153.61,3061296.84,3007123.32,POINT (-70.23861 44.50633)
3,04276BSCSCROUTE,ND PAPER INC - RUMFORD DIV,35 HARTFORD ST,RUMFORD,OXFORD,ME,4276,,,44.5508,...,578810.3,418143.86,422078.45,433966.314,423637.045,402934.339,406028.3,518495.592,489577.929,POINT (-70.54110 44.55080)
4,04694GRGPCMILLA,WOODLAND PULP LLC,144 MAIN ST,BAILEYVILLE,WASHINGTON,ME,4694,,,45.15689,...,796244.0859,780170.4493,1138512.1084,1177567.17,1116741.02,1195719.24,1178097.645,1270418.465,1086406.593,POINT (-67.40201 45.15689)


In [119]:
# export geodataframe to geojson
gdf.to_file("../data/top-tri-releases.geojson", driver='GeoJSON', encoding='utf-8')

In [65]:
# load RSEI scores
scores = pd.read_excel('../data/all-rsei.xlsx')
scores

Unnamed: 0,SubmissionYear,FacilityID,RSEI Modeled Pounds,RSEI Modeled Hazard,RSEI Score
0,2007,0071WPNCCN619AV,0.00000,0.00000,0.00000
1,2007,0106WCNNLL25TEX,0.00000,0.00000,0.00000
2,2007,0150WCNSLD17SAI,0.00000,0.00000,0.00000
3,2007,0150WKRLST91CAR,29.79996,536400.20000,62.10780
4,2007,0152WCMRNX1537G,0.00000,0.00000,0.00000
...,...,...,...,...,...
266097,2018,99801KNNCT13401,18646.00000,3599499777.10000,779.09962
266098,2018,99901SCSTG1300S,0.80000,18400.00000,0.51736
266099,2018,155524GYSN23IND,750.00000,10965000000.00000,46982.70000
266100,2018,275593MCPT4191H,148.70000,208700.00000,3.75601


In [66]:
scores = scores.rename(columns={'SubmissionYear':'YEAR','FacilityID': 'TRIFD','RSEI Modeled Pounds':'MODEL_LBS','RSEI Modeled Hazard':'MODEL_HAZARD','RSEI Score':'SCORE'})
scores

Unnamed: 0,YEAR,TRIFD,MODEL_LBS,MODEL_HAZARD,SCORE
0,2007,0071WPNCCN619AV,0.00000,0.00000,0.00000
1,2007,0106WCNNLL25TEX,0.00000,0.00000,0.00000
2,2007,0150WCNSLD17SAI,0.00000,0.00000,0.00000
3,2007,0150WKRLST91CAR,29.79996,536400.20000,62.10780
4,2007,0152WCMRNX1537G,0.00000,0.00000,0.00000
...,...,...,...,...,...
266097,2018,99801KNNCT13401,18646.00000,3599499777.10000,779.09962
266098,2018,99901SCSTG1300S,0.80000,18400.00000,0.51736
266099,2018,155524GYSN23IND,750.00000,10965000000.00000,46982.70000
266100,2018,275593MCPT4191H,148.70000,208700.00000,3.75601


In [88]:
# create pivot table summing totals to create records per facility per year, with total releases
scoresPivot = pd.pivot_table(scores,index=['YEAR', 'TRIFD'], values=['MODEL_LBS','MODEL_HAZARD','SCORE'],aggfunc=np.sum)
scoresPivot

Unnamed: 0_level_0,Unnamed: 1_level_0,MODEL_HAZARD,MODEL_LBS,SCORE
YEAR,TRIFD,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2007,00602SMRTMRD115,115000.00000,5.00000,4.07923
2007,00603HWLTTSTATE,0.00000,78.00000,0.00000
2007,00603SYMMTLAMNT,0.00000,0.00000,0.00000
2007,00608DCRBNRD3KM,2625.00000,750.00000,0.17256
2007,00610BXTRHROAD4,11000000000.00000,1000.00000,2018063.00000
...,...,...,...,...
2018,9974WSBLLC1RIVE,94356.60000,11.58000,0.00506
2018,99752RDDGP90MIL,3518393675.55500,219298.24383,2172.04828
2018,99801CRLSK331CL,4220502.81000,292294.00000,0.05632
2018,99801KNNCT13401,3599499777.10000,18646.00000,779.09962


In [89]:
# unstack pivot table to create columns for release type for each year
scoresUnstacked = scoresPivot.unstack(level=0)
scoresUnstacked

Unnamed: 0_level_0,MODEL_HAZARD,MODEL_HAZARD,MODEL_HAZARD,MODEL_HAZARD,MODEL_HAZARD,MODEL_HAZARD,MODEL_HAZARD,MODEL_HAZARD,MODEL_HAZARD,MODEL_HAZARD,...,SCORE,SCORE,SCORE,SCORE,SCORE,SCORE,SCORE,SCORE,SCORE,SCORE
YEAR,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
TRIFD,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
00602SMRTMRD115,115000.00000,46000.00000,161000.00000,161000.00000,,,,,,,...,24.12741,23.61643,,,,,,,,
00603HWLTTSTATE,0.00000,0.00000,3511990.00000,927190.00000,1194610.00000,879255.00000,1244880.00000,192880.00000,2300.00000,4600.00000,...,113.23530,36.96524,33.77863,26.35994,33.97479,8.37460,0.06183,0.12144,,
00603SYMMTLAMNT,0.00000,0.00000,,0.00000,,,,,,,...,,0.00000,,,,,,,,
00608DCRBNRD3KM,2625.00000,55411.10000,43987.50000,44078.50000,46360.50000,49958.50000,56703.00000,53546.00000,54428.00000,51726.00000,...,1.79884,1.81474,1.92370,2.09458,2.39524,2.27793,2.33606,2.24153,2.10507,2.03719
0060WHPNTRCARR1,,,,,,,,,98280.00000,63540.00000,...,,,,,,,5.58495,3.05970,1.81010,1.76193
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99801KNNCT13401,7223744936.00000,7223598369.00000,7221730839.00000,7521905809.00000,7522176340.00000,7523344007.00000,7674883606.00000,3660391250.50000,3665556534.60000,2437083706.10000,...,1576.32292,1636.10736,1632.72914,1630.07289,1660.46230,788.74866,790.08475,526.19206,698.74436,779.09962
9982WGLCRBPBX14,,,,,,0.00000,0.00000,0.00000,0.00000,,...,,,,0.00000,0.00000,0.00000,0.00000,,,
99901LSKSH381TN,,,,,327670.00000,53690.00000,,281516.00000,,,...,,,10.36650,1.69391,,0.39908,,,,
99901SCSTG1300S,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,11343.60000,6743.60000,2419.60000,3247.60000,...,0.00000,0.00000,0.00000,0.00000,0.32276,0.19133,0.06846,0.09167,0.07391,0.51736


In [90]:
# rename columns to reflect year and release type
scoresUnstacked.columns = [''.join(str(s).strip() for s in col if s) for col in scoresUnstacked.columns]
scoresUnstacked

Unnamed: 0_level_0,MODEL_HAZARD2007,MODEL_HAZARD2008,MODEL_HAZARD2009,MODEL_HAZARD2010,MODEL_HAZARD2011,MODEL_HAZARD2012,MODEL_HAZARD2013,MODEL_HAZARD2014,MODEL_HAZARD2015,MODEL_HAZARD2016,...,SCORE2009,SCORE2010,SCORE2011,SCORE2012,SCORE2013,SCORE2014,SCORE2015,SCORE2016,SCORE2017,SCORE2018
TRIFD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00602SMRTMRD115,115000.00000,46000.00000,161000.00000,161000.00000,,,,,,,...,24.12741,23.61643,,,,,,,,
00603HWLTTSTATE,0.00000,0.00000,3511990.00000,927190.00000,1194610.00000,879255.00000,1244880.00000,192880.00000,2300.00000,4600.00000,...,113.23530,36.96524,33.77863,26.35994,33.97479,8.37460,0.06183,0.12144,,
00603SYMMTLAMNT,0.00000,0.00000,,0.00000,,,,,,,...,,0.00000,,,,,,,,
00608DCRBNRD3KM,2625.00000,55411.10000,43987.50000,44078.50000,46360.50000,49958.50000,56703.00000,53546.00000,54428.00000,51726.00000,...,1.79884,1.81474,1.92370,2.09458,2.39524,2.27793,2.33606,2.24153,2.10507,2.03719
0060WHPNTRCARR1,,,,,,,,,98280.00000,63540.00000,...,,,,,,,5.58495,3.05970,1.81010,1.76193
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99801KNNCT13401,7223744936.00000,7223598369.00000,7221730839.00000,7521905809.00000,7522176340.00000,7523344007.00000,7674883606.00000,3660391250.50000,3665556534.60000,2437083706.10000,...,1576.32292,1636.10736,1632.72914,1630.07289,1660.46230,788.74866,790.08475,526.19206,698.74436,779.09962
9982WGLCRBPBX14,,,,,,0.00000,0.00000,0.00000,0.00000,,...,,,,0.00000,0.00000,0.00000,0.00000,,,
99901LSKSH381TN,,,,,327670.00000,53690.00000,,281516.00000,,,...,,,10.36650,1.69391,,0.39908,,,,
99901SCSTG1300S,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,11343.60000,6743.60000,2419.60000,3247.60000,...,0.00000,0.00000,0.00000,0.00000,0.32276,0.19133,0.06846,0.09167,0.07391,0.51736


In [91]:
# # merge amounts and scores, by TRIFD
# amountsUnstacked = amountsUnstacked.merge(scoresUnstacked, on='TRIFD',)
# amountsUnstacked

In [132]:
# now find top values for each column
# declare empty dataframe to hold top ordered rows
topRows = pd.DataFrame()

# create list of column names
columns = list(scoresUnstacked.columns)

# loop through each column
# finding the 100 largest values in each
# and appending to empty topRows dataframe
for column in columns:
    topValues = scoresUnstacked.nlargest(25, column)
    topRows = topRows.append(topValues)

    
topRows

Unnamed: 0_level_0,MODEL_HAZARD2007,MODEL_HAZARD2008,MODEL_HAZARD2009,MODEL_HAZARD2010,MODEL_HAZARD2011,MODEL_HAZARD2012,MODEL_HAZARD2013,MODEL_HAZARD2014,MODEL_HAZARD2015,MODEL_HAZARD2016,...,SCORE2009,SCORE2010,SCORE2011,SCORE2012,SCORE2013,SCORE2014,SCORE2015,SCORE2016,SCORE2017,SCORE2018
TRIFD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
65704MNSSTHWYAX,4865485302531.59961,2411174770000.00000,24775800.00000,86119420.20000,16885200.00000,72662243080.00000,21471240.00000,7506060.00000,11765980.00000,3548697943.00000,...,144.14422,350.98860,65.46088,279476.34137,115.12757,69.17252,217.60205,14247.79543,49.29491,63.02393
65746STLPRSTEEL,4645690980000.00000,122114100.00000,58944000.00000,92928000.00000,214680000.00000,1752000.00000,1536000.00000,1856400.00000,7119360.00000,7830313.00000,...,221.88482,352.98599,819.68715,6.89668,6.05382,7.60944,29.43378,32.08000,5.28936,4.25826
27263RYLDV325KE,3040057865020.00000,3383242775240.00000,0.00000,0.00000,,,,,,,...,0.00000,0.00000,,,,,,,,
77507HCHST9502B,2255236358380.63916,1954471840428.12305,2105096365491.16406,2418763223862.15381,1706315519667.36011,92564760369.27101,79490864711.93098,1407874857634.17407,86950515300.13263,386477071288.75891,...,13695517.96758,15389441.02056,12483090.21320,10328717.56011,9500398.18205,20728213.71492,10794495.68243,13930121.30488,10585289.44109,6086036.46651
40216DDLSV4242C,567235632382.14600,82757546903.50389,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90248NCMTL417WE,,,,,,,,,,,...,,,,,,,,,3456180.00000,3454770.00000
23231STRLZ5674E,,,,,,,,90728000000.00000,79134000000.00000,85547000000.00000,...,,,,,,7385530.00000,6513360.00000,7119780.00000,7173770.00000,3273930.00000
77545CHMPN3130H,2814887878.96000,47808820049.74000,8385175926.31240,5566576127.45200,2407937770.46000,1306428309.91353,1587930924.75200,397842095.92633,17543033.56060,3445002861.96000,...,1765116.62601,1432918.54424,662839.72721,365662.35459,399680.07633,113200.49034,4972.86686,1046188.16252,150432.49884,3260765.87813
72032KRKWD670EQ,648000.00000,,2693980.00000,,0.00000,0.00000,0.00000,0.00000,3867800.00000,6339900.00000,...,174.02488,,0.00000,0.00000,0.00000,0.00000,179.82202,301.36254,4.24130,2983905.68540


In [133]:
topRows = topRows.drop_duplicates()
topRows

Unnamed: 0_level_0,MODEL_HAZARD2007,MODEL_HAZARD2008,MODEL_HAZARD2009,MODEL_HAZARD2010,MODEL_HAZARD2011,MODEL_HAZARD2012,MODEL_HAZARD2013,MODEL_HAZARD2014,MODEL_HAZARD2015,MODEL_HAZARD2016,...,SCORE2009,SCORE2010,SCORE2011,SCORE2012,SCORE2013,SCORE2014,SCORE2015,SCORE2016,SCORE2017,SCORE2018
TRIFD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
65704MNSSTHWYAX,4865485302531.59961,2411174770000.00000,24775800.00000,86119420.20000,16885200.00000,72662243080.00000,21471240.00000,7506060.00000,11765980.00000,3548697943.00000,...,144.14422,350.98860,65.46088,279476.34137,115.12757,69.17252,217.60205,14247.79543,49.29491,63.02393
65746STLPRSTEEL,4645690980000.00000,122114100.00000,58944000.00000,92928000.00000,214680000.00000,1752000.00000,1536000.00000,1856400.00000,7119360.00000,7830313.00000,...,221.88482,352.98599,819.68715,6.89668,6.05382,7.60944,29.43378,32.08000,5.28936,4.25826
27263RYLDV325KE,3040057865020.00000,3383242775240.00000,0.00000,0.00000,,,,,,,...,0.00000,0.00000,,,,,,,,
77507HCHST9502B,2255236358380.63916,1954471840428.12305,2105096365491.16406,2418763223862.15381,1706315519667.36011,92564760369.27101,79490864711.93098,1407874857634.17407,86950515300.13263,386477071288.75891,...,13695517.96758,15389441.02056,12483090.21320,10328717.56011,9500398.18205,20728213.71492,10794495.68243,13930121.30488,10585289.44109,6086036.46651
40216DDLSV4242C,567235632382.14600,82757546903.50389,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25112NNCRBRTE25,45100825710.28000,39810333313.40000,66451416751.52000,61908417855.58001,44946550154.61466,36444234497.10001,72183269734.86000,63966134578.16000,67728109614.04000,52199127617.34000,...,2805162.83276,2251220.53756,1183632.18060,761567.45406,3000570.78465,2548972.65795,2877263.46568,2404096.23304,2544750.42711,2282336.73052
77205SMTHN16740,202800000.00000,202800000.00000,202800000.00000,3738249000.00000,6053391000.00000,5750514000.00000,4269228000.00000,4325374000.00000,4335240000.00000,,...,45657.67000,882419.40000,1467106.00000,1430150.70000,1088890.30000,1130844.90000,1161263.00000,,,7468108.00000
44035NGLHR120PI,14402638865.40196,14401732210.40982,14402677984.39582,14403150201.16442,5899662729.06162,6057749069.05000,11722372092.94220,2939631058.77136,4897596379.09032,2166170571.60522,...,1342339.50786,1337221.51577,543747.00890,556210.50839,1010977.04315,240696.68519,417024.92123,177055.43162,288096.72446,5198391.92029
4503WFCNLL346GR,,,,,,,,,,,...,,,,,,,,,,4296230.51981


In [134]:
# # now reduce filtered dataframe to find one record per facility
# # use group by TRIFID field, and only keep the identifying fields about each facility
# all_tri = tri_filter.fillna('').groupby('TRIFD').agg({'FACILITY_NAME':'first',
#                              'STREET_ADDRESS':'first',
#                              'CITY':'first',
#                              'COUNTY':'first',
#                              'ST':'first',
#                              'ZIP':'first',
#                              'BIA':'first',
#                              'TRIBE':'first',
#                              'LATITUDE':'first',
#                              'LONGITUDE':'first',
#                              'INDUSTRY_SECTOR':'first',
#                              'PARENT_CO_NAME': 'first'
#                                                }).reset_index()

# all_tri

In [135]:
# merge topRows dataframe to reduced dataframe with one record per facility, with lat/long values
merged = all_tri.merge(topRows, on='TRIFD')
merged

Unnamed: 0,TRIFD,FACILITY_NAME,STREET_ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,...,SCORE2009,SCORE2010,SCORE2011,SCORE2012,SCORE2013,SCORE2014,SCORE2015,SCORE2016,SCORE2017,SCORE2018
0,00610BXTRHROAD4,EDWARDS LIFESCIENCES TECHNOLOGY SARL,STATE RD 402 KM 1.4 N LAS MARIAS IND US TRIAL ...,ANASCO,ANASCO MUNICIPIO,PR,610,,,18.29390,...,1994720.00000,1983053.00000,1971439.00000,2318757.30000,2579384.00000,2825327.00000,2999252.00000,6185696.00000,938779.00000,901403.70000
1,01151MNSNT730WO,SOLUTIA INC,730 WORCESTER ST,SPRINGFIELD,HAMPDEN,MA,1151,,,42.15498,...,2066.02561,4600.43524,5275.97550,5178.29442,7404.05958,20519.58557,22642.33037,21925.85518,21709.77031,22334.36070
2,06383FDRLPINLAN,FUSION PAPERBOARD - CONNECTICUT LLC,130 INLAND RD,VERSAILLES,NEW LONDON,CT,6383,,,41.62371,...,0.00000,0.00000,0.00000,0.00000,1042687.22673,1049377.71344,,,,
3,07080MTZMT3900S,AMES ADVANCED MATERIALS CORP,3900 S CLINTON AVE,SOUTH PLAINFIELD,MIDDLESEX,NJ,7080,,,40.56461,...,590942.90672,2444261.67552,713384.65160,200326.28348,79885.60989,148097.92734,31458.78769,51581.74653,51872.61882,133989.86931
4,07524CRDNL50561,CARDINAL COLOR INC,50-56 1ST AVE,PATERSON,PASSAIC,NJ,7524,,,40.94033,...,0.00000,0.00000,0.00000,0.00000,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192,9453WSLYND477KA,SOLARCITY- SILEVO CTC,47700 KATO ROAD,FREMONT,ALAMEDA,CA,945387307,,,37.46927,...,,851098.00000,,,,,0.58710,,,
193,97015SMLLS13340,PCC STRUCTURALS INC SMALL STRUCTURALS BUSINESS...,13340 SE 84TH AVE,CLACKAMAS,CLACKAMAS,OR,97015,,,45.42578,...,5858851.15149,98635.29384,100076.77533,95401.18236,162857.18657,117824.88851,120010.30345,107022.43776,90751.06355,79885.79854
194,97206LRGST4600S,PCC STRUCTURALS INC LARGE PARTS CAMPUS,4600 SE HARNEY DR,PORTLAND,MULTNOMAH,OR,97206,,,45.46130,...,9982351.61660,226043.61263,265963.35368,276260.26285,303082.36639,184658.30360,180601.85074,93670.65909,64886.79497,36916.78475
195,97220BNGFP19000,BOEING CO OF PORTLAND,19000 NE SANDY BLVD,PORTLAND,MULTNOMAH,OR,97230,,,45.54385,...,35444.16726,47877.80802,101709.22820,157336.56945,90141.55673,101948.89736,574793.29243,6982511.19271,19846568.83484,20009309.06369


In [136]:
# cast lat/long columns to float
merged['LATITUDE'] = merged['LATITUDE'].astype(float)
merged['LONGITUDE'] = merged['LONGITUDE'].astype(float)

# create geodataframe using Latitude and Longitude columns
gdf = gpd.GeoDataFrame(merged, geometry=gpd.points_from_xy(merged.LONGITUDE, merged.LATITUDE))

# define crs for geodataframe
gdf.crs = {'init' :'epsg:4326'}

gdf.head()

Unnamed: 0,TRIFD,FACILITY_NAME,STREET_ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,...,SCORE2010,SCORE2011,SCORE2012,SCORE2013,SCORE2014,SCORE2015,SCORE2016,SCORE2017,SCORE2018,geometry
0,00610BXTRHROAD4,EDWARDS LIFESCIENCES TECHNOLOGY SARL,STATE RD 402 KM 1.4 N LAS MARIAS IND US TRIAL ...,ANASCO,ANASCO MUNICIPIO,PR,610,,,18.2939,...,1983053.0,1971439.0,2318757.3,2579384.0,2825327.0,2999252.0,6185696.0,938779.0,901403.7,POINT (-67.13660 18.29390)
1,01151MNSNT730WO,SOLUTIA INC,730 WORCESTER ST,SPRINGFIELD,HAMPDEN,MA,1151,,,42.15498,...,4600.43524,5275.9755,5178.29442,7404.05958,20519.58557,22642.33037,21925.85518,21709.77031,22334.3607,POINT (-72.52642 42.15498)
2,06383FDRLPINLAN,FUSION PAPERBOARD - CONNECTICUT LLC,130 INLAND RD,VERSAILLES,NEW LONDON,CT,6383,,,41.62371,...,0.0,0.0,0.0,1042687.22673,1049377.71344,,,,,POINT (-72.04313 41.62371)
3,07080MTZMT3900S,AMES ADVANCED MATERIALS CORP,3900 S CLINTON AVE,SOUTH PLAINFIELD,MIDDLESEX,NJ,7080,,,40.56461,...,2444261.67552,713384.6516,200326.28348,79885.60989,148097.92734,31458.78769,51581.74653,51872.61882,133989.86931,POINT (-74.42964 40.56461)
4,07524CRDNL50561,CARDINAL COLOR INC,50-56 1ST AVE,PATERSON,PASSAIC,NJ,7524,,,40.94033,...,0.0,0.0,0.0,,,,,,,POINT (-74.14554 40.94033)


In [137]:
# export geodataframe to geojson
gdf.to_file("../data/top-tri-scores.geojson", driver='GeoJSON', encoding='utf-8')

In [149]:
tri_filter['CHEMICAL'].unique()

array(['PHOSPHORIC ACID',
       'SULFURIC ACID (1994 AND AFTER ACID AEROSOLS" ONLY)"',
       'AMMONIUM SULFATE (SOLUTION)', 'N-NITROSODI-N-PROPYLAMINE',
       'METHYL ETHYL KETONE', 'AMMONIA', 'CHLOROMETHANE',
       '2-METHOXYETHANOL', 'TOLUENE', 'ZINC COMPOUNDS', 'METHANOL',
       'SODIUM HYDROXIDE (SOLUTION)', 'TETRACHLOROETHYLENE',
       'POLYCHLORINATED BIPHENYLS', '1,1,1-TRICHLOROETHANE', 'PROPYLENE',
       'ETHYLENE GLYCOL', 'CARBON DISULFIDE', 'COPPER', 'STYRENE',
       'HYDROGEN CYANIDE', 'ALUMINUM OXIDE (FIBROUS FORMS)', 'MIXTURE',
       'HYDROCHLORIC ACID (1995 AND AFTER ACID AEROSOLS" ONLY)"',
       'SODIUM SULFATE (SOLUTION)', 'ACETONE', 'ZINC (FUME OR DUST)',
       'VINYL CHLORIDE', 'XYLENE (MIXED ISOMERS)', 'COPPER COMPOUNDS',
       'O-XYLENE',
       'CHROMIUM COMPOUNDS(EXCEPT CHROMITE ORE MINED IN THE TRANSVAAL REGION)',
       'DIBUTYL PHTHALATE', 'FREON 113', 'DICHLOROMETHANE',
       'HEXACHLORO-1,3-BUTADIENE', 'METHYLENEBIS(PHENYLISOCYANATE)',
       'N-

In [167]:
chromium = tri_filter[(tri_filter['YEAR'] == 2018) & (tri_filter['CHEMICAL'].str.contains('CHROMIUM'))]
chromium

Unnamed: 0,YEAR,TRIFD,FACILITY_NAME,STREET_ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,LONGITUDE,INDUSTRY_SECTOR,PARENT_CO_NAME,CHEMICAL,WATER,ON_SITE_RELEASE_TOTAL,AIR,UNIT_OF_MEASURE
2659866,2018,2974WCMSTL6371C,CM STEEL INC,6371 CAMPBELL ROAD,YORK,YORK,SC,29745,,,35.03478,-81.12497,Fabricated Metals,,CHROMIUM,0.00000,15.07200,15.07200,Pounds
2659879,2018,27893NCRMC2401S,LINAMAR FORGING CAROLINA,2401 STANTONSBURG RD,WILSON,WILSON,NC,27893,,,35.69570,-77.89097,Fabricated Metals,LINAMAR CORP,CHROMIUM,0.00000,0.00000,0.00000,Pounds
2659913,2018,74651SNRGNHWY15,OGE ENERGY CORP SOONER GENERATING STATION,HWY 15 N,RED ROCK,NOBLE,OK,74651,,,36.45350,-97.05267,Electric Utilities,OGE ENERGY CORP,CHROMIUM COMPOUNDS(EXCEPT CHROMITE ORE MINED I...,0.00000,199.00000,199.00000,Pounds
2659922,2018,55719MNNST611WE,MN TWIST DRILL AQUISITION LLC,1 SW 7TH ST,CHISHOLM,ST LOUIS,MN,55719,,,47.48216,-92.87961,Machinery,,CHROMIUM,0.00000,0.00000,0.00000,Pounds
2659929,2018,54971LLNCL69STA,ALLIANCE LAUNDRY SYSTEMS LLC STANTON PLANT,690 STANTON ST,RIPON,FOND DU LAC,WI,54971,,,43.85293,-88.82580,Machinery,ALLIANCE LAUNDRY SYSTEMS LLC,CHROMIUM,0.00000,5.00000,5.00000,Pounds
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2739398,2018,66106FRBNK3601F,PENTAIR - KANSAS CITY OPERATION,3601 FAIRBANKS AVE,KANSAS CITY,WYANDOTTE,KS,66106,,,39.08783,-94.67105,Electrical Equipment,PENTAIR INC,CHROMIUM,5.00000,212.64000,207.64000,Pounds
2739419,2018,75501LMXML300AL,TCI TEXARKANA INC,300 ALUMAX DR,TEXARKANA,BOWIE,TX,75501,,,33.45071,-94.13436,Primary Metals,TA CHEN INTERNATIONAL INC,CHROMIUM,1.30000,1.40400,0.10400,Pounds
2739425,2018,46580THYNC486W3,SYMMETRY MEDICAL WARSAW BULDING #1,486 W 350 N,WARSAW,KOSCIUSKO,IN,46582,,,41.28500,-85.85270,Miscellaneous Manufacturing,TECOMET INC,CHROMIUM,0.00000,0.00000,0.00000,Pounds
2739447,2018,7737WNVRMS196FM,NOV RMES TOMBALL,10906 FM 2920,TOMBALL,HARRIS,TX,77375,,,30.09958,-95.58480,Machinery,NATIONAL OILWELL VARCO LP,CHROMIUM,0.00000,0.00000,0.00000,Pounds


In [169]:
# now reduce filtered dataframe to find one record per facility and sum chromium and chromium compound releases
# use group by TRIFID field
chromium = chromium.fillna('').groupby('TRIFD').agg({'FACILITY_NAME':'first',
                             'STREET_ADDRESS':'first',
                             'CITY':'first',
                             'COUNTY':'first',
                             'ST':'first',
                             'ZIP':'first',
                             'BIA':'first',
                             'TRIBE':'first',
                             'LATITUDE':'first',
                             'LONGITUDE':'first',
                             'INDUSTRY_SECTOR':'first',
                             'PARENT_CO_NAME': 'first',
                             'ON_SITE_RELEASE_TOTAL':'sum',
                             'AIR':'sum',
                             'YEAR': 'first'
                                               }).reset_index()

chromium

Unnamed: 0,TRIFD,FACILITY_NAME,STREET_ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,LONGITUDE,INDUSTRY_SECTOR,PARENT_CO_NAME,ON_SITE_RELEASE_TOTAL,AIR,YEAR
0,00612THRMKZENOG,THERMO KING PUERTO RICO MANUFACTURA INC,517 ZONA IND US TRIAL,ARECIBO,ARECIBO MUNICIPIO,PR,612,,,18.45815,-66.74646,Machinery,INGERSOLL-RAND CO,0.04900,0.04900,2018
1,00617PRCSNTMASD,ADM ALLIANCE NUTRITION OF PUERTO RICO LLC,TOMAS DAVILA ST EDIF 10 PALMAS ALTAS INDUSTRI...,BARCELONETA,BARCELONETA MUNICIPIO,PR,617,,,18.45276,-66.54038,Food,ARCHER DANIELS MIDLAND CO,0.00000,0.00000,2018
2,00630WDTRT65INF,PUERTO RICO WOOD TREATING,AVE 65 DE INFANTERIA KM 7.0,CAROLINA,SAN JUAN MUNICIPIO,PR,985,,,18.38135,-66.03416,Wood Products,,0.00000,0.00000,2018
3,00659CNTRLRD2KM,ADM ALLIANCE NUTRITION OF PR LLC,STATE RD NO. 2 KM 83.0 CARRIZALES WARD,HATILLO,HATILLO MUNICIPIO,PR,659,,,18.48514,-66.77732,Food,ARCHER DANIELS MIDLAND CO,0.00000,0.00000,2018
4,00694HRVYHRD686,HUBBELL CARIBE LTD,RD 686 KM 17.3,VEGA BAJA,VEGA BAJA MUNICIPIO,PR,693,,,18.48647,-66.40520,Electrical Equipment,HUBBELL INC,0.00000,0.00000,2018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3221,99707FRTKN1FORA,FORT KNOX MINE,1 FORT KNOX RD,FAIRBANKS,FAIRBANKS NORTH STAR BOROUGH,AK,99712,,,64.99953,-147.35811,Metal Mining,KINROSS GOLD CORP,1600039.00000,39.00000,2018
3222,99737PGMNX38MIL,POGO MINE,38 MILES NE OF DELTA JUNCTION,DELTA JUNCTION,FAIRBANKS NORTH STAR BOROUGH,AK,99737,,,64.44972,-144.93962,Metal Mining,SUMITOMO METAL MINING AMERICA INC,39161.00000,1.00000,2018
3223,99752RDDGP90MIL,RED DOG OPERATIONS,90 MILES N OF KOTZEBUE,KOTZEBUE,NORTHWEST ARCTIC BOROUGH,AK,99752,,,68.06151,-162.85549,Metal Mining,TECK AMERICAN INC,3458943.92000,34.59000,2018
3224,99801CRLSK331CL,COEUR ALASKA INC KENSINGTON GOLD PROJECT,3031 CLINTON DR,JUNEAU,JUNEAU BOROUGH,AK,99801,,,58.86749,-135.10476,Metal Mining,COEUR MINING INC.,3502.00000,2.00000,2018


In [171]:
# cast lat/long columns to float
chromium['LATITUDE'] = chromium['LATITUDE'].astype(float)
chromium['LONGITUDE'] = chromium['LONGITUDE'].astype(float)

# create geodataframe using Latitude and Longitude columns
gdf = gpd.GeoDataFrame(chromium, geometry=gpd.points_from_xy(chromium.LONGITUDE, chromium.LATITUDE))

# define crs for geodataframe
gdf.crs = {'init' :'epsg:4326'}

gdf.head()

Unnamed: 0,TRIFD,FACILITY_NAME,STREET_ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,LONGITUDE,INDUSTRY_SECTOR,PARENT_CO_NAME,ON_SITE_RELEASE_TOTAL,AIR,YEAR,geometry
0,00612THRMKZENOG,THERMO KING PUERTO RICO MANUFACTURA INC,517 ZONA IND US TRIAL,ARECIBO,ARECIBO MUNICIPIO,PR,612,,,18.45815,-66.74646,Machinery,INGERSOLL-RAND CO,0.049,0.049,2018,POINT (-66.74646 18.45815)
1,00617PRCSNTMASD,ADM ALLIANCE NUTRITION OF PUERTO RICO LLC,TOMAS DAVILA ST EDIF 10 PALMAS ALTAS INDUSTRI...,BARCELONETA,BARCELONETA MUNICIPIO,PR,617,,,18.45276,-66.54038,Food,ARCHER DANIELS MIDLAND CO,0.0,0.0,2018,POINT (-66.54038 18.45276)
2,00630WDTRT65INF,PUERTO RICO WOOD TREATING,AVE 65 DE INFANTERIA KM 7.0,CAROLINA,SAN JUAN MUNICIPIO,PR,985,,,18.38135,-66.03416,Wood Products,,0.0,0.0,2018,POINT (-66.03416 18.38135)
3,00659CNTRLRD2KM,ADM ALLIANCE NUTRITION OF PR LLC,STATE RD NO. 2 KM 83.0 CARRIZALES WARD,HATILLO,HATILLO MUNICIPIO,PR,659,,,18.48514,-66.77732,Food,ARCHER DANIELS MIDLAND CO,0.0,0.0,2018,POINT (-66.77732 18.48514)
4,00694HRVYHRD686,HUBBELL CARIBE LTD,RD 686 KM 17.3,VEGA BAJA,VEGA BAJA MUNICIPIO,PR,693,,,18.48647,-66.4052,Electrical Equipment,HUBBELL INC,0.0,0.0,2018,POINT (-66.40520 18.48647)


In [172]:
# export geodataframe to geojson
gdf.to_file("../data/chromium-facilities.geojson", driver='GeoJSON', encoding='utf-8')

In [170]:
ethylene = tri_filter[(tri_filter['YEAR'] == 2018) & (tri_filter['CHEMICAL'].str.contains('ETHYLENE OXIDE'))]
ethylene

Unnamed: 0,YEAR,TRIFD,FACILITY_NAME,STREET_ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,LONGITUDE,INDUSTRY_SECTOR,PARENT_CO_NAME,CHEMICAL,WATER,ON_SITE_RELEASE_TOTAL,AIR,UNIT_OF_MEASURE
2661062,2018,70669VSTCHOLDSP,SASOL CHEMICALS (USA) LLC-LAKE CHARLES CHEMICA...,2201 OLD SPANISH TRAIL,WESTLAKE,CALCASIEU PARISH,LA,70669,,,30.25880,-93.29370,Chemicals,SASOL (USA) CORP,ETHYLENE OXIDE,0.00000,2237.00000,2237.00000,Pounds
2661163,2018,30209CRBRD8195N,BECTON DICKINSON & CO COVINGTON OPERATIONS,8195 INDUSTRIAL BLVD,COVINGTON,NEWTON,GA,30014,,,33.60820,-83.83960,Miscellaneous Manufacturing,BECTON DICKINSON & CO,ETHYLENE OXIDE,0.00000,656.30000,656.30000,Pounds
2661788,2018,16510MDCLM2205E,COSMED OF PENNSYLVANIA,2205 E 33RD ST,ERIE,ERIE,PA,16510,,,42.12398,-80.02186,Miscellaneous Manufacturing,COSMED GROUP INC,ETHYLENE OXIDE,0.00000,293.00000,293.00000,Pounds
2662031,2018,74063CHMLN9100W,BAKER PETROLITE LLC,9100 W 21ST ST,SAND SPRINGS,TULSA,OK,74063,,,36.12999,-96.09903,Chemicals,BAKER HUGHES A GE CO LLC,ETHYLENE OXIDE,0.00000,9.00000,9.00000,Pounds
2662084,2018,49504STRLS520WA,VIANT MEDICAL INC VIANT STERILIZATION SERVICES,520 WATSON ST SW,GRAND RAPIDS,KENT,MI,49504,,,42.96175,-85.68245,Miscellaneous Manufacturing,VIANT MEDICAL LLC,ETHYLENE OXIDE,0.00000,378.39000,378.39000,Pounds
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2738248,2018,30906KNDLL1816M,KENDALL PATIENT RECOVERY LLC,1430 MARVIN GRIFFIN RD,AUGUSTA,RICHMOND,GA,30906,,,33.39440,-81.98460,Miscellaneous Manufacturing,CARDINAL HEALTH INC,ETHYLENE OXIDE,0.00000,259.00000,259.00000,Pounds
2738259,2018,29605THXCHPERIM,ETHOX CHEMICALS LLC,1801 PERIMETER RD,GREENVILLE,GREENVILLE,SC,29605,,,34.73447,-82.37562,Chemicals,PIEDMONT CHEMICAL INDUSTRIES INC,ETHYLENE OXIDE,0.00000,11.30000,11.30000,Pounds
2738280,2018,62525STLYM2200E,TATE & LYLE DECATUR,2200 E ELDORADO ST,DECATUR,MACON,IL,62521,,,39.84995,-88.92346,Food,TATE & LYLE INGREDIENTS AMERICAS LLC,ETHYLENE OXIDE,0.00000,239.00000,239.00000,Pounds
2738555,2018,43920VNRLL1250S,HERITAGE THERMAL SERVICES,1250 ST GEORGE ST,EAST LIVERPOOL,COLUMBIANA,OH,43920,,,40.63162,-80.54632,Hazardous Waste,HERITAGE-WTI LLC,ETHYLENE OXIDE,0.00000,0.00302,0.00302,Pounds


In [173]:
# cast lat/long columns to float
ethylene['LATITUDE'] = ethylene['LATITUDE'].astype(float)
ethylene['LONGITUDE'] = ethylene['LONGITUDE'].astype(float)

# create geodataframe using Latitude and Longitude columns
gdf = gpd.GeoDataFrame(ethylene, geometry=gpd.points_from_xy(ethylene.LONGITUDE, ethylene.LATITUDE))

# define crs for geodataframe
gdf.crs = {'init' :'epsg:4326'}

gdf.head()

Unnamed: 0,YEAR,TRIFD,FACILITY_NAME,STREET_ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,LONGITUDE,INDUSTRY_SECTOR,PARENT_CO_NAME,CHEMICAL,WATER,ON_SITE_RELEASE_TOTAL,AIR,UNIT_OF_MEASURE,geometry
2661062,2018,70669VSTCHOLDSP,SASOL CHEMICALS (USA) LLC-LAKE CHARLES CHEMICA...,2201 OLD SPANISH TRAIL,WESTLAKE,CALCASIEU PARISH,LA,70669,,,30.2588,-93.2937,Chemicals,SASOL (USA) CORP,ETHYLENE OXIDE,0.0,2237.0,2237.0,Pounds,POINT (-93.29370 30.25880)
2661163,2018,30209CRBRD8195N,BECTON DICKINSON & CO COVINGTON OPERATIONS,8195 INDUSTRIAL BLVD,COVINGTON,NEWTON,GA,30014,,,33.6082,-83.8396,Miscellaneous Manufacturing,BECTON DICKINSON & CO,ETHYLENE OXIDE,0.0,656.3,656.3,Pounds,POINT (-83.83960 33.60820)
2661788,2018,16510MDCLM2205E,COSMED OF PENNSYLVANIA,2205 E 33RD ST,ERIE,ERIE,PA,16510,,,42.12398,-80.02186,Miscellaneous Manufacturing,COSMED GROUP INC,ETHYLENE OXIDE,0.0,293.0,293.0,Pounds,POINT (-80.02186 42.12398)
2662031,2018,74063CHMLN9100W,BAKER PETROLITE LLC,9100 W 21ST ST,SAND SPRINGS,TULSA,OK,74063,,,36.12999,-96.09903,Chemicals,BAKER HUGHES A GE CO LLC,ETHYLENE OXIDE,0.0,9.0,9.0,Pounds,POINT (-96.09903 36.12999)
2662084,2018,49504STRLS520WA,VIANT MEDICAL INC VIANT STERILIZATION SERVICES,520 WATSON ST SW,GRAND RAPIDS,KENT,MI,49504,,,42.96175,-85.68245,Miscellaneous Manufacturing,VIANT MEDICAL LLC,ETHYLENE OXIDE,0.0,378.39,378.39,Pounds,POINT (-85.68245 42.96175)


In [174]:
# export geodataframe to geojson
gdf.to_file("../data/ethylene-facilities.geojson", driver='GeoJSON', encoding='utf-8')