# Explore TRI facility releases over time

I want to explore trends in modeled hazards over time. I downloaded each TRI basic data file for the US for years 2007-2018. There are more data files available, but RSEI scores are only available as far back as 2007.

In [1]:
# import packages
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from shapely.geometry import Polygon
from shapely.geometry import mapping
from functools import reduce
import os
import re

import warnings
warnings.simplefilter(action='ignore')

### Wrangle TRI facility location information

First I want to combine all the TRI data files for each year into one dataframe.

In [4]:
# define folder containing all tri files
folder = "../data/tri-data"

# create empty list to hold dataframes
fileList = list()

# add each file to the dataframe list
for file in os.listdir(folder):
    df = pd.read_csv(os.path.join(folder, file))
    fileList.append(df)
    
# concatenate all list of dataframes
combined = pd.concat(fileList, axis=0, ignore_index=True)
combined

Unnamed: 0,1. YEAR,2. TRIFD,3. FRS ID,4. FACILITY NAME,5. STREET ADDRESS,6. CITY,7. COUNTY,8. ST,9. ZIP,10. BIA,...,107. 8.3 - ENERGY RECOVER OF,108. 8.4 - RECYCLING ON SITE,109. 8.5 - RECYCLING OFF SIT,110. 8.6 - TREATMENT ON SITE,111. 8.7 - TREATMENT OFF SITE,112. PRODUCTION WSTE (8.1-8.7),113. 8.8 - ONE-TIME RELEASE,114. PROD_RATIO_OR_ ACTIVITY,115. 8.9 - PRODUCTION RATIO,Unnamed: 115
0,2007,77049CHMLN16950,110000497775,BAKER PETROLITE CORP,16950 WALLISVILLE RD,HOUSTON,HARRIS,TX,77049,,...,0.0,0.0,0.0,0.0,250.0,501.76,,,,
1,2007,77049CHMLN16950,110000497775,BAKER PETROLITE CORP,16950 WALLISVILLE RD,HOUSTON,HARRIS,TX,77049,,...,0.0,0.0,0.0,0.0,20.7,201.90,,,1.31,
2,2007,98837TKTMS9255R,110000491174,JOYSON SAFETY SYSTEMS ACQUISITION LLC,9138 RANDOLPH RD NE,MOSES LAKE,GRANT,WA,98837,,...,0.0,0.0,0.0,0.0,2156.0,2356.00,,,1.10,
3,2007,71411WLLMTHIGHW,110013288557,INTERNATIONAL PAPER / RED RIVER MILL,4537 HWY 480,CAMPTI,NATCHITOCHES PARISH,LA,71411,,...,0.0,0.0,0.0,0.0,0.0,134396.00,,,0.93,
4,2007,27403DSTNC1025H,110000346518,SHERWIN-WILLIAMS CO,1025 HOWARD ST,GREENSBORO,GUILFORD,NC,27403,,...,48.0,3290.0,0.0,1717.0,156.0,5703.00,,,0.69,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995220,2018,42366PRMML5680O,110000380971,HINES PRECISION INC,5680 OLD KY 54,PHILPOT,DAVIESS,KY,42366,,...,0.0,0.0,0.0,0.0,0.0,0.00,,,,
995221,2018,37066STVGL1499S,110000493136,US TVA GALLATIN FOSSIL PLANT,1499 STEAM PLANT RD,GALLATIN,SUMNER,TN,37066,,...,0.0,0.0,0.0,0.0,0.0,9.40,,PRODUCTION,0.98,
995222,2018,6416WPNNYS861NE,110020834276,PENNYS CONCRETE,8601 NE 38TH STREET,KANSAS CITY,CLAY,MO,64161,,...,0.0,0.0,0.0,0.0,0.0,0.00,,ACTIVITY,1.00,
995223,2018,46368BTSTL6500S,110000398212,NLMK INDIANA,6500 S BOUNDARY RD,PORTAGE,PORTER,IN,46368,,...,0.0,0.0,53429.0,0.0,0.0,53752.12,,PRODUCTION,1.00,


In [5]:
# remove leading numbers, periods, and space and from column names
combined.columns = combined.columns.str.lstrip('0123456789.- ')
combined

Unnamed: 0,YEAR,TRIFD,FRS ID,FACILITY NAME,STREET ADDRESS,CITY,COUNTY,ST,ZIP,BIA,...,ENERGY RECOVER OF,RECYCLING ON SITE,RECYCLING OFF SIT,TREATMENT ON SITE,TREATMENT OFF SITE,PRODUCTION WSTE (8.1-8.7),ONE-TIME RELEASE,PROD_RATIO_OR_ ACTIVITY,PRODUCTION RATIO,Unnamed: 115
0,2007,77049CHMLN16950,110000497775,BAKER PETROLITE CORP,16950 WALLISVILLE RD,HOUSTON,HARRIS,TX,77049,,...,0.0,0.0,0.0,0.0,250.0,501.76,,,,
1,2007,77049CHMLN16950,110000497775,BAKER PETROLITE CORP,16950 WALLISVILLE RD,HOUSTON,HARRIS,TX,77049,,...,0.0,0.0,0.0,0.0,20.7,201.90,,,1.31,
2,2007,98837TKTMS9255R,110000491174,JOYSON SAFETY SYSTEMS ACQUISITION LLC,9138 RANDOLPH RD NE,MOSES LAKE,GRANT,WA,98837,,...,0.0,0.0,0.0,0.0,2156.0,2356.00,,,1.10,
3,2007,71411WLLMTHIGHW,110013288557,INTERNATIONAL PAPER / RED RIVER MILL,4537 HWY 480,CAMPTI,NATCHITOCHES PARISH,LA,71411,,...,0.0,0.0,0.0,0.0,0.0,134396.00,,,0.93,
4,2007,27403DSTNC1025H,110000346518,SHERWIN-WILLIAMS CO,1025 HOWARD ST,GREENSBORO,GUILFORD,NC,27403,,...,48.0,3290.0,0.0,1717.0,156.0,5703.00,,,0.69,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995220,2018,42366PRMML5680O,110000380971,HINES PRECISION INC,5680 OLD KY 54,PHILPOT,DAVIESS,KY,42366,,...,0.0,0.0,0.0,0.0,0.0,0.00,,,,
995221,2018,37066STVGL1499S,110000493136,US TVA GALLATIN FOSSIL PLANT,1499 STEAM PLANT RD,GALLATIN,SUMNER,TN,37066,,...,0.0,0.0,0.0,0.0,0.0,9.40,,PRODUCTION,0.98,
995222,2018,6416WPNNYS861NE,110020834276,PENNYS CONCRETE,8601 NE 38TH STREET,KANSAS CITY,CLAY,MO,64161,,...,0.0,0.0,0.0,0.0,0.0,0.00,,ACTIVITY,1.00,
995223,2018,46368BTSTL6500S,110000398212,NLMK INDIANA,6500 S BOUNDARY RD,PORTAGE,PORTER,IN,46368,,...,0.0,0.0,53429.0,0.0,0.0,53752.12,,PRODUCTION,1.00,


In [6]:
# reduce dataframe to one record per facility
# use group by TRIFID field, and only keep the identifying fields about each facility
all_tri = combined.fillna('').groupby('TRIFD').agg({'FACILITY NAME':'first',
                             'STREET ADDRESS':'first',
                             'CITY':'first',
                             'COUNTY':'first',
                             'ST':'first',
                             'ZIP':'first',
                             'BIA':'first',
                             'TRIBE':'first',
                             'LATITUDE':'first',
                             'LONGITUDE':'first',
                             'INDUSTRY SECTOR':'first',
                             'CHEMICAL': ', '.join, # to get list of all chemicals released by facility
                             'PARENT CO NAME': 'first'
                                               }).reset_index()

all_tri

Unnamed: 0,TRIFD,FACILITY NAME,STREET ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,LONGITUDE,INDUSTRY SECTOR,CHEMICAL,PARENT CO NAME
0,00602SMRTMRD115,SMART MODULAR TECHNOLOGIES OF PUERTO RICO,RD 115 KM 226 AGUADA INDUSTRIAL PARK,AGUADA,AGUADA MUNICIPIO,PR,602,,,18.380797,-67.191301,Computers and Electronic Products,"LEAD, MERCURY, MERCURY, LEAD, MERCURY, LEAD, L...",SMART MODULAR TECHNOLOGIES
1,00603HWLTTSTATE,HEWLETT-PACKARD CARIBE BV SITE,HWY 110 N KM 5.1,AGUADILLA,AGUADILLA MUNICIPIO,PR,603,,,18.434600,-67.155190,Computers and Electronic Products,"LEAD, N-METHYL-2-PYRROLIDONE, N-METHYL-2-PYRRO...",HEWLETT-PACKARD CO
2,00603SYMMTLAMNT,SYMMETRICON PUERTO RICO LIMITED,LA MONTANA INDUSTRIAL PARK ST B LOT 52,AGUADILLA,AGUADILLA MUNICIPIO,PR,603,,,18.456460,-67.136600,Computers and Electronic Products,"LEAD, LEAD, LEAD",SYMMETRICON INC
3,00608DCRBNRD3KM,IDI CARIBE INC,PR3 KM 151.8 AGUIRRE,SALINAS,SALINAS MUNICIPIO,PR,751,,,17.972778,-66.231944,Chemicals,"STYRENE, STYRENE, ZINC COMPOUNDS, ZINC COMPOUN...",
4,0060WHPNTRCARR1,HP INTERNATIONAL TRADING BV (PUERTO RICO BRANC...,"CARR 110, KM. 5.1",AGUADILLA,AGUADILLA MUNICIPIO,PR,603,,,18.456470,-67.136550,Chemicals,"CERTAIN GLYCOL ETHERS, CERTAIN GLYCOL ETHERS, ...",HP INC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32931,9982WGLCRBPBX14,GLACIER BAY NATIONAL PARK & PRESERVE,1 PARK RD,GUSTAVUS,HOONAH-ANGOON CENSUS AREA,AK,99826,,,58.454121,-135.886878,Other,"LEAD, LEAD, LEAD, LEAD",US DEPARTMENT OF THE INTERIOR
32932,99901LSKSH381TN,VIGOR ALASKA,3801 TONGASS AVE,KETCHIKAN,KETCHIKAN GATEWAY BOROUGH,AK,99901,,,55.355971,-131.698340,Transportation Equipment,"XYLENE (MIXED ISOMERS), XYLENE (MIXED ISOMERS)...",VIGOR MARINE
32933,99901SCSTG1300S,US COAST GUARD BASE KETCHIKAN,1300 STEDMAN ST,KETCHIKAN,KETCHIKAN GATEWAY BOROUGH,AK,99901,,,55.333730,-131.625330,Transportation Equipment,"LEAD, LEAD, LEAD, LEAD, LEAD, LEAD, LEAD, LEAD...",US DEPARTMENT OF HOMELAND SECURITY
32934,9990WCRWLY9STED,CROWLEY KETCHIKAN BULK FUEL STORAGE FACILITY,900 STEDMAN ST,KETCHIKAN,KETCHIKAN GATEWAY BOROUGH,AK,99901,,,55.337600,-131.633310,Petroleum Bulk Terminals,"LEAD COMPOUNDS, ETHYLBENZENE, XYLENE (MIXED IS...",CROWLEY FUELS LLC


### Wrangle RSEI Scores

Next I will load the RSEI scores and wrangle the dataframe into a better format. I need to have one record per facility with a column for each year's RSEI score.

In [7]:
# load RSEI scores
scores = pd.read_excel('../data/all-rsei.xlsx')
scores

Unnamed: 0,SubmissionYear,FacilityID,RSEI Modeled Pounds,RSEI Modeled Hazard,RSEI Score
0,2007,0071WPNCCN619AV,0.00000,0.000000e+00,0.000000
1,2007,0106WCNNLL25TEX,0.00000,0.000000e+00,0.000000
2,2007,0150WCNSLD17SAI,0.00000,0.000000e+00,0.000000
3,2007,0150WKRLST91CAR,29.79996,5.364002e+05,62.107800
4,2007,0152WCMRNX1537G,0.00000,0.000000e+00,0.000000
...,...,...,...,...,...
266097,2018,99801KNNCT13401,18646.00000,3.599500e+09,779.099616
266098,2018,99901SCSTG1300S,0.80000,1.840000e+04,0.517362
266099,2018,155524GYSN23IND,750.00000,1.096500e+10,46982.700000
266100,2018,275593MCPT4191H,148.70000,2.087000e+05,3.756013


In [8]:
# cast year field as string
scores['SubmissionYear'] = scores['SubmissionYear'].astype(str)

# create list of all years
years = scores['SubmissionYear'].unique()

# create a new empty column in dataframe for each year
for year in years:
    scores[year] = ""

In [13]:
# convert year columns into numeric datatypes
scores[years] = scores[years].apply(pd.to_numeric)

# for each year column
for column in scores[years]:
    for index, row in scores.iterrows(): # loop through each row
        if row['SubmissionYear'] == column: # and if the submission year column matches the year column
            scores.loc[index, column] = row['RSEI Modeled Hazard'] # then populate that year column with that years RSEI score

# fill NaN values with 0
# so now a 0 represents no score
# even if a facility did report that year with a 0 score, for the sake of the map we will assume no score for that year
scores.fillna(0, inplace=True)
scores

Unnamed: 0,SubmissionYear,FacilityID,RSEI Modeled Pounds,RSEI Modeled Hazard,RSEI Score,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,2007,0071WPNCCN619AV,0.00000,0.000000e+00,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00
1,2007,0106WCNNLL25TEX,0.00000,0.000000e+00,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00
2,2007,0150WCNSLD17SAI,0.00000,0.000000e+00,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00
3,2007,0150WKRLST91CAR,29.79996,5.364002e+05,62.107800,536400.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00
4,2007,0152WCMRNX1537G,0.00000,0.000000e+00,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266097,2018,99801KNNCT13401,18646.00000,3.599500e+09,779.099616,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.599500e+09
266098,2018,99901SCSTG1300S,0.80000,1.840000e+04,0.517362,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.840000e+04
266099,2018,155524GYSN23IND,750.00000,1.096500e+10,46982.700000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.096500e+10
266100,2018,275593MCPT4191H,148.70000,2.087000e+05,3.756013,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.087000e+05


In [14]:
# reduce dataframe to get one record per facility, with an RSEI score for each year
scores_reduce = scores.fillna('').groupby('FacilityID').agg({'2007':'sum',
                             '2008':'sum',
                             '2009':'sum',
                             '2010':'sum',
                             '2011':'sum',
                             '2012':'sum',
                             '2013':'sum',
                             '2014':'sum',
                             '2015':'sum',
                             '2016':'sum',
                             '2017':'sum',
                             '2018':'sum'}).reset_index()

scores_reduce

Unnamed: 0,FacilityID,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,00602SMRTMRD115,1.150000e+05,4.600000e+04,1.610000e+05,1.610000e+05,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
1,00603HWLTTSTATE,0.000000e+00,0.000000e+00,3.511990e+06,9.271900e+05,1.194610e+06,8.792550e+05,1.244880e+06,1.928800e+05,2.300000e+03,4.600000e+03,0.000000e+00,0.000000e+00
2,00603SYMMTLAMNT,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
3,00608DCRBNRD3KM,2.625000e+03,5.541110e+04,4.398750e+04,4.407850e+04,4.636050e+04,4.995850e+04,5.670300e+04,5.354600e+04,5.442800e+04,5.172600e+04,4.807900e+04,4.603850e+04
4,0060WHPNTRCARR1,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,9.828000e+04,6.354000e+04,3.366000e+04,3.265200e+04
...,...,...,...,...,...,...,...,...,...,...,...,...,...
32903,99801KNNCT13401,7.223745e+09,7.223598e+09,7.221731e+09,7.521906e+09,7.522176e+09,7.523344e+09,7.674884e+09,3.660391e+09,3.665557e+09,2.437084e+09,3.239277e+09,3.599500e+09
32904,9982WGLCRBPBX14,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
32905,99901LSKSH381TN,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,3.276700e+05,5.369000e+04,0.000000e+00,2.815160e+05,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
32906,99901SCSTG1300S,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1.134360e+04,6.743600e+03,2.419600e+03,3.247600e+03,2.624300e+03,1.840000e+04


### Merge TRI facility location dataframe with RSEI score dataframe

In [15]:
# join rsei score dataframe to wv_tri dataframe to get location attributes
all_tri_joined = all_tri.merge(scores_reduce, left_on='TRIFD', right_on='FacilityID')
all_tri_joined

Unnamed: 0,TRIFD,FACILITY NAME,STREET ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,00602SMRTMRD115,SMART MODULAR TECHNOLOGIES OF PUERTO RICO,RD 115 KM 226 AGUADA INDUSTRIAL PARK,AGUADA,AGUADA MUNICIPIO,PR,602,,,18.380797,...,1.610000e+05,1.610000e+05,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
1,00603HWLTTSTATE,HEWLETT-PACKARD CARIBE BV SITE,HWY 110 N KM 5.1,AGUADILLA,AGUADILLA MUNICIPIO,PR,603,,,18.434600,...,3.511990e+06,9.271900e+05,1.194610e+06,8.792550e+05,1.244880e+06,1.928800e+05,2.300000e+03,4.600000e+03,0.000000e+00,0.000000e+00
2,00603SYMMTLAMNT,SYMMETRICON PUERTO RICO LIMITED,LA MONTANA INDUSTRIAL PARK ST B LOT 52,AGUADILLA,AGUADILLA MUNICIPIO,PR,603,,,18.456460,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
3,00608DCRBNRD3KM,IDI CARIBE INC,PR3 KM 151.8 AGUIRRE,SALINAS,SALINAS MUNICIPIO,PR,751,,,17.972778,...,4.398750e+04,4.407850e+04,4.636050e+04,4.995850e+04,5.670300e+04,5.354600e+04,5.442800e+04,5.172600e+04,4.807900e+04,4.603850e+04
4,0060WHPNTRCARR1,HP INTERNATIONAL TRADING BV (PUERTO RICO BRANC...,"CARR 110, KM. 5.1",AGUADILLA,AGUADILLA MUNICIPIO,PR,603,,,18.456470,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,9.828000e+04,6.354000e+04,3.366000e+04,3.265200e+04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32902,99801KNNCT13401,HECLA GREENS CREEK MINING CO,13401 GLACIER HWY,JUNEAU,JUNEAU BOROUGH,AK,99801,,,58.081802,...,7.221731e+09,7.521906e+09,7.522176e+09,7.523344e+09,7.674884e+09,3.660391e+09,3.665557e+09,2.437084e+09,3.239277e+09,3.599500e+09
32903,9982WGLCRBPBX14,GLACIER BAY NATIONAL PARK & PRESERVE,1 PARK RD,GUSTAVUS,HOONAH-ANGOON CENSUS AREA,AK,99826,,,58.454121,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
32904,99901LSKSH381TN,VIGOR ALASKA,3801 TONGASS AVE,KETCHIKAN,KETCHIKAN GATEWAY BOROUGH,AK,99901,,,55.355971,...,0.000000e+00,0.000000e+00,3.276700e+05,5.369000e+04,0.000000e+00,2.815160e+05,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
32905,99901SCSTG1300S,US COAST GUARD BASE KETCHIKAN,1300 STEDMAN ST,KETCHIKAN,KETCHIKAN GATEWAY BOROUGH,AK,99901,,,55.333730,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1.134360e+04,6.743600e+03,2.419600e+03,3.247600e+03,2.624300e+03,1.840000e+04


In [16]:
all_tri_joined.isnull().sum(axis = 0)

TRIFD              0
FACILITY NAME      0
STREET ADDRESS     0
CITY               0
COUNTY             0
ST                 0
ZIP                0
BIA                0
TRIBE              0
LATITUDE           0
LONGITUDE          0
INDUSTRY SECTOR    0
CHEMICAL           0
PARENT CO NAME     0
FacilityID         0
2007               0
2008               0
2009               0
2010               0
2011               0
2012               0
2013               0
2014               0
2015               0
2016               0
2017               0
2018               0
dtype: int64

In [18]:
# drop rows with missing latitude values
missing = all_tri_joined.loc[all_tri_joined['LATITUDE'] == ''].index
all_tri_joined.drop(missing, inplace=True)
all_tri_joined

Unnamed: 0,TRIFD,FACILITY NAME,STREET ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,00602SMRTMRD115,SMART MODULAR TECHNOLOGIES OF PUERTO RICO,RD 115 KM 226 AGUADA INDUSTRIAL PARK,AGUADA,AGUADA MUNICIPIO,PR,602,,,18.380797,...,1.610000e+05,1.610000e+05,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
1,00603HWLTTSTATE,HEWLETT-PACKARD CARIBE BV SITE,HWY 110 N KM 5.1,AGUADILLA,AGUADILLA MUNICIPIO,PR,603,,,18.434600,...,3.511990e+06,9.271900e+05,1.194610e+06,8.792550e+05,1.244880e+06,1.928800e+05,2.300000e+03,4.600000e+03,0.000000e+00,0.000000e+00
2,00603SYMMTLAMNT,SYMMETRICON PUERTO RICO LIMITED,LA MONTANA INDUSTRIAL PARK ST B LOT 52,AGUADILLA,AGUADILLA MUNICIPIO,PR,603,,,18.456460,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
3,00608DCRBNRD3KM,IDI CARIBE INC,PR3 KM 151.8 AGUIRRE,SALINAS,SALINAS MUNICIPIO,PR,751,,,17.972778,...,4.398750e+04,4.407850e+04,4.636050e+04,4.995850e+04,5.670300e+04,5.354600e+04,5.442800e+04,5.172600e+04,4.807900e+04,4.603850e+04
4,0060WHPNTRCARR1,HP INTERNATIONAL TRADING BV (PUERTO RICO BRANC...,"CARR 110, KM. 5.1",AGUADILLA,AGUADILLA MUNICIPIO,PR,603,,,18.456470,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,9.828000e+04,6.354000e+04,3.366000e+04,3.265200e+04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32902,99801KNNCT13401,HECLA GREENS CREEK MINING CO,13401 GLACIER HWY,JUNEAU,JUNEAU BOROUGH,AK,99801,,,58.081802,...,7.221731e+09,7.521906e+09,7.522176e+09,7.523344e+09,7.674884e+09,3.660391e+09,3.665557e+09,2.437084e+09,3.239277e+09,3.599500e+09
32903,9982WGLCRBPBX14,GLACIER BAY NATIONAL PARK & PRESERVE,1 PARK RD,GUSTAVUS,HOONAH-ANGOON CENSUS AREA,AK,99826,,,58.454121,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
32904,99901LSKSH381TN,VIGOR ALASKA,3801 TONGASS AVE,KETCHIKAN,KETCHIKAN GATEWAY BOROUGH,AK,99901,,,55.355971,...,0.000000e+00,0.000000e+00,3.276700e+05,5.369000e+04,0.000000e+00,2.815160e+05,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
32905,99901SCSTG1300S,US COAST GUARD BASE KETCHIKAN,1300 STEDMAN ST,KETCHIKAN,KETCHIKAN GATEWAY BOROUGH,AK,99901,,,55.333730,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1.134360e+04,6.743600e+03,2.419600e+03,3.247600e+03,2.624300e+03,1.840000e+04


### Convert final dataframe to geodataframe and export to geojson

In [19]:
# cast lat/long columns to float
all_tri_joined['LATITUDE'] = all_tri_joined['LATITUDE'].astype(float)
all_tri_joined['LONGITUDE'] = all_tri_joined['LONGITUDE'].astype(float)

# create geodataframe using Latitude and Longitude columns
gdf = gpd.GeoDataFrame(all_tri_joined, geometry=gpd.points_from_xy(all_tri_joined.LONGITUDE, all_tri_joined.LATITUDE))

# define crs for geodataframe
gdf.crs = {'init' :'epsg:4326'}

gdf

Unnamed: 0,TRIFD,FACILITY NAME,STREET ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,geometry
0,00602SMRTMRD115,SMART MODULAR TECHNOLOGIES OF PUERTO RICO,RD 115 KM 226 AGUADA INDUSTRIAL PARK,AGUADA,AGUADA MUNICIPIO,PR,602,,,18.380797,...,1.610000e+05,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,POINT (-67.19130 18.38080)
1,00603HWLTTSTATE,HEWLETT-PACKARD CARIBE BV SITE,HWY 110 N KM 5.1,AGUADILLA,AGUADILLA MUNICIPIO,PR,603,,,18.434600,...,9.271900e+05,1.194610e+06,8.792550e+05,1.244880e+06,1.928800e+05,2.300000e+03,4.600000e+03,0.000000e+00,0.000000e+00,POINT (-67.15519 18.43460)
2,00603SYMMTLAMNT,SYMMETRICON PUERTO RICO LIMITED,LA MONTANA INDUSTRIAL PARK ST B LOT 52,AGUADILLA,AGUADILLA MUNICIPIO,PR,603,,,18.456460,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,POINT (-67.13660 18.45646)
3,00608DCRBNRD3KM,IDI CARIBE INC,PR3 KM 151.8 AGUIRRE,SALINAS,SALINAS MUNICIPIO,PR,751,,,17.972778,...,4.407850e+04,4.636050e+04,4.995850e+04,5.670300e+04,5.354600e+04,5.442800e+04,5.172600e+04,4.807900e+04,4.603850e+04,POINT (-66.23194 17.97278)
4,0060WHPNTRCARR1,HP INTERNATIONAL TRADING BV (PUERTO RICO BRANC...,"CARR 110, KM. 5.1",AGUADILLA,AGUADILLA MUNICIPIO,PR,603,,,18.456470,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,9.828000e+04,6.354000e+04,3.366000e+04,3.265200e+04,POINT (-67.13655 18.45647)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32902,99801KNNCT13401,HECLA GREENS CREEK MINING CO,13401 GLACIER HWY,JUNEAU,JUNEAU BOROUGH,AK,99801,,,58.081802,...,7.521906e+09,7.522176e+09,7.523344e+09,7.674884e+09,3.660391e+09,3.665557e+09,2.437084e+09,3.239277e+09,3.599500e+09,POINT (-134.64121 58.08180)
32903,9982WGLCRBPBX14,GLACIER BAY NATIONAL PARK & PRESERVE,1 PARK RD,GUSTAVUS,HOONAH-ANGOON CENSUS AREA,AK,99826,,,58.454121,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,POINT (-135.88688 58.45412)
32904,99901LSKSH381TN,VIGOR ALASKA,3801 TONGASS AVE,KETCHIKAN,KETCHIKAN GATEWAY BOROUGH,AK,99901,,,55.355971,...,0.000000e+00,3.276700e+05,5.369000e+04,0.000000e+00,2.815160e+05,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,POINT (-131.69834 55.35597)
32905,99901SCSTG1300S,US COAST GUARD BASE KETCHIKAN,1300 STEDMAN ST,KETCHIKAN,KETCHIKAN GATEWAY BOROUGH,AK,99901,,,55.333730,...,0.000000e+00,0.000000e+00,0.000000e+00,1.134360e+04,6.743600e+03,2.419600e+03,3.247600e+03,2.624300e+03,1.840000e+04,POINT (-131.62533 55.33373)


In [20]:
# export geodataframe to geojson
gdf.to_file("../data/all-tri.geojson", driver='GeoJSON', encoding='utf-8')