# Explore West Virginia TRI facilities over time

I want to explore West Virginia's Chemical Valley as a case study. To see trends over time, I downloaded each TRI basic data file for West Virginia for years 2007-2018. There are more data files available, but RSEI scores are only available as far back as 2007.

In [1]:
# import packages
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from shapely.geometry import Polygon
from shapely.geometry import mapping
from functools import reduce
import os
import re

import warnings
warnings.simplefilter(action='ignore')

### Wrangle TRI facility location information

First I want to combine all the TRI data files for each year into one dataframe.

In [2]:
# define folder containing all WV files
folder = "../data/wv-tri"

# create empty list to hold dataframes
fileList = list()

# add each file to the dataframe list
for file in os.listdir(folder):
    df = pd.read_csv(os.path.join(folder, file))
    fileList.append(df)
    
# concatenate all list of dataframes
combined = pd.concat(fileList, axis=0, ignore_index=True)
combined

Unnamed: 0,1. YEAR,2. TRIFD,3. FRS ID,4. FACILITY NAME,5. STREET ADDRESS,6. CITY,7. COUNTY,8. ST,9. ZIP,10. BIA,...,107. 8.3 - ENERGY RECOVER OF,108. 8.4 - RECYCLING ON SITE,109. 8.5 - RECYCLING OFF SIT,110. 8.6 - TREATMENT ON SITE,111. 8.7 - TREATMENT OFF SITE,112. PRODUCTION WSTE (8.1-8.7),113. 8.8 - ONE-TIME RELEASE,114. PROD_RATIO_OR_ ACTIVITY,115. 8.9 - PRODUCTION RATIO,Unnamed: 115
0,2007,26588RVSVLJACKS,110000604114,ALLEGHENY ENERGY INC RIVESVILLE POWER STATION,JACKSON ST,RIVESVILLE,MARION,WV,26588,,...,0.0,0.0,0.0,54000.0,0.0,71000.000000,,,1.42,
1,2007,26146LMTCH3816S,110056954345,REAL ALLOY RECYCLING LLC,283 INDUSTRIAL PARK ROAD,FRIENDLY,TYLER,WV,26146,,...,0.0,0.0,434.0,0.0,0.0,5124.130000,,,1.16,
2,2007,26836MRCNWROUTE,110000345056,AMERICAN WOODMARK HARDY COUNTY PLANT,390 INDUSTRIAL PARK RD,MOOREFIELD,HARDY,WV,26836,,...,0.0,0.0,0.0,0.0,0.0,0.000000,,,1.00,
3,2007,26554HLMCK8THBE,110000782966,HELMICK CORP,10 TH ST & MINOR AVE,FAIRMONT,MARION,WV,26554,,...,0.0,0.0,0.0,0.0,0.0,208.000000,,,1.00,
4,2007,25112RHNPLROUTE,110069544390,BAYER CROPSCIENCE LP,ROUTE 25 AT I-64,INSTITUTE,KANAWHA,WV,25112,,...,0.0,0.0,0.0,0.0,0.0,103.000000,,,1.25,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10733,2018,25401CPTLCSOUTH,110000585929,ARGOS USA - MARTINSBURG,1826 S QUEEN ST,MARTINSBURG,BERKELEY,WV,25401,,...,0.0,0.0,0.0,0.0,0.0,46.000000,,PRODUCTION,0.91,
10734,2018,25143CSNCXWEST1,110013694038,AC&S INC,W 19TH ST,NITRO,PUTNAM,WV,25143,,...,0.0,0.0,0.0,0.0,0.0,77.280000,,PRODUCTION,1.16,
10735,2018,25801PTRLM218BU,110013807470,PILOT THOMAS LOGISTICS- BECKLEY,218 BUSINESS ST,BECKLEY,RALEIGH,WV,25801,,...,0.0,0.0,0.0,0.0,0.0,0.000000,,,,
10736,2018,25880BXLYC177NS,110055497797,CSC BECKLEY RMC PLANT,1707 N SANDBRANCH RD,MOUNT HOPE,RALEIGH,WV,25880,,...,0.0,0.0,0.0,0.0,0.0,4.370409,,PRODUCTION,1.43,


In [3]:
# remove leading numbers, periods, and space and from column names
combined.columns = combined.columns.str.lstrip('0123456789.- ')
combined

Unnamed: 0,YEAR,TRIFD,FRS ID,FACILITY NAME,STREET ADDRESS,CITY,COUNTY,ST,ZIP,BIA,...,ENERGY RECOVER OF,RECYCLING ON SITE,RECYCLING OFF SIT,TREATMENT ON SITE,TREATMENT OFF SITE,PRODUCTION WSTE (8.1-8.7),ONE-TIME RELEASE,PROD_RATIO_OR_ ACTIVITY,PRODUCTION RATIO,Unnamed: 115
0,2007,26588RVSVLJACKS,110000604114,ALLEGHENY ENERGY INC RIVESVILLE POWER STATION,JACKSON ST,RIVESVILLE,MARION,WV,26588,,...,0.0,0.0,0.0,54000.0,0.0,71000.000000,,,1.42,
1,2007,26146LMTCH3816S,110056954345,REAL ALLOY RECYCLING LLC,283 INDUSTRIAL PARK ROAD,FRIENDLY,TYLER,WV,26146,,...,0.0,0.0,434.0,0.0,0.0,5124.130000,,,1.16,
2,2007,26836MRCNWROUTE,110000345056,AMERICAN WOODMARK HARDY COUNTY PLANT,390 INDUSTRIAL PARK RD,MOOREFIELD,HARDY,WV,26836,,...,0.0,0.0,0.0,0.0,0.0,0.000000,,,1.00,
3,2007,26554HLMCK8THBE,110000782966,HELMICK CORP,10 TH ST & MINOR AVE,FAIRMONT,MARION,WV,26554,,...,0.0,0.0,0.0,0.0,0.0,208.000000,,,1.00,
4,2007,25112RHNPLROUTE,110069544390,BAYER CROPSCIENCE LP,ROUTE 25 AT I-64,INSTITUTE,KANAWHA,WV,25112,,...,0.0,0.0,0.0,0.0,0.0,103.000000,,,1.25,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10733,2018,25401CPTLCSOUTH,110000585929,ARGOS USA - MARTINSBURG,1826 S QUEEN ST,MARTINSBURG,BERKELEY,WV,25401,,...,0.0,0.0,0.0,0.0,0.0,46.000000,,PRODUCTION,0.91,
10734,2018,25143CSNCXWEST1,110013694038,AC&S INC,W 19TH ST,NITRO,PUTNAM,WV,25143,,...,0.0,0.0,0.0,0.0,0.0,77.280000,,PRODUCTION,1.16,
10735,2018,25801PTRLM218BU,110013807470,PILOT THOMAS LOGISTICS- BECKLEY,218 BUSINESS ST,BECKLEY,RALEIGH,WV,25801,,...,0.0,0.0,0.0,0.0,0.0,0.000000,,,,
10736,2018,25880BXLYC177NS,110055497797,CSC BECKLEY RMC PLANT,1707 N SANDBRANCH RD,MOUNT HOPE,RALEIGH,WV,25880,,...,0.0,0.0,0.0,0.0,0.0,4.370409,,PRODUCTION,1.43,


In [5]:
# reduce West Virginia dataframe to one record per facility
# use group by TRIFID field, and only keep the identifying fields about each facility
wv_tri = combined.fillna('').groupby('TRIFD').agg({'FACILITY NAME':'first',
                             'STREET ADDRESS':'first',
                             'CITY':'first',
                             'COUNTY':'first',
                             'ST':'first',
                             'ZIP':'first',
                             'BIA':'first',
                             'TRIBE':'first',
                             'LATITUDE':'first',
                             'LONGITUDE':'first',
                             'INDUSTRY SECTOR':'first',
                             'CHEMICAL': ', '.join, # to get list of all chemicals released by facility
                             'PARENT CO NAME': 'first'
                                               }).reset_index()

wv_tri

Unnamed: 0,TRIFD,FACILITY NAME,STREET ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,LONGITUDE,INDUSTRY SECTOR,CHEMICAL,PARENT CO NAME
0,24701FSRCNRTE29,MINOVA USA INC,394 SHOTT CT,BLUEFIELD,MERCER,WV,24701,,,37.2916,-81.0988,Chemicals,"STYRENE, BENZOYL PEROXIDE, BENZOYL PEROXIDE, S...",ORICA USA INC
1,24701GMRKSRTE29,GEMARK SERVICES OF WEST VIRGINIA INC,RT 290 CUMBERLAND INDUSTRIAL PARK,BLUEFIELD,MERCER,WV,24701,,,37.2867,-81.105,Primary Metals,"DIOXIN AND DIOXIN-LIKE COMPOUNDS, SILVER, SILV...",GEMARK CORP
2,24740CMWDPROGER,ACME WOOD PRESERVING INC,200 ROSELAND AVE,PRINCETON,MERCER,WV,24740,,,37.3619,-81.0842,Wood Products,"ARSENIC COMPOUNDS, CHROMIUM COMPOUNDS(EXCEPT C...",
3,24740CNNWLUSROU,CONN-WELD INC,US RT 460 315 WABASH,PRINCETON,MERCER,WV,24740,,,37.4291,-81.0202,Fabricated Metals,"CHROMIUM, LEAD, NICKEL, NICKEL, LEAD, CHROMIUM...",
4,24740GRGPC577CL,ALLEGHENY WOOD PRODUCTS INC MILL 9,577 CLOVER DEW DAIRY RD,PRINCETON,MERCER,WV,24739,,,37.3197,-81.1292,Wood Products,"LEAD COMPOUNDS, LEAD COMPOUNDS, LEAD COMPOUNDS...",ALLEGHENY WOOD PRODUCTS INC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,26836MRCNW587RB,AMERICAN WOODMARK CORP SOUTH BRANCH PLANT,587 ROBERT C. BYRD INDUSTRIAL PARK RD,MOOREFIELD,HARDY,WV,26836,,,39.0746,-78.9413,Furniture,"METHYL ISOBUTYL KETONE, METHANOL, TOLUENE, LEA...",AMERICAN WOODMARK CORP
266,26836MRCNWROUTE,AMERICAN WOODMARK HARDY COUNTY PLANT,390 INDUSTRIAL PARK RD,MOOREFIELD,HARDY,WV,26836,,,39.0411,-78.9872,Furniture,"LEAD COMPOUNDS, METHANOL, N-BUTYL ALCOHOL, ETH...",AMERICAN WOODMARK CORP
267,26836RCKNGPOTOM,PILGRIM'S PRIDE CORP MOOREFIELD FRESH FACILITY,129 POTOMAC ST,MOOREFIELD,HARDY,WV,26836,,,39.0593,-78.9712,Food,"NITRATE COMPOUNDS, OZONE, NITRATE COMPOUNDS, O...",JBS USA FOOD CO
268,26836WMPLRHWY22,PILGRIM'S PRIDE CORP MOOREFIELD FEEDMILL,194 INDUSTRIAL PARK ROAD,MOOREFIELD,HARDY,WV,26836,,,39.0408,-78.9889,Food,"ZINC COMPOUNDS, COPPER COMPOUNDS, MANGANESE CO...",JBS USA FOODS CO


### Wrangle RSEI Scores

Next I will load the RSEI scores and wrangle the dataframe into a better format. I need to have one record per facility with a column for each year's RSEI score.

In [6]:
# load RSEI scores
scores = pd.read_excel('../data/wv-rsei.xlsx')
scores

Unnamed: 0,SubmissionYear,FacilityID,RSEI Score
0,2007,2630WSTCKM2CLUM,10.831766
1,2007,24701FSRCNRTE29,0.213347
2,2007,24701GMRKSRTE29,0.314023
3,2007,24740CMWDPROGER,0.000000
4,2007,24740CNNWLUSROU,0.000000
...,...,...,...
2169,2018,26814GRRNDRT33X,14.685377
2170,2018,26814LLGHNUSRT2,0.031145
2171,2018,26836MRCNW587RB,55.946225
2172,2018,26836RCKNGPOTOM,0.364649


In [7]:
# cast year field as string
scores['SubmissionYear'] = scores['SubmissionYear'].astype(str)

# create list of all years
years = scores['SubmissionYear'].unique()

# create a new empty column in dataframe for each year
for year in years:
    scores[year] = ""

In [8]:
# convert year columns into numeric datatypes
scores[years] = scores[years].apply(pd.to_numeric)

# for each year column
for column in scores[years]:
    for index, row in scores.iterrows(): # loop through each row
        if row['SubmissionYear'] == column: # and if the submission year column matches the year column
            scores.loc[index, column] = row['RSEI Score'] # then populate that year column with that years RSEI score

# fill NaN values with 0
# so now a 0 represents no score
# even if a facility did report that year with a 0 score, for the sake of the map we will assume no score for that year
scores.fillna(0, inplace=True)
scores

Unnamed: 0,SubmissionYear,FacilityID,RSEI Score,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,2007,2630WSTCKM2CLUM,10.831766,10.831766,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
1,2007,24701FSRCNRTE29,0.213347,0.213347,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
2,2007,24701GMRKSRTE29,0.314023,0.314023,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
3,2007,24740CMWDPROGER,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
4,2007,24740CNNWLUSROU,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2169,2018,26814GRRNDRT33X,14.685377,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.685377
2170,2018,26814LLGHNUSRT2,0.031145,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.031145
2171,2018,26836MRCNW587RB,55.946225,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,55.946225
2172,2018,26836RCKNGPOTOM,0.364649,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.364649


In [9]:
# reduce dataframe to get one record per facility, with an RSEI score for each year
scores_reduce = scores.fillna('').groupby('FacilityID').agg({'2007':'sum',
                             '2008':'sum',
                             '2009':'sum',
                             '2010':'sum',
                             '2011':'sum',
                             '2012':'sum',
                             '2013':'sum',
                             '2014':'sum',
                             '2015':'sum',
                             '2016':'sum',
                             '2017':'sum',
                             '2018':'sum'}).reset_index()

scores_reduce

Unnamed: 0,FacilityID,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,24701FSRCNRTE29,0.213347,0.222526,0.184589,0.184625,0.158778,0.138420,0.114649,0.118509,0.096873,0.076307,0.101707,0.140226
1,24701GMRKSRTE29,0.314023,0.691259,0.639270,0.364298,0.195071,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,24740CMWDPROGER,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,24740CNNWLUSROU,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,24740GRGPC577CL,0.389326,0.378158,0.362043,0.381865,0.371159,0.333478,0.409873,0.481321,0.429945,0.475417,0.501556,0.438661
...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,26836MRCNW587RB,29.277747,33.247441,30.052942,26.952065,39.140308,59.892921,72.862628,68.131344,124.421495,46.484522,56.016006,55.946225
266,26836MRCNWROUTE,164.386818,121.115131,75.240726,92.141946,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
267,26836RCKNGPOTOM,11.415478,16.069737,16.020329,16.200580,14.704296,18.598377,21.881592,0.709710,5.016230,7.532045,0.365116,0.364649
268,26836WMPLRHWY22,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


### Merge TRI facility location dataframe with RSEI score dataframe

In [10]:
# join rsei score dataframe to wv_tri dataframe to get location attributes
wv_tri_joined = wv_tri.merge(scores_reduce, left_on='TRIFD', right_on='FacilityID')
wv_tri_joined

Unnamed: 0,TRIFD,FACILITY NAME,STREET ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,24701FSRCNRTE29,MINOVA USA INC,394 SHOTT CT,BLUEFIELD,MERCER,WV,24701,,,37.2916,...,0.184589,0.184625,0.158778,0.138420,0.114649,0.118509,0.096873,0.076307,0.101707,0.140226
1,24701GMRKSRTE29,GEMARK SERVICES OF WEST VIRGINIA INC,RT 290 CUMBERLAND INDUSTRIAL PARK,BLUEFIELD,MERCER,WV,24701,,,37.2867,...,0.639270,0.364298,0.195071,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,24740CMWDPROGER,ACME WOOD PRESERVING INC,200 ROSELAND AVE,PRINCETON,MERCER,WV,24740,,,37.3619,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,24740CNNWLUSROU,CONN-WELD INC,US RT 460 315 WABASH,PRINCETON,MERCER,WV,24740,,,37.4291,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,24740GRGPC577CL,ALLEGHENY WOOD PRODUCTS INC MILL 9,577 CLOVER DEW DAIRY RD,PRINCETON,MERCER,WV,24739,,,37.3197,...,0.362043,0.381865,0.371159,0.333478,0.409873,0.481321,0.429945,0.475417,0.501556,0.438661
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,26836MRCNW587RB,AMERICAN WOODMARK CORP SOUTH BRANCH PLANT,587 ROBERT C. BYRD INDUSTRIAL PARK RD,MOOREFIELD,HARDY,WV,26836,,,39.0746,...,30.052942,26.952065,39.140308,59.892921,72.862628,68.131344,124.421495,46.484522,56.016006,55.946225
266,26836MRCNWROUTE,AMERICAN WOODMARK HARDY COUNTY PLANT,390 INDUSTRIAL PARK RD,MOOREFIELD,HARDY,WV,26836,,,39.0411,...,75.240726,92.141946,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
267,26836RCKNGPOTOM,PILGRIM'S PRIDE CORP MOOREFIELD FRESH FACILITY,129 POTOMAC ST,MOOREFIELD,HARDY,WV,26836,,,39.0593,...,16.020329,16.200580,14.704296,18.598377,21.881592,0.709710,5.016230,7.532045,0.365116,0.364649
268,26836WMPLRHWY22,PILGRIM'S PRIDE CORP MOOREFIELD FEEDMILL,194 INDUSTRIAL PARK ROAD,MOOREFIELD,HARDY,WV,26836,,,39.0408,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [11]:
wv_tri_joined.isnull().sum(axis = 0)

TRIFD              0
FACILITY NAME      0
STREET ADDRESS     0
CITY               0
COUNTY             0
ST                 0
ZIP                0
BIA                0
TRIBE              0
LATITUDE           0
LONGITUDE          0
INDUSTRY SECTOR    0
CHEMICAL           0
PARENT CO NAME     0
FacilityID         0
2007               0
2008               0
2009               0
2010               0
2011               0
2012               0
2013               0
2014               0
2015               0
2016               0
2017               0
2018               0
dtype: int64

In [17]:
# drop rows with missing latitude values
missing = wv_tri_joined.loc[wv_tri_joined['LATITUDE'] == ''].index
wv_tri_joined.drop(missing, inplace=True)
wv_tri_joined

Unnamed: 0,TRIFD,FACILITY NAME,STREET ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,geometry
0,24701FSRCNRTE29,MINOVA USA INC,394 SHOTT CT,BLUEFIELD,MERCER,WV,24701,,,37.291636,...,0.184625,0.158778,0.138420,0.114649,0.118509,0.096873,0.076307,0.101707,0.140226,POINT (-81.09883 37.29164)
1,24701GMRKSRTE29,GEMARK SERVICES OF WEST VIRGINIA INC,RT 290 CUMBERLAND INDUSTRIAL PARK,BLUEFIELD,MERCER,WV,24701,,,37.286667,...,0.364298,0.195071,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,POINT (-81.10500 37.28667)
2,24740CMWDPROGER,ACME WOOD PRESERVING INC,200 ROSELAND AVE,PRINCETON,MERCER,WV,24740,,,37.361910,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,POINT (-81.08421 37.36191)
3,24740CNNWLUSROU,CONN-WELD INC,US RT 460 315 WABASH,PRINCETON,MERCER,WV,24740,,,37.429080,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,POINT (-81.02019 37.42908)
4,24740GRGPC577CL,ALLEGHENY WOOD PRODUCTS INC MILL 9,577 CLOVER DEW DAIRY RD,PRINCETON,MERCER,WV,24739,,,37.319700,...,0.381865,0.371159,0.333478,0.409873,0.481321,0.429945,0.475417,0.501556,0.438661,POINT (-81.12920 37.31970)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,26836MRCNW587RB,AMERICAN WOODMARK CORP SOUTH BRANCH PLANT,587 ROBERT C. BYRD INDUSTRIAL PARK RD,MOOREFIELD,HARDY,WV,26836,,,39.074558,...,26.952065,39.140308,59.892921,72.862628,68.131344,124.421495,46.484522,56.016006,55.946225,POINT (-78.94134 39.07456)
266,26836MRCNWROUTE,AMERICAN WOODMARK HARDY COUNTY PLANT,390 INDUSTRIAL PARK RD,MOOREFIELD,HARDY,WV,26836,,,39.041110,...,92.141946,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,POINT (-78.98722 39.04111)
267,26836RCKNGPOTOM,PILGRIM'S PRIDE CORP MOOREFIELD FRESH FACILITY,129 POTOMAC ST,MOOREFIELD,HARDY,WV,26836,,,39.059284,...,16.200580,14.704296,18.598377,21.881592,0.709710,5.016230,7.532045,0.365116,0.364649,POINT (-78.97118 39.05928)
268,26836WMPLRHWY22,PILGRIM'S PRIDE CORP MOOREFIELD FEEDMILL,194 INDUSTRIAL PARK ROAD,MOOREFIELD,HARDY,WV,26836,,,39.040816,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,POINT (-78.98890 39.04082)


### Convert final dataframe to geodataframe and export to geojson

In [19]:
# cast lat/long columns to float
wv_tri_joined['LATITUDE'] = wv_tri_joined['LATITUDE'].astype(float)
wv_tri_joined['LONGITUDE'] = wv_tri_joined['LONGITUDE'].astype(float)

# create geodataframe using Latitude and Longitude columns
gdf = gpd.GeoDataFrame(wv_tri_joined, geometry=gpd.points_from_xy(wv_tri_joined.LONGITUDE, wv_tri_joined.LATITUDE))

# define crs for geodataframe
gdf.crs = {'init' :'epsg:4326'}

gdf

Unnamed: 0,TRIFD,FACILITY NAME,STREET ADDRESS,CITY,COUNTY,ST,ZIP,BIA,TRIBE,LATITUDE,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,geometry
0,24701FSRCNRTE29,MINOVA USA INC,394 SHOTT CT,BLUEFIELD,MERCER,WV,24701,,,37.291636,...,0.184625,0.158778,0.138420,0.114649,0.118509,0.096873,0.076307,0.101707,0.140226,POINT (-81.09883 37.29164)
1,24701GMRKSRTE29,GEMARK SERVICES OF WEST VIRGINIA INC,RT 290 CUMBERLAND INDUSTRIAL PARK,BLUEFIELD,MERCER,WV,24701,,,37.286667,...,0.364298,0.195071,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,POINT (-81.10500 37.28667)
2,24740CMWDPROGER,ACME WOOD PRESERVING INC,200 ROSELAND AVE,PRINCETON,MERCER,WV,24740,,,37.361910,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,POINT (-81.08421 37.36191)
3,24740CNNWLUSROU,CONN-WELD INC,US RT 460 315 WABASH,PRINCETON,MERCER,WV,24740,,,37.429080,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,POINT (-81.02019 37.42908)
4,24740GRGPC577CL,ALLEGHENY WOOD PRODUCTS INC MILL 9,577 CLOVER DEW DAIRY RD,PRINCETON,MERCER,WV,24739,,,37.319700,...,0.381865,0.371159,0.333478,0.409873,0.481321,0.429945,0.475417,0.501556,0.438661,POINT (-81.12920 37.31970)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,26836MRCNW587RB,AMERICAN WOODMARK CORP SOUTH BRANCH PLANT,587 ROBERT C. BYRD INDUSTRIAL PARK RD,MOOREFIELD,HARDY,WV,26836,,,39.074558,...,26.952065,39.140308,59.892921,72.862628,68.131344,124.421495,46.484522,56.016006,55.946225,POINT (-78.94134 39.07456)
266,26836MRCNWROUTE,AMERICAN WOODMARK HARDY COUNTY PLANT,390 INDUSTRIAL PARK RD,MOOREFIELD,HARDY,WV,26836,,,39.041110,...,92.141946,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,POINT (-78.98722 39.04111)
267,26836RCKNGPOTOM,PILGRIM'S PRIDE CORP MOOREFIELD FRESH FACILITY,129 POTOMAC ST,MOOREFIELD,HARDY,WV,26836,,,39.059284,...,16.200580,14.704296,18.598377,21.881592,0.709710,5.016230,7.532045,0.365116,0.364649,POINT (-78.97118 39.05928)
268,26836WMPLRHWY22,PILGRIM'S PRIDE CORP MOOREFIELD FEEDMILL,194 INDUSTRIAL PARK ROAD,MOOREFIELD,HARDY,WV,26836,,,39.040816,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,POINT (-78.98890 39.04082)


In [20]:
# export geodataframe to geojson
gdf.to_file("../data/west-virginia-tri.geojson", driver='GeoJSON', encoding='utf-8')