# Explore parent companies of TRI facilities

In [9]:
# import packages
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Point
from shapely.geometry import Polygon
from shapely.geometry import mapping
import jenkspy

import warnings

# Silence every warning category for the rest of the session so cell outputs stay clean.
# NOTE: this also hides deprecation notices raised by pandas / geopandas / geopy.
warnings.simplefilter('ignore')

In [4]:
# load the TRI facilities GeoJSON into a GeoDataFrame
gdf = gpd.read_file(filename='../data/tri-facilities.geojson')

In [6]:
# lift the row limit so the whole table is shown in the display window
pd.set_option('display.max_rows', None)

# count facilities per parent company (most frequent first) and turn the
# result into a two-column table: parentCompany, count
comp = (
    gdf['PARENT_CO_NAME']
    .value_counts()
    .rename_axis('parentCompany')
    .reset_index(name='count')
)

comp

Unnamed: 0,parentCompany,count
0,,5168
1,US DEPARTMENT OF DEFENSE,268
2,BERKSHIRE HATHAWAY INC,191
3,CEMEX INC,171
4,ARGOS USA CORP,164
5,KOCH INDUSTRIES INC,139
6,CRH AMERICAS INC,128
7,CLEAN HARBORS INC,115
8,TYSON FOODS INC,112
9,MARATHON PETROLEUM CORP,90


In [7]:
# Remove the blank-name row, which is the total of all facilities that didn't
# report a parent company.
# Filter on the name itself instead of dropping "the first row": a positional
# drop is not idempotent, so re-running the cell would silently remove a real
# company (starting with the largest one).
has_parent_name = comp['parentCompany'].str.strip().ne('')

# reset_index(drop=True) renumbers the rows without leaving an 'index' column behind
comp = comp[has_parent_name].reset_index(drop=True)
comp

Unnamed: 0,parentCompany,count
0,US DEPARTMENT OF DEFENSE,268
1,BERKSHIRE HATHAWAY INC,191
2,CEMEX INC,171
3,ARGOS USA CORP,164
4,KOCH INDUSTRIES INC,139
5,CRH AMERICAS INC,128
6,CLEAN HARBORS INC,115
7,TYSON FOODS INC,112
8,MARATHON PETROLEUM CORP,90
9,MARTIN MARIETTA MATERIALS INC,90


In [10]:
# find natural breaks in data to pick out companies with the most facilities
# testing different numbers of classes (3, 4 and 5)
#
# The number of classes is passed positionally on purpose: the keyword was
# `nb_class` in older jenkspy releases and `n_classes` in newer ones, so the
# positional form is the one that runs on both.
facility_counts = comp['count']
breaks3, breaks4, breaks5 = (
    jenkspy.jenks_breaks(facility_counts, n_classes)
    for n_classes in (3, 4, 5)
)

# each list holds the class boundaries: minimum, upper bound of each class, ..., maximum
for breaks in (breaks3, breaks4, breaks5):
    print(breaks)


[1.0, 21.0, 90.0, 268.0]
[1.0, 7.0, 31.0, 90.0, 268.0]
[1.0, 7.0, 29.0, 73.0, 139.0, 268.0]


Both the 3-class and the 4-class classifications place a break at 90 facilities. Using 90 facilities as the cutoff selects 10 companies.

In [11]:
# comp is already sorted by facility count (descending), so the first ten rows
# are the ten parent companies with the most facilities
top10 = comp.iloc[:10]
top10

Unnamed: 0,parentCompany,count
0,US DEPARTMENT OF DEFENSE,268
1,BERKSHIRE HATHAWAY INC,191
2,CEMEX INC,171
3,ARGOS USA CORP,164
4,KOCH INDUSTRIES INC,139
5,CRH AMERICAS INC,128
6,CLEAN HARBORS INC,115
7,TYSON FOODS INC,112
8,MARATHON PETROLEUM CORP,90
9,MARTIN MARIETTA MATERIALS INC,90


After finding the top 10 parent companies, I created a csv file containing:
- company name
- headquarters address
- number of facilities owned
- industry
- and a brief description of each company

Addresses and company information were obtained from Google Maps and Bloomberg Company Profiles. I was unable to find an exact street address for Cemex Inc, only that the company is headquartered in San Pedro Garza García, Mexico, so a city-level address (postal code 66220, San Pedro Garza García, Nuevo León) was used to represent Cemex Inc's headquarters.

In [12]:
# load the hand-assembled company table (name, facility count, address,
# industry, description); the file is Latin-1 encoded
companies = pd.read_csv(
    filepath_or_buffer='../data/parent-companies.csv',
    encoding='latin-1',
)
companies

Unnamed: 0,parentCompany,count,address,industry,description
0,US Department Of Defense,268,"100 S Washington Blvd, Arlington VA 22202",,The United States Department of Defense is an ...
1,Berkshire Hathaway Inc,191,"3555 Farnam Street Omaha, NE 68131",,Berkshire Hathaway is an American multinationa...
2,Cemex Inc,171,"66220 San Pedro Garza García, Nuevo Leon, Mexico",Construction Materials-Cement & Aggregates,Cemex Inc. manufactures cement and ready-mixed...
3,Argos Usa Corp,164,"3015 Windward Plaza, Alpharetta, GA 30005 Unit...",Construction Materials-Cement & Aggregates,Argos USA LLC produces and distributes cements...
4,Koch Industries Inc,139,"2256 Wichita, KS 67201 United States","Oil, Gas & Coal","Koch Industries, Inc. operates as a diversifie..."
5,Crh Americas Inc,128,"900 Ashwood Pkwy, Dunwoody, GA 30338",Construction Materials-Cement & Aggregates,"CRH America, Inc. provides construction materi..."
6,Clean Harbors Inc,115,"42 Longwater Drive Norwell, MA 02061 United St...",Waste Management,"Clean Harbors Environmental Services, Inc. pro..."
7,Tyson Foods Inc,112,"2200 West Don Tyson Parkway Springdale, AR 727...",Consumer Products-Packaged Food,"Tyson Foods, Inc. produces, distributes, and m..."
8,Marathon Petroleum Corp,90,"539 South Main Street Findlay, OH 45840 United...","Oil, Gas & Coal",Marathon Petroleum Corporation operates as a c...
9,Martin Marietta Materials Inc,90,"2710 Wycliff Road Raleigh, NC 27607 United States",Construction Materials-Cement & Aggregates,"Martin Marietta Materials, Inc. produces aggre..."


In [13]:
# geocode addresses, assigning lat/long values to new columns
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

# Nominatim's usage policy requires an identifying user agent (geopy 2.x raises
# ConfigurationError when none is given) and at most one request per second.
geolocator = Nominatim(user_agent='tri-parent-companies-notebook')
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

# one request per address; the result is None when an address cannot be matched
locations = companies['address'].apply(geocode)

# fail with the offending addresses named, instead of an AttributeError on None
unmatched = companies.loc[locations.isna(), 'address']
if not unmatched.empty:
    raise ValueError(
        'Could not geocode the following addresses: ' + '; '.join(unmatched.astype(str))
    )

companies['latitude'] = locations.apply(lambda loc: loc.latitude)    # create latitude column
companies['longitude'] = locations.apply(lambda loc: loc.longitude)  # create longitude column

companies

Unnamed: 0,parentCompany,count,address,industry,description,latitude,longitude
0,US Department Of Defense,268,"100 S Washington Blvd, Arlington VA 22202",,The United States Department of Defense is an ...,38.865921,-77.073293
1,Berkshire Hathaway Inc,191,"3555 Farnam Street Omaha, NE 68131",,Berkshire Hathaway is an American multinationa...,41.257407,-95.965389
2,Cemex Inc,171,"66220 San Pedro Garza García, Nuevo Leon, Mexico",Construction Materials-Cement & Aggregates,Cemex Inc. manufactures cement and ready-mixed...,25.657634,-100.366755
3,Argos Usa Corp,164,"3015 Windward Plaza, Alpharetta, GA 30005 Unit...",Construction Materials-Cement & Aggregates,Argos USA LLC produces and distributes cements...,34.093298,-84.239493
4,Koch Industries Inc,139,"2256 Wichita, KS 67201 United States","Oil, Gas & Coal","Koch Industries, Inc. operates as a diversifie...",37.692236,-97.337545
5,Crh Americas Inc,128,"900 Ashwood Pkwy, Dunwoody, GA 30338",Construction Materials-Cement & Aggregates,"CRH America, Inc. provides construction materi...",33.932267,-84.340375
6,Clean Harbors Inc,115,"42 Longwater Drive Norwell, MA 02061 United St...",Waste Management,"Clean Harbors Environmental Services, Inc. pro...",42.160652,-70.884074
7,Tyson Foods Inc,112,"2200 West Don Tyson Parkway Springdale, AR 727...",Consumer Products-Packaged Food,"Tyson Foods, Inc. produces, distributes, and m...",36.154429,-94.154233
8,Marathon Petroleum Corp,90,"539 South Main Street Findlay, OH 45840 United...","Oil, Gas & Coal",Marathon Petroleum Corporation operates as a c...,41.036255,-83.650158
9,Martin Marietta Materials Inc,90,"2710 Wycliff Road Raleigh, NC 27607 United States",Construction Materials-Cement & Aggregates,"Martin Marietta Materials, Inc. produces aggre...",35.819505,-78.691141


In [14]:
# convert dataframe to geodataframe, building one point per company from its
# geocoded longitude (x) and latitude (y)
# The CRS is declared at construction as an authority string; the
# {'init': 'epsg:4326'} dict form is deprecated proj4 syntax.
# NOTE: this rebinds `gdf`, which earlier in the notebook held the TRI facilities.
gdf = gpd.GeoDataFrame(
    companies,
    geometry=gpd.points_from_xy(companies['longitude'], companies['latitude']),
    crs='EPSG:4326',
)

# write to file
gdf.to_file('../data/parent-companies.geojson', driver='GeoJSON')