# Department of Transportation - Oil Spill Mapping

In [24]:
# Dependencies and Setup
import pandas as pd
import openpyxl
import json
import numpy as np
import re
from dataprep.clean import clean_lat_long
from dataprep.clean import validate_lat_long

# import requests
import gmaps
# import folium
# import gmaps.datasets
# from shapely.geometry import Point, Polygon
# import geopandas as gpd
# import descartes
# import matplotlib.pyplot as plt
# import plotly.graph_objects as go
# import geopandas
# %matplotlib inline
# %matplotlib notebook

# Import API key
from config import g_map

# Configure gmaps
gmaps.configure(api_key=g_map)

## Bring in 2002-2009 Spill Data Raw

In [4]:
#define function to extract chosen columns
def extract(pipeline_file,column_names):
    #Read data into df
    pipeline_data = pd.read_csv(pipeline_file)
    fixed_pipeline_data = pipeline_data[column_names]
    return pipeline_data, fixed_pipeline_data

In [17]:
# 1. Read the Data_Columns_and_spills into a DataFrame. All sheets
# Unedited excel files
xlfile_2002 = pd.ExcelFile('../Resources/Data_columns_and_spills.xlsx')
pipeline_file = xlfile_2002.parse('2002-2010') #'2002-2010' is the sheet name in the excel file
pipeline_file #is all data all columns

Unnamed: 0,IYEAR,OPERATOR_ID,RPTID,OWNER_OPERATOR_ID,NAME,OPSTREET,OPCITY,OPCOUNTY,OPSTATE,OPZIP,...,IO_DRUG,IO_ALCO,MISC,UNKNOWN,UNKNOWN_TXT,PNAME,PTEL,PEMAIL,PFAX,NARRATIVE
0,2002,19237,20020037,,"TE PRODUCTS PIPELINE CO., LP",2929 ALLEN PARKWAY,HOUSTON,HARRIS,TX,77019,...,,,,,,JLYNN STOUT,7137593614,JPSTOUT@TEPPCO.COM,7.137594e+09,
1,2002,25146,20020038,,"EQUISTAR CHEMICALS, L.P.",PO BOX 1847,ALVIN,BRAZORIA,TX,77512,...,,,,,,GREG LEBLANC,7138446942,GREGORY.LEBLANC@EQUISTARCHEM.COM,7.138447e+09,REGARDING PART C5: SECTIONS OF THIS LINE WERE...
2,2002,12628,20020039,,MOBIL PIPELINE CO,800 BELL STREET,HOUSTON,HARRIS,TX,77002,...,0.0,0.0,,,,STEVEN J ROBENOLT,7136562651,STEVEN.J.ROBENOLT@EXXONMOBIL.COM,7.136568e+09,"ON JANUARY 24,2002 MOBIL PIPELINE COMPANY INCU..."
3,2002,2731,20020040,,CHEVRON PIPELINE CO,2811 HAYES ROAD,HOUSTON,HARRIS,TX,77082,...,,,,,,R A THORPE,2815963571,RATHORPE@CHEVRONTEXACO.COM,2.815964e+09,
4,2002,4472,20020041,,"KINDER MORGAN ENERGY PARTNERS, L.P.","500 DALLAS ST, SUITE 1000",HOUSTON,HARRIS,TX,77002,...,,,,,,JAIME A HERNANDEZ,7133699443,JAIME_HERNANDEZ@KINKDERMORGAN.COM,7.134954e+09,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3025,2009,10012,20090382,,NUSTAR PIPELINE OPERATING PARTNERSHIP L.P.,"7340 W. 21ST STREET N., SUITE 200",WICHITA,SEDGWICK,KS,67205,...,,,,,,DANIEL J. TIBBITS,3167217056,DAN.TIBBITS@NUSTARENERGY.COM,3.167739e+09,
3026,2007,32147,20090383,,MARATHON PIPE LINE LLC,539 SOUTH MAIN STREET,FINDLAY,HANCOCK,OH,45840,...,0.0,0.0,,,,MANDI KUHLMAN,4194214037,MMKUHLMAN@MARATHONPETROLEUM.COM,4.194214e+09,DURING INSTALLATION OF NEW 16” CONTROL VALVE A...
3027,2006,32147,20090384,,MARATHON PIPE LINE LLC,539 SOUTH MAIN STREET,FINDLAY,HANCOCK,OH,45840,...,0.0,0.0,,,,MANDI KUHLMAN,4194214037,MMKUHLMAN@MARATHONPETROLEUM.COM,4.194214e+09,"WHILE PREPARING TO INSTALL A NEW FABRICATION, ..."
3028,2009,32147,20090385,,MARATHON PIPE LINE LLC,539 SOUTH MAIN STREET,FINDLAY,HANCOCK,OH,45840,...,,,RESIDUAL PRODUCT IN THE LINE IGNITED FOLLOWING...,,,MANDI KUHLMAN,4194214037,MMKUHLMAN@MARATHONPETROLEUM.COM,4.194214e+09,"FOLLOWING A NITROGEN PURGE, COLD CUTS WERE UTI..."


In [18]:
# drop null and NaN values from lat and long
pipeline_file = pipeline_file.dropna(subset=["LATITUDE"])
# pipeline_file = pipeline_file.dropna(subset=["LONGITUDE"])
pipeline_file



Unnamed: 0,IYEAR,OPERATOR_ID,RPTID,OWNER_OPERATOR_ID,NAME,OPSTREET,OPCITY,OPCOUNTY,OPSTATE,OPZIP,...,IO_DRUG,IO_ALCO,MISC,UNKNOWN,UNKNOWN_TXT,PNAME,PTEL,PEMAIL,PFAX,NARRATIVE
0,2002,19237,20020037,,"TE PRODUCTS PIPELINE CO., LP",2929 ALLEN PARKWAY,HOUSTON,HARRIS,TX,77019,...,,,,,,JLYNN STOUT,7137593614,JPSTOUT@TEPPCO.COM,7.137594e+09,
1,2002,25146,20020038,,"EQUISTAR CHEMICALS, L.P.",PO BOX 1847,ALVIN,BRAZORIA,TX,77512,...,,,,,,GREG LEBLANC,7138446942,GREGORY.LEBLANC@EQUISTARCHEM.COM,7.138447e+09,REGARDING PART C5: SECTIONS OF THIS LINE WERE...
2,2002,12628,20020039,,MOBIL PIPELINE CO,800 BELL STREET,HOUSTON,HARRIS,TX,77002,...,0.0,0.0,,,,STEVEN J ROBENOLT,7136562651,STEVEN.J.ROBENOLT@EXXONMOBIL.COM,7.136568e+09,"ON JANUARY 24,2002 MOBIL PIPELINE COMPANY INCU..."
6,2002,4805,20020043,,EXPLORER PIPELINE CO,PO BOX 2650,TULSA,TULSA,OK,74101,...,,,,,,ROGER W FLEMING,9184935112,rfleming@expl.com,9.184935e+09,
8,2002,12452,20020045,,MID - AMERICA PIPELINE CO (MAPCO),PO BOX 21628,TULSA,OSAGE,OK,74101,...,,,,,,KENNETH L LYBARGER,9185730350,KENNETH.LYBARGER@WILLIAMS.COM,9.185731e+09,"AN EXCAVATOR, INSTALLING DRAIN TILE, HIT AND P..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3025,2009,10012,20090382,,NUSTAR PIPELINE OPERATING PARTNERSHIP L.P.,"7340 W. 21ST STREET N., SUITE 200",WICHITA,SEDGWICK,KS,67205,...,,,,,,DANIEL J. TIBBITS,3167217056,DAN.TIBBITS@NUSTARENERGY.COM,3.167739e+09,
3026,2007,32147,20090383,,MARATHON PIPE LINE LLC,539 SOUTH MAIN STREET,FINDLAY,HANCOCK,OH,45840,...,0.0,0.0,,,,MANDI KUHLMAN,4194214037,MMKUHLMAN@MARATHONPETROLEUM.COM,4.194214e+09,DURING INSTALLATION OF NEW 16” CONTROL VALVE A...
3027,2006,32147,20090384,,MARATHON PIPE LINE LLC,539 SOUTH MAIN STREET,FINDLAY,HANCOCK,OH,45840,...,0.0,0.0,,,,MANDI KUHLMAN,4194214037,MMKUHLMAN@MARATHONPETROLEUM.COM,4.194214e+09,"WHILE PREPARING TO INSTALL A NEW FABRICATION, ..."
3028,2009,32147,20090385,,MARATHON PIPE LINE LLC,539 SOUTH MAIN STREET,FINDLAY,HANCOCK,OH,45840,...,,,RESIDUAL PRODUCT IN THE LINE IGNITED FOLLOWING...,,,MANDI KUHLMAN,4194214037,MMKUHLMAN@MARATHONPETROLEUM.COM,4.194214e+09,"FOLLOWING A NITROGEN PURGE, COLD CUTS WERE UTI..."


In [28]:
pipeline_new = clean_lat_long(pipeline_file, lat_col="LATITUDE", long_col="LONGITUDE", split=False)
pipeline_new

  0%|                                                                                           | 0/12 [00:00<…

Latitude and Longitude Cleaning Report:
	2648 values cleaned (100.0%)
Result contains 2648 (100.0%) values in the correct format and 0 null values (0.0%)


Unnamed: 0,IYEAR,OPERATOR_ID,RPTID,OWNER_OPERATOR_ID,NAME,OPSTREET,OPCITY,OPCOUNTY,OPSTATE,OPZIP,...,IO_ALCO,MISC,UNKNOWN,UNKNOWN_TXT,PNAME,PTEL,PEMAIL,PFAX,NARRATIVE,latitude_longitude
0,2002,19237,20020037,,"TE PRODUCTS PIPELINE CO., LP",2929 ALLEN PARKWAY,HOUSTON,HARRIS,TX,77019,...,,,,,JLYNN STOUT,7137593614,JPSTOUT@TEPPCO.COM,7.137594e+09,,"(18.0, 57.0)"
1,2002,25146,20020038,,"EQUISTAR CHEMICALS, L.P.",PO BOX 1847,ALVIN,BRAZORIA,TX,77512,...,,,,,GREG LEBLANC,7138446942,GREGORY.LEBLANC@EQUISTARCHEM.COM,7.138447e+09,REGARDING PART C5: SECTIONS OF THIS LINE WERE...,"(31.86, 9.0)"
2,2002,12628,20020039,,MOBIL PIPELINE CO,800 BELL STREET,HOUSTON,HARRIS,TX,77002,...,0.0,,,,STEVEN J ROBENOLT,7136562651,STEVEN.J.ROBENOLT@EXXONMOBIL.COM,7.136568e+09,"ON JANUARY 24,2002 MOBIL PIPELINE COMPANY INCU...","(3.607, 35.244)"
6,2002,4805,20020043,,EXPLORER PIPELINE CO,PO BOX 2650,TULSA,TULSA,OK,74101,...,,,,,ROGER W FLEMING,9184935112,rfleming@expl.com,9.184935e+09,,"(3.0, 49.0)"
8,2002,12452,20020045,,MID - AMERICA PIPELINE CO (MAPCO),PO BOX 21628,TULSA,OSAGE,OK,74101,...,,,,,KENNETH L LYBARGER,9185730350,KENNETH.LYBARGER@WILLIAMS.COM,9.185731e+09,"AN EXCAVATOR, INSTALLING DRAIN TILE, HIT AND P...","(37.2323, -99.7068)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3025,2009,10012,20090382,,NUSTAR PIPELINE OPERATING PARTNERSHIP L.P.,"7340 W. 21ST STREET N., SUITE 200",WICHITA,SEDGWICK,KS,67205,...,,,,,DANIEL J. TIBBITS,3167217056,DAN.TIBBITS@NUSTARENERGY.COM,3.167739e+09,,"(40.5848, -99.3808)"
3026,2007,32147,20090383,,MARATHON PIPE LINE LLC,539 SOUTH MAIN STREET,FINDLAY,HANCOCK,OH,45840,...,0.0,,,,MANDI KUHLMAN,4194214037,MMKUHLMAN@MARATHONPETROLEUM.COM,4.194214e+09,DURING INSTALLATION OF NEW 16” CONTROL VALVE A...,"(39.6327, -87.2841)"
3027,2006,32147,20090384,,MARATHON PIPE LINE LLC,539 SOUTH MAIN STREET,FINDLAY,HANCOCK,OH,45840,...,0.0,,,,MANDI KUHLMAN,4194214037,MMKUHLMAN@MARATHONPETROLEUM.COM,4.194214e+09,"WHILE PREPARING TO INSTALL A NEW FABRICATION, ...","(30.0085, -90.8597)"
3028,2009,32147,20090385,,MARATHON PIPE LINE LLC,539 SOUTH MAIN STREET,FINDLAY,HANCOCK,OH,45840,...,,RESIDUAL PRODUCT IN THE LINE IGNITED FOLLOWING...,,,MANDI KUHLMAN,4194214037,MMKUHLMAN@MARATHONPETROLEUM.COM,4.194214e+09,"FOLLOWING A NITROGEN PURGE, COLD CUTS WERE UTI...","(30.1924, -90.8528)"


In [35]:
validate_lat_long(pipeline_new["latitude_longitude"])

0       True
1       True
2       True
6       True
8       True
        ... 
3025    True
3026    True
3027    True
3028    True
3029    True
Name: latitude_longitude, Length: 2648, dtype: bool

In [21]:
pipeline_new.columns.tolist()

['IYEAR',
 'OPERATOR_ID',
 'RPTID',
 'OWNER_OPERATOR_ID',
 'NAME',
 'OPSTREET',
 'OPCITY',
 'OPCOUNTY',
 'OPSTATE',
 'OPZIP',
 'IHOUR',
 'IDATE',
 'LATITUDE',
 'LONGITUDE',
 'ACCITY',
 'ACCOUNTY',
 'ACSTATE',
 'ACZIP',
 'MPVST',
 'SURNO',
 'TELRN',
 'TELDT',
 'PPPRP',
 'EMRPRP',
 'ENVPRP',
 'OPCPRP',
 'OPCPRPO',
 'PRODPRP',
 'OPPRP',
 'OOPPRP',
 'OOPPRPO',
 'PRPTY',
 'SPILLED',
 'COMM',
 'CLASS',
 'CLASS_TXT',
 'SPUNIT',
 'SPUNIT_TXT',
 'LOSS',
 'RECOV',
 'GEN_CAUSE',
 'GEN_CAUSE_TXT',
 'LINE_SEG',
 'IFED',
 'INTER',
 'OFFSHORE',
 'OFFAREA',
 'BNUMB',
 'OFFST',
 'OCS',
 'OPPROP',
 'PIPEROW',
 'HCA',
 'HCADESC',
 'SYSPRT',
 'SYSPRT_TXT',
 'SYSPRTO',
 'FAIL_OC',
 'FAIL_OC_TXT',
 'FAIL_OCO',
 'PRTYR',
 'INC_PRS',
 'MOP',
 'OPRS',
 'LRTYPE',
 'LRTYPE_TXT',
 'LRTYPEO',
 'LEAK',
 'LEAK_TXT',
 'PUNC_DIAM',
 'RUPTURE',
 'RUPTURE_TXT',
 'RUPLN',
 'PROPLN',
 'UBLKVM',
 'UBLKVA',
 'UBLKVR',
 'UBLKVC',
 'DBLKVM',
 'DBLKVA',
 'DBLKVR',
 'DBLKVC',
 'SEGISO',
 'VLVDIST',
 'SEGCONF',
 'INLINE',
 'INLI

In [8]:
# Pulling useful columns for mapping purposes
pipeline_new = pipeline_new.rename(columns = {
    'RPTID': 'REPORT_NUMBER',
    'OPERATOR_ID': 'OPERATOR_ID',
    'LATITUDE_clean': 'LOCATION_LATITUDE',
    'LONGITUDE_clean': 'LOCATION_LONGITUDE',
    'COMM': 'COMMODITY_RELEASED_TYPE',
    'LOSS': 'UNINTENTIONAL_RELEASE_BBLS',
    'ON_OFF_SHORE',
    'ACCITY',
    'ACCOUNTY',
    'ACSTATE',
    'SYSTEM_PART_INVOLVED',
    'GEN_CAUSE_TXT', 
    'PRPTY'
})

new_columns = [
    'REPORT_NUMBER',
    'OPERATOR_ID',
    'LOCATION_LATITUDE',
    'LOCATION_LONGITUDE',
    'COMMODITY_RELEASED_TYPE',
    'UNINTENTIONAL_RELEASE_BBLS',
    'ON_OFF_SHORE',
    'ONSHORE_CITY_NAME',
    'ONSHORE_COUNTY_NAME',
    'ONSHORE_STATE_ABBREVIATION',
    'SYSTEM_PART_INVOLVED',
    'CAUSE',
    'PRPTY'
    ]

['REPORT_NUMBER',
 'NAME',
 'DATETIME_OF_INCIDENT',
 'LOCATION_LATITUDE',
 'LOCATION_LONGITUDE',
 'COMMODITY_RELEASED_TYPE',
 'SPUNIT_TXT',
 'LOSS',
 'FATALITY_IND',
 'INJURY_IND',
 'SHUTDOWN_DUE_ACCIDENT_IND',
 'IGNITE_IND',
 'EXPLODE_IND',
 'ON_OFF_SHORE',
 'INCIDENT_AREA_TYPE',
 'DEPTH_OF_COVER',
 'ITEM_INVOLVED',
 'PIPE_DIAMETER',
 'INSTALLATION_YEAR',
 'AGE_OF_FACILITY',
 'RELEASE_TYPE',
 'LEAK_TYPE',
 'TOTAL_EST_COST',
 'ACCIDENT_PRESSURE',
 'WATER_CONTAM_IND',
 'CAUSE']

In [9]:
# Load 2002-2009 data into dataframe
# pipeline_file = '../Resources/2002-2009.csv'
pipeline_data, fixed_pipeline_data = extract(pipeline_file,column_names)
yr2002_2009_df = fixed_pipeline_data

TypeError: argument of type 'method' is not iterable

In [None]:
# import pandas as pd
# from shapely.geometry import Point
# import geopandas as gpd
# from geopandas import GeoDataFrame

# geometry = [Point(xy) for xy in zip(latlong_df['LOCATION_LATITUDE'], latlong_df['LOCATION_LONGITUDE'])]
# gdf = GeoDataFrame(latlong_df, geometry=geometry)   

# #this is a simple map that goes with geopandas
# world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
# gdf.plot(ax=world.plot(figsize=(10, 6)), marker='o', color='red', markersize=15);

## Mapping 2010-Present Spills

In [None]:
# Pulling useful columns for mapping purposes
my_columns = [
    'REPORT_NUMBER',
    'OPERATOR_ID',
    'LOCATION_LATITUDE',
    'LOCATION_LONGITUDE',
    'COMMODITY_RELEASED_TYPE',
    'UNINTENTIONAL_RELEASE_BBLS',
    'ON_OFF_SHORE',
    'ONSHORE_CITY_NAME',
    'ONSHORE_COUNTY_NAME',
    'ONSHORE_STATE_ABBREVIATION',
    'SYSTEM_PART_INVOLVED',
    'CAUSE'
    ]

In [None]:
# mapping columns called info2010_df
mapping_df = spill2010_df[my_columns]
# Creating another dataframe of first 100 rows called 'test' t2010_df
# Used to get map functioning and limiting API requests
# t2010_df = info2010_df[:100]
# t2010_df.to_json('test_df.json') # exports DataFrame to json for javascript and mapping functionality
# t2010_df
mapping_df

In [None]:
# # Join lat and long into one column...might be easier for mapping json
# t2010_df.isnull().sum(axis = 0)

In [None]:
# dataframe to dictionary
mapping_dict = mapping_df.to_dict('records')


In [None]:
# making dict into easy to read json for JS array
json_mapping = json.dumps(mapping_dict, indent = 4)
print(json_mapping)

## Mapping 2010-Present 'info2010_df' spill locations ~4000 locations

In [None]:
info_box_template = """
<dl>
<dt>Operator ID: </dt><dd>{OPERATOR_ID}</dd>
<dt>City</dt><dd>{ONSHORE_CITY_NAME}</dd>
<dt>State</dt><dd>{ONSHORE_STATE_ABBREVIATION}</dd>
<dt>Spill Commodity</dt><dd>{COMMODITY_RELEASED_TYPE}</dd>
<dt>Spill Volume (bbls)</dt><dd>{UNINTENTIONAL_RELEASE_BBLS}</dd>
</dl>

"""


spill_info = [info_box_template.format(**row) for index, row in info2010_df.iterrows()]

In [None]:
# Create map from lat and long
locations = info2010_df[["LOCATION_LATITUDE","LOCATION_LONGITUDE"]]
spill = info2010_df["UNINTENTIONAL_RELEASE_BBLS"]
fig = gmaps.figure(center=(36, -97), zoom_level=4)
heat_layer = gmaps.heatmap_layer(locations, weights=spill,dissipating=False,
            max_intensity=30000, point_radius=2)
marker_layer = gmaps.marker_layer(locations, info_box_content=spill_info)
fig.add_layer(heat_layer)
fig.add_layer(marker_layer)
#Display the figure
fig

## Add another map 
### Filtering only pipeline, corrosion failures
### showing property damage instead of spill volume

In [None]:
my_columns = [
    'REPORT_NUMBER',
    'OPERATOR_ID',
    'LOCATION_LATITUDE',
    'LOCATION_LONGITUDE',
    'COMMODITY_RELEASED_TYPE',
    'UNINTENTIONAL_RELEASE_BBLS',
    'ON_OFF_SHORE',
    'ONSHORE_CITY_NAME',
    'ONSHORE_COUNTY_NAME',
    'ONSHORE_STATE_ABBREVIATION',
    'SYSTEM_PART_INVOLVED',
    'INSTALLATION_YEAR',
    'CAUSE',
    'PRPTY'
    ]

In [None]:
pipeline_df = spill2010_df[my_columns]
pipeline_df

In [None]:
# # changing formatting of Property column to currency
# pipeline_df['PRPTY'] = pipeline_df['PRPTY'].apply(lambda x: "${:,.0f}".format(x))
# pipeline_df

In [None]:
# Filtering causes; showing only corrosion failure, material failure of pipe or weld and equipment failure.
pipeline_df.loc[(pipeline_df["CAUSE"] == "CORROSION FAILURE") & 
                       (pipeline_df["CAUSE"] == "MATERIAL FAILURE OF PIPE OR WELD") &
                       (pipeline_df["CAUSE"] == "EQUIPMENT FAILURE")]
pipeline_df

In [None]:
info_box_template = """
<dl>
<dt>Operator ID: </dt><dd>{OPERATOR_ID}</dd>
<dt>City</dt><dd>{ONSHORE_CITY_NAME}</dd>
<dt>State</dt><dd>{ONSHORE_STATE_ABBREVIATION}</dd>
<dt>Cause:</dt><dd>{CAUSE}</dd>
<dt>Spill Commodity</dt><dd>{COMMODITY_RELEASED_TYPE}</dd>
<dt>Spill Volume (bbls)</dt><dd>{UNINTENTIONAL_RELEASE_BBLS}</dd>
<dt>Spill Cost</dt><dd>{PRPTY}</dd>
</dl>

"""

# Using the pipeline_df to gather spill info for marker data...
spill_info = [info_box_template.format(**row) for index, row in pipeline_df.iterrows()]

In [None]:
# Create map from lat and long
locations = pipeline_df[["LOCATION_LATITUDE","LOCATION_LONGITUDE"]]
spill = pipeline_df["PRPTY"]
fig = gmaps.figure(center=(36, -97), zoom_level=4)
heat_layer = gmaps.heatmap_layer(locations, weights=spill,dissipating=False,
            max_intensity=1000000000, point_radius=2)
marker_layer = gmaps.marker_layer(locations, info_box_content=spill_info)
fig.add_layer(heat_layer)
fig.add_layer(marker_layer)
#Display the figure
fig