In [1]:
# import necessary packages
import os
import pandas
import numpy
import matplotlib.pyplot as plt
#import geopandas as gpd
from pathlib import Path
# import function for querying DOE dataframesaq    
from monte_carlo_utils import get_DOE_atb, get_DOE_barges, concat_shp
from dask import delayed

# set file location and name
atb_shp_path = Path('/data/MIDOSS/shapefiles/atb_2018_01.shp')
barge_shp_path = Path('/data/MIDOSS/shapefiles/barge_2018_01.shp')
doe_xls_path = Path('/data/MIDOSS/spreadsheets/MuellerTrans4-30-20.xlsx')
fac_xls_path = Path(
    '/home/rmueller/Projects/MIDOSS/marine_transport_data/Oil_Transfer_Facilities.xlsx'
)
shapefile_path = Path('/data/MIDOSS/shapefiles/') 

# import facility locations
facWA = pandas.read_excel(
    fac_xls_path,
    sheet_name = 'Washington',
    usecols="B,D,J,K"
)

# import facility locations
facCAD = pandas.read_excel(
    fac_xls_path,
    sheet_name = 'British Columbia',
    usecols="A",
    nrows=10
)

In [2]:
# def concat_shp(ship_type, shapefile_path):
#     """
#       INPUT: 
#           - ship_type ["tanker", "barge", "atb", etc]: MIDOSS-name for ship type (see oil_attribution.yaml for list)
#           - shapefile_path [Path]: e.g., on Salish,Path('/data/MIDOSS/shapefiles/') 
#       OUTPUT: 
#           - dataframe of all 2018 ship tracks for given ship_type
#       requirements: 
#           This script requires being run on Salish.eoas.ubc.ca, with path to shapefiles being /data/MIDOSS/shapefiles/
#     """
#     for months in range(1,13):
#         # set file location and name
#         shapefile = shapefile_path/f'{ship_type}_2018_{months:02d}.shp'
#         # import shapefile using geopandas
#         monthly_shp = gpd.read_file(shapefile)
#         if months == 1:
#             print(f'creating {ship_type} shapefile for 2018, starting with January data')
#             allTracks = monthly_shp
#         else:
#             print(f'Concatenating {ship_type} data from month {months}')
#             allTracks = gpd.GeoDataFrame(
#                 pandas.concat([allTracks, monthly_shp])
#             )
#     return allTracks

# Concatenate all monthly ship track data to get values for entire year

### ATBs

In [3]:
%%time
allTracks = {} 
allTracks_dask = delayed(concat_shp("atb", shapefile_path))
allTracks["atb"]=allTracks_dask.compute()

creating atb shapefile for 2018, starting with January data
Concatenating atb data from month 2
Concatenating atb data from month 3
Concatenating atb data from month 4
Concatenating atb data from month 5
Concatenating atb data from month 6
Concatenating atb data from month 7
Concatenating atb data from month 8
Concatenating atb data from month 9
Concatenating atb data from month 10
Concatenating atb data from month 11
Concatenating atb data from month 12
CPU times: user 36.5 s, sys: 881 ms, total: 37.4 s
Wall time: 37.5 s


In [4]:
allTracks["atb"].shape[0]

588136

### Barges

In [5]:
%%time
allTracks_dask = delayed(concat_shp("barge", shapefile_path))
allTracks["barge"]=allTracks_dask.compute()

creating barge shapefile for 2018, starting with January data
Concatenating barge data from month 2
Concatenating barge data from month 3
Concatenating barge data from month 4
Concatenating barge data from month 5
Concatenating barge data from month 6
Concatenating barge data from month 7
Concatenating barge data from month 8
Concatenating barge data from month 9
Concatenating barge data from month 10
Concatenating barge data from month 11
Concatenating barge data from month 12
CPU times: user 15min 4s, sys: 31.4 s, total: 15min 36s
Wall time: 15min 36s


### Tankers

In [6]:
%%time
ship_type = "tanker"
allTracks["tanker"] = concat_shp("tanker", shapefile_path)

creating tanker shapefile for 2018, starting with January data
Concatenating tanker data from month 2
Concatenating tanker data from month 3
Concatenating tanker data from month 4
Concatenating tanker data from month 5
Concatenating tanker data from month 6
Concatenating tanker data from month 7
Concatenating tanker data from month 8
Concatenating tanker data from month 9
Concatenating tanker data from month 10
Concatenating tanker data from month 11
Concatenating tanker data from month 12
CPU times: user 1min 10s, sys: 1.29 s, total: 1min 12s
Wall time: 1min 12s


## Check barge ship track count used in ping-to-transfer ratio estimate
- values recorded in `Origin_Destination_Analysis_updated.xlsx`

In [7]:
print(f'{allTracks["atb"].shape[0]} ATB ship tracks')
print(f'cf. 588,136 ATB ship tracks used in ping-to-transfer estimate')
print(f'{allTracks["barge"].shape[0]} barge ship tracks')

588136 ATB ship tracks
cf. 588,136 ATB ship tracks used in ping-to-transfer estimate
13902896 barge ship tracks


##### Take-away: total number of ship tracks used in the ping-to-transfer ratio matches those used in this analysis.  That's good.  It's what I wanted to verify.  

## Find all ATB and barge tracks with generic attribution as both origin and destination

In [8]:
attribution = ['US','Canada','Pacific']
noNone = {}
allNone = {}
generic = {}

for vessel_type in ["atb",'barge']:
    generic[vessel_type] = allTracks[vessel_type].loc[
        (allTracks[vessel_type].TO.isin(attribution)) & 
        (allTracks[vessel_type].FROM_.isin(attribution))
    ]

In [9]:
generic["barge"].shape

(5865057, 7)

## Find all ship tracks with None as origin and destination

In [10]:
for vessel_type in ["atb",'barge']:
    # keep rows with None attribution 
    shp_tmp = allTracks[vessel_type].isnull()
    row_has_None = shp_tmp.any(axis=1)
    allNone[vessel_type] = allTracks[vessel_type][row_has_None]

## Find all ship tracks with no None designations in either origin or destination

In [11]:
for vessel_type in ["atb",'barge']:
    # drop rows with None attribution
    noNone[vessel_type] = allTracks[vessel_type].dropna().reset_index(drop=True)

## Find all ship tracks with origin or destination as marine terminal
- compare this value to frac[vessel_type]["marine_terminal"] to quantify how many ship tracks have mixed origin/destation as marine_terminal/generic (I don't think mixed with None is possible). 

In [12]:
allfac = {}
toWA = {}
fromWA = {}
bothWA = {}
for vessel_type in ["atb",'barge']:
    allfac[vessel_type] = allTracks[vessel_type].loc[
        ((allTracks[vessel_type].TO.isin(facWA.FacilityName)) |
         (allTracks[vessel_type].FROM_.isin(facWA.FacilityName))
        )
    ]
    toWA[vessel_type] = allTracks[vessel_type].loc[
        (allTracks[vessel_type].TO.isin(facWA.FacilityName))
    ] 
    fromWA[vessel_type] = allTracks[vessel_type].loc[
         (allTracks[vessel_type].FROM_.isin(facWA.FacilityName))
    ] 
    bothWA[vessel_type] = allTracks[vessel_type].loc[
        ((allTracks[vessel_type].TO.isin(facWA.FacilityName)) &
         (allTracks[vessel_type].FROM_.isin(facWA.FacilityName)))
    ] 

## Find all ship tracks with any WA or CAD marine oil terminal as either origin or destination

In [13]:
allfacWACAD = {}
for vessel_type in ["atb","barge"]:
    allfacWACAD[vessel_type] = allTracks[vessel_type].loc[
        ((allTracks[vessel_type].TO.isin(facWA.FacilityName)) |
         (allTracks[vessel_type].FROM_.isin(facWA.FacilityName))|
         (allTracks[vessel_type].TO.isin(facCAD.Name)) |
         (allTracks[vessel_type].FROM_.isin(facCAD.Name))
        )
    ]

In [14]:
print(f'To OR From: {allfac["atb"].shape[0]}')  
print(f'To AND From: {bothWA["atb"].shape[0]}')  
print(f'To: {toWA["atb"].shape[0]}')
print(f'From: {fromWA["atb"].shape[0]}')
print(f'To + From: {toWA["atb"].shape[0] + fromWA["atb"].shape[0]}')
print(f'To + From - "To AND from": {toWA["atb"].shape[0] + fromWA["atb"].shape[0] - bothWA["atb"].shape[0]}')

To OR From: 129165
To AND From: 13238
To: 58520
From: 83883
To + From: 142403
To + From - "To AND from": 129165


In [15]:
allfacWACAD["atb"].shape[0]

240620

##### TAKE-AWAY: 
- 129165 ship tracks are to or from WA marine terminals with 
- 13238 of these having WA marine terminal as both to and from
- The remainder are mixed with origin or destination as WA marine terminal and the other end-member being US, Pacific, Canada or CAD marine terminal (None values shouldn't be includeded here) 

##### TEST: 
- All tracks = Generic + allFac + None

In [16]:
print(f'All tracks = Generic + allFac + allNone')
print(f'{allTracks["atb"].shape[0]} = {generic["atb"].shape[0] + allfac["atb"].shape[0] + allNone["atb"].shape[0]}')

All tracks = Generic + allFac + allNone
588136 = 476681


##### Hypothesis: the difference in the above is CAD terminal transfers.  Testing....

In [17]:
print(f'All tracks = Generic + allFacWACAD + allNone')
print(f'{allTracks["atb"].shape[0]} = {generic["atb"].shape[0] + allfacWACAD["atb"].shape[0] + allNone["atb"].shape[0]}')

All tracks = Generic + allFacWACAD + allNone
588136 = 588136


##### Good!  So 588136 - 476681  = 111455 => CAD traffic.  

In [18]:
# compare ATB tracks
vessel_type = "atb"
print(f'Attributed (no None in to or from): {noNone[vessel_type].shape[0]}')
print(f'Generic (to AND from): {generic[vessel_type].shape[0]}')
print(f'Attributed - Generic = {noNone[vessel_type].shape[0] - generic[vessel_type].shape[0]}')
print(f'Marine terminal (to or from): {allfac[vessel_type].shape[0]}')

Attributed (no None in to or from): 359943
Generic (to AND from): 119323
Attributed - Generic = 240620
Marine terminal (to or from): 129165


In [19]:
# create a dictionary of ratios between subsampled data and all ship tracks
frac = {}
frac['atb'] = {}
frac['barge'] = {}
for vessel_type in ["atb","barge"]:
    frac[vessel_type]["unattributed"] = allNone[vessel_type].shape[0]/allTracks[vessel_type].shape[0]
    frac[vessel_type]["attributed"] = noNone[vessel_type].shape[0]/allTracks[vessel_type].shape[0]
    frac[vessel_type]["generic"] = generic[vessel_type].shape[0]/allTracks[vessel_type].shape[0]
    frac[vessel_type]["marine_terminal_WACAD"] = allfacWACAD[vessel_type].shape[0]/allTracks[vessel_type].shape[0]
    frac[vessel_type]["marine_terminal_diff"] = frac[vessel_type]["attributed"] - frac[vessel_type]["generic"]
    print(f'~~~  {vessel_type} ~~~')
    print(f'Fraction of {vessel_type} tracks that are unattributed: {frac[vessel_type]["unattributed"]}')
    print(f'Fraction of {vessel_type} tracks that are attributed: {frac[vessel_type]["attributed"]}')
    print(f'Fraction of attributed {vessel_type} tracks that are generic : {frac[vessel_type]["generic"]}')
    print(f'Fraction of attributed {vessel_type} tracks that are linked to marine terminal (WACAD): {frac[vessel_type]["marine_terminal_WACAD"]}')
    print(f'Fraction of attributed {vessel_type} tracks that are linked to marine terminal (diff): {frac[vessel_type]["marine_terminal_diff"]}')

~~~  atb ~~~
Fraction of atb tracks that are unattributed: 0.38799359331855215
Fraction of atb tracks that are attributed: 0.6120064066814478
Fraction of attributed atb tracks that are generic : 0.20288334670892447
Fraction of attributed atb tracks that are linked to marine terminal (WACAD): 0.40912305997252335
Fraction of attributed atb tracks that are linked to marine terminal (diff): 0.4091230599725233
~~~  barge ~~~
Fraction of barge tracks that are unattributed: 0.4340840929832173
Fraction of barge tracks that are attributed: 0.5659159070167827
Fraction of attributed barge tracks that are generic : 0.4218586544846484
Fraction of attributed barge tracks that are linked to marine terminal (WACAD): 0.1440572525321343
Fraction of attributed barge tracks that are linked to marine terminal (diff): 0.14405725253213436


In [20]:
for vessel_type in ["atb","barge"]:
    print(f'Total number of tracks for {vessel_type}: {allTracks[vessel_type].shape[0]:1.2e}')

Total number of tracks for atb: 5.88e+05
Total number of tracks for barge: 1.39e+07


In [21]:
print(f'Total number of unattributed barge tracks: {allTracks[vessel_type].shape[0]*frac[vessel_type]["unattributed"]:10.2f}')
print(f'Total number of generically-attributed barge tracks: {allTracks[vessel_type].shape[0]*frac[vessel_type]["generic"]:10.2f}')
print(f'Total number of marine-terminal-attributed barge tracks: {allTracks[vessel_type].shape[0]*frac[vessel_type]["marine_terminal_WACAD"]:10.2f}')

Total number of unattributed barge tracks: 6035026.00
Total number of generically-attributed barge tracks: 5865057.00
Total number of marine-terminal-attributed barge tracks: 2002813.00


## Quantify barge and ATB cargo transfers in 2018 DOE database

In [22]:
[atb_in, atb_out]=get_DOE_atb(
    doe_xls_path, 
    fac_xls_path, 
    transfer_type = 'cargo', 
    facilities='selected'
)
barge_inout=get_DOE_barges(
    doe_xls_path,
    fac_xls_path, 
    direction='combined',
    facilities='selected',
    transfer_type = 'cargo')

get_DOE_barges: not yet tested with fac_xls as input
combined
cargo
cargo


In [23]:
transfers = {}
transfers["barge"] = barge_inout.shape[0]
transfers["atb"] = atb_in.shape[0] + atb_out.shape[0]
print(f'{transfers["atb"]} cargo transfers for atbs')
print(f'{transfers["barge"]} cargo transfers for barges')

677 cargo transfers for atbs
2773 cargo transfers for barges


### Group barge and atb transfers by AntID and:
- compare transfers
- compare fraction of grouped transfers to ungrouped transfers by vessel type

In [24]:
transfers["barge_antid"] = barge_inout.groupby('AntID').sum().shape[0]
transfers["atb_antid"] = atb_in.groupby('AntID').sum().shape[0] + atb_out.groupby('AntID').sum().shape[0]
print(f'{transfers["atb_antid"]} ATB cargo transfers based on AntID')
print(f'{transfers["atb_antid"]/transfers["atb"]:.2f} ATB fraction AntID to all')
print(f'{transfers["barge_antid"]} barge cargo transfers based on AntID')
print(f'{transfers["barge_antid"]/transfers["barge"]:.2f} barge fraction AntID to all')

482 ATB cargo transfers based on AntID
0.71 ATB fraction AntID to all
2334 barge cargo transfers based on AntID
0.84 barge fraction AntID to all


##### Take away: Barge and ATBs have similar number of mixed-oil-type transfers with ATBs having more mixed-type transfers (29% of 677) than barges (16% of 2334).  Even though values are similar, we will use the AntID grouped number of transfers for our ping to transfer ratios

### Calculate the number of oil cargo barges we expect using the AntID grouping for ping-to-transfer ratio

In [25]:
ping2transfer = {}
oilcargobarges = {}
# ATB ping-to-transfer ratio
ping2transfer["atb"] = allTracks["atb"].shape[0]/transfers["atb_antid"]
# Estimate number of oil cargo barges using number of barge transfers 
# and atb ping-to-transfer ratio
oilcargobarges["total"] = transfers["barge_antid"]*ping2transfer["atb"]
print(f'We expect {oilcargobarges["total"]:.0f} total oil cargo pings for barge traffic')

We expect 2847945 total oil cargo pings for barge traffic


### Calculate the number of Attributed tracks we get for ATBs and estimate the equivalent value for barges

In [26]:
# estimate the ratio of attributed ATB tracks to ATB cargo transfers
noNone_ratio = noNone["atb"].shape[0]/transfers["atb_antid"]
print(f'We get {noNone_ratio:.2f} attributed ATB tracks per ATB cargo transfer')
# estimate the amount of attributed tracks we'd expect to see for tank barges based on tank barge transfers
print(f'We expect {noNone_ratio*transfers["barge_antid"]:.2f} attributed barge tracks, but we get {noNone["barge"].shape[0]}')
# estimate spurious barge voyages by removing estimated oil carge barge from total
fraction_nonoilbarge = (noNone["barge"].shape[0]-noNone_ratio*transfers["barge_antid"])/noNone["barge"].shape[0]
print(f'We estimate that non-oil tank barge voyages account for {100*fraction_nonoilbarge:.2f}% of barge voyages')

We get 746.77 attributed ATB tracks per ATB cargo transfer
We expect 1742960.50 attributed barge tracks, but we get 7867870
We estimate that non-oil tank barge voyages account for 77.85% of barge voyages


In [27]:
#The above value was 88% when not using the AntID grouping

# Evaluate oil cargo traffic pings for ATBs and barges

In [28]:
# Dictionary for probability of oil cargo barges for our 3 attribution types
P_oilcargobarges = {}

In [29]:
allfac = {}
for vessel_type in ["atb",'barge']:
    allfac[vessel_type] = allTracks[vessel_type].loc[
        ((allTracks[vessel_type].TO.isin(facWA.FacilityName)) |
         (allTracks[vessel_type].FROM_.isin(facWA.FacilityName)))
    ] 

In [30]:
# Ratio of ATB pings with WA facility attribution to ATB WA transfers
fac_att_ratio = allfacWACAD["atb"].shape[0]/transfers["atb_antid"]
# Fraction of barge pings with generic attribution that are expected to carry oil based on ATB pings and transfers
P_oilcargobarges["facility"] = fac_att_ratio*transfers["barge_antid"]/allfacWACAD["barge"].shape[0]

In [31]:
print(f'{allfac["atb"].shape[0]} ATB tracks have a WA oil facility as origin or destination')
print(f'{allfac["barge"].shape[0]} barge tracks have a WA oil facility as origin or destination')
print(f'We get {fac_att_ratio:.2f} WA oil marine terminal attributed ATB tracks per ATB cargo transfer')
# estimate the amount of oil cargo facility tracks we'd expect to see for tank barges based on tank barge transfers
print(f'We expect {fac_att_ratio*transfers["barge_antid"]:.2f} WA oil marine terminal attributed barge tracks, but we get {allfac["barge"].shape[0]}')
fraction_nonoilbarge = (allfac["barge"].shape[0]-fac_att_ratio*transfers["barge_antid"])/allfac["barge"].shape[0]
print(f'We estimate that non-oil tank barge voyages to/from marine terminals account for {100*fraction_nonoilbarge:.2f}% of barge voyages attributed to WA marine terminals')

129165 ATB tracks have a WA oil facility as origin or destination
1666271 barge tracks have a WA oil facility as origin or destination
We get 499.21 WA oil marine terminal attributed ATB tracks per ATB cargo transfer
We expect 1165159.92 WA oil marine terminal attributed barge tracks, but we get 1666271
We estimate that non-oil tank barge voyages to/from marine terminals account for 30.07% of barge voyages attributed to WA marine terminals


###### When not grouped by AntID:
- 129165 ATB tracks have a WA oil facility as origin or destination
- 1666271 barge tracks have a WA oil facility as origin or destination
- We get 190.79 WA oil marine terminal attributed ATB tracks per ATB cargo transfer
- We expect 529061.37 WA oil marine terminal attributed barge tracks, but we get 1666271
- We estimate that non-oil tank barge voyages to/from marine terminals account for 68.25% of - barge voyages attributed to WA marine terminals

### Repeat for Generic attibution only

In [32]:
# Ratio of ATB pings with generic attribution to ATB WA transfers
generic_ratio = generic["atb"].shape[0]/transfers["atb_antid"]
# Fraction of barge pings with generic attribution that are expected to carry oil based on ATB pings and transfers
P_oilcargobarges["generic"] = generic_ratio*transfers["barge_antid"]/generic["barge"].shape[0]

In [33]:
print(f'{generic["atb"].shape[0]} ATB tracks have Pacific, US or Canada as origin or destination')
print(f'{generic["barge"].shape[0]} barge tracks have Pacific, US or Canada as origin or destination')
print(f'We get {generic_ratio:.2f} Generically attributed ATB tracks per ATB cargo transfer')
# estimate the amount of oil cargo facility tracks we'd expect to see for tank barges based on tank barge transfers
print(f'We expect {generic_ratio*transfers["barge_antid"]:.2f} Generically attributed barge tracks, but we get {generic["barge"].shape[0]}')
fraction_nonoilbarge = (generic["barge"].shape[0]-generic_ratio*transfers["barge_antid"])/generic["barge"].shape[0]
print(f'We estimate that non-oil tank barge voyages account for {100*fraction_nonoilbarge:.2f}% of barge voyages with both to/from as generic attributions ')

119323 ATB tracks have Pacific, US or Canada as origin or destination
5865057 barge tracks have Pacific, US or Canada as origin or destination
We get 247.56 Generically attributed ATB tracks per ATB cargo transfer
We expect 577800.59 Generically attributed barge tracks, but we get 5865057
We estimate that non-oil tank barge voyages account for 90.15% of barge voyages with both to/from as generic attributions 


### Repeat for No attribution

In [34]:
# Ratio of ATB pings with "None" attribution to ATB WA transfers
allNone_ratio = allNone["atb"].shape[0]/transfers["atb_antid"]
# Fraction of barge pings with None attribution that are expected to carry oil based on ATB pings and transfers
P_oilcargobarges["none"] = allNone_ratio*transfers["barge_antid"]/allNone["barge"].shape[0]

In [35]:
print(f'{allNone["atb"].shape[0]} ATB tracks have None as origin or destination')
print(f'{allNone["barge"].shape[0]} barge tracks have None as  as origin or destination')
print(f'We get {allNone_ratio:.2f} None attributed ATB tracks per ATB cargo transfer')
# estimate the amount of oil cargo facility tracks we'd expect to see for tank barges based on tank barge transfers
print(f'We expect {allNone_ratio*transfers["barge_antid"]:.2f} None attributed oil cargo barge tracks, but we get {allNone["barge"].shape[0]}')
fraction_nonoilbarge = (allNone["barge"].shape[0]-allNone_ratio*transfers["barge_antid"])/allNone["barge"].shape[0]
print(f'We estimate that non-oil tank barge voyages account for {100*fraction_nonoilbarge:.2f}% of barge voyages with None attributions ')

228193 ATB tracks have None as origin or destination
6035026 barge tracks have None as  as origin or destination
We get 473.43 None attributed ATB tracks per ATB cargo transfer
We expect 1104984.36 None attributed oil cargo barge tracks, but we get 6035026
We estimate that non-oil tank barge voyages account for 81.69% of barge voyages with None attributions 


#### Find the probability of oil carge for each ping classification, i.e.:
- `oilcargobarges["total"]` = 588136.0 = (1) + (2) + (3), where 
- (1) `P_oilcargobarges["facilities"]` * `allfacWACAD["barges"].shape[0]`  
- (2) `P_oilcargobarges["none"]` * `allNone["barges"].shape[0]`  
- (3) `P_oilcargobarges["generic"]` * `generic["barges"].shape[0]`  

In [36]:
print(P_oilcargobarges["facility"])
print(P_oilcargobarges["none"])
print(P_oilcargobarges["generic"])

0.5817617106601806
0.18309521135382856
0.0985157663535479


In [37]:
print(allfacWACAD["barge"].shape[0])  
print(allNone["barge"].shape[0])
print(generic["barge"].shape[0])

2002813
6035026
5865057


In [38]:
print(P_oilcargobarges["facility"] * allfacWACAD["barge"].shape[0])
print(P_oilcargobarges["none"] * allNone["barge"].shape[0])
print(P_oilcargobarges["generic"] * generic["barge"].shape[0])

1165159.9170124482
1104984.3609958505
577800.5850622406


In [39]:
oilcargobarges["facilities"] = (P_oilcargobarges["facility"] * allfacWACAD["barge"].shape[0])
oilcargobarges["none"] = (P_oilcargobarges["none"] * allNone["barge"].shape[0])
oilcargobarges["generic"] = (P_oilcargobarges["generic"] * generic["barge"].shape[0])

In [40]:
print('oilcargobarges["total"] = oilcargobarges["facilities"] + oilcargobarges["none"] + oilcargobarges["generic"]?')
#oilcargobarges_sum = oilcargobarges["facilities"] + oilcargobarges["none"] + oilcargobarges["generic"]
print(f'{oilcargobarges["total"]:.0f} =? {oilcargobarges["facilities"] + oilcargobarges["none"] + oilcargobarges["generic"]:.0f}')
missing_pings = oilcargobarges["total"]-(oilcargobarges["facilities"] + oilcargobarges["none"] + oilcargobarges["generic"])
print(f' Missing {missing_pings} pings ({100*missing_pings/oilcargobarges["total"]:.0f}%)') 

oilcargobarges["total"] = oilcargobarges["facilities"] + oilcargobarges["none"] + oilcargobarges["generic"]?
2847945 =? 2847945
 Missing -4.656612873077393e-10 pings (-0%)


I'm not sure where this 11% error comes from. I used WA-only terminal pings and transfers for ping-to-transfer ratio but multiplied this by the total number of CAD and WA oil transfer terminal pings.  

In [41]:
allfacWACAD["barge"].shape[0] + generic["barge"].shape[0] + allNone["barge"].shape[0]

13902896

In [42]:
allTracks["barge"].shape[0]

13902896