### This notebook creates a .yaml "driver" file used in the monte_carlo setup 

This notebook uses values in an oil_capacity.xlsx spreadsheet. 

**To do**: 
- tighten-up values [in process]
- Update barge and ATB min/max fuel values to come from .xlsx spreadsheet rather than being hard-coded
- Make sure all the values we need for this notebook are in oil_capacity.xlsx (cargo and fuel capacity values and their references are in [this google spreadsheet](https://docs.google.com/spreadsheets/d/1URKN77iQHN-2cAVfG5T7SEiDTLTcolqL/edit#gid=1024416588))

In [1]:
import sys
import yaml
import pathlib
import numpy
import pandas
import matplotlib.pyplot as plt
import os
import warnings
from monte_carlo_utils import make_bins, get_bin, place_into_bins, decimal_divide
from monte_carlo_utils import get_DOE_atb_transfers, get_doe_transfers 
from monte_carlo_utils import get_voyage_transfers, assign_facility_region

### Set values used in calculations

In [2]:
# This is the precision used to calculate weights; it is the third
# argument handed to decimal_divide when the ATB weights are computed.
# It seems that a precision of 2 is necessary for weights
# to sum to 1.0
precision = 2

### Define Directories & Create Dictionary Structure

In [3]:
# NOTE(review): every path in this cell is an absolute path on one user's
# machine; the notebook only runs elsewhere after these are edited.

# Driver file written by this notebook
output_directory = pathlib.Path('/Users/rmueller/Data/MIDOSS/marine_transport_data/')
output_file = output_directory.joinpath('oil_attribution.yaml')

# Figure output locations
graphics_directory = pathlib.Path(
    '/Users/rmueller/Projects/MIDOSS/graphics_figures/monte_carlo/oil_capacities'
)
graphics_directory_transfers = pathlib.Path(
    '/Users/rmueller/Projects/MIDOSS/graphics_figures/monte_carlo/6_regions'
)

# Dept. of Ecology 2018 oil transfer spreadsheet
doe_directory = pathlib.Path('/Users/rmueller/Data/MIDOSS/DeptOfEcology/')
DOE_2018_xlsx = doe_directory.joinpath('MuellerTrans4-30-20.xlsx')

# Facility names and lat/lon information file
facilities_xlsx = pathlib.Path(
    '/Users/rmueller/Data/MIDOSS/marine_transport_data/',
    'Oil_Transfer_Facilities.xlsx',
)

# Voyage dataset
CamSheet = pathlib.Path(
    '/Users/rmueller/Library/Mobile Documents/',
    'com~apple~CloudDocs/Documents/Publications/MIDOSS/',
    'MIDOSS_MuellerEtAl_paper1/Methods/origin-destination/',
    'Origin_Destination_Analysis_updated.xlsx',
)

#~~~ create dictionary structure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
oil_attrs = {}

# Data directory kept as a Path (short name keeps the lines below short);
# the dictionary itself only stores plain strings so it can be saved to file.
ddir = pathlib.Path(
    '/Users/rmueller/Data/MIDOSS/marine_transport_data/'
)

# section for directories (note: 'ais_ping_data' is a spreadsheet that is
# filed under 'directories')
oil_attrs['directories'] = {
    'data': os.fspath(ddir),
    'geotiff': os.fspath(
        pathlib.Path('/Users/rmueller/Data/MIDOSS/AIS/ShipTrackDensityGeoTIFFs/')
    ),
    'ais_ping_data': os.fspath(
        ddir / 'Origin_Destination_Analysis_updated.xlsx'
    ),
}

# section for file names, all located in the data directory
oil_attrs['files'] = {
    label: os.fspath(ddir / file_name)
    for label, file_name in (
        # yaml files with Dept. of Ecology oil transfer data
        ('CAD_origin', 'CAD_origin.yaml'),
        ('WA_destination', 'WA_destination.yaml'),
        ('WA_origin', 'WA_origin.yaml'),
        # US_origin is for US as origin
        ('US_origin', 'US_origin.yaml'),
        # US_combined represents the combined import and export of fuel
        ('US_combined', 'US_general.yaml'),
        ('Pacific_origin', 'Pacific_origin.yaml'),
        ('fuel', 'fuel_by_vessel.yaml'),
        ('facility_region', 'facility_region.yaml'),
        # excel spreadsheet with oil capacity data for all vessel types
        ('oil_capacity', 'oil_capacity.xlsx'),
        # weights for allocating tank barge capacities for
        # ATBs < 50 m and tank barges
        ('barge_weights', 'barge_capacity.yaml'),
    )
}

#~~~ Model setup categories ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# section for general categories, built in one literal
oil_attrs['categories'] = {
    # labels for AIS ship traffic groups
    'all_vessels': [
        'tanker', 'atb', 'barge', 'cargo', 'cruise',
        'ferry', 'fishing', 'smallpass', 'other',
    ],
    # labels for oil tank traffic identified in AIS
    'tank_vessels': ['tanker', 'atb', 'barge'],
    # tanker size classes (still under consideration)
    'tanker_size_classes': [
        "Small Tanker (< 150 m)",
        "Handymax (151-200 m)",
        "Aframax (201-250 m)",
        "SuezMax (251-300 m)",
    ],
    # fuel types used in categorizing Dept. of Ecology data and
    # for which we have Lagrangian files (with the exception of 'other')
    'fuel_type': [
        'akns', 'bunker', 'dilbit', 'jet', 'diesel', 'gas', 'other',
    ],
    # Labels given to ship tracks in our AIS origin-destination analysis.
    # If ship tracks lack an origin or destination in our marine terminal
    # list it's because (a) It's a part of a voyage < 2 km, (b) It's part of
    # a voyage > 2 km but with a start or end that isn't close enough to a
    # marine terminal to attribute.
    'generic_origin_destination': ['Pacific', 'US', 'Canada'],
    'CAD_origin_destination': [
        'ESSO Nanaimo Departure Bay',
        'Suncor Nanaimo',
        'Shell Chemainus Bare Point',
        'Chevron Cobble Hill',
        'Chevron Stanovan',
        'Barge Mooring Near Stanovan',
        'Shell Shellburn',
        'Westridge Marine Terminal',
        'ESSO IOCO Vancouver',
        'Suncor Vancouver',
    ],
    # Terminals that are grouped in voyage joins are removed, e.g.:
    # 'Maxum Petroleum - Harbor Island Terminal' and
    # 'Shell Oil LP Seattle Distribution Terminal' are represented by
    # 'Kinder Morgan Liquids Terminal - Harbor Island'.
    # 'Nustar Energy Tacoma' is represented by 'Phillips 66 Tacoma Terminal'
    'US_origin_destination': [
        'BP Cherry Point Refinery',
        'Shell Puget Sound Refinery',
        'Tidewater Snake River Terminal',
        'SeaPort Sound Terminal',
        'Tesoro Vancouver Terminal',
        'Phillips 66 Ferndale Refinery',
        'Phillips 66 Tacoma Terminal',
        'Marathon Anacortes Refinery (formerly Tesoro)',
        'Tesoro Port Angeles Terminal',
        'U.S. Oil & Refining',
        'Naval Air Station Whidbey Island (NASWI)',
        'NAVSUP Manchester',
        'Alon Asphalt Company (Paramount Petroleum)',
        'Kinder Morgan Liquids Terminal - Harbor Island',
        'Nustar Energy Vancouver',
        'Tesoro Pasco Terminal',
        'REG Grays Harbor, LLC',
        'Tidewater Vancouver Terminal',
        'TLP Management Services LLC (TMS)',
    ],
}

# ~~~ Create dictionary structure for vessel attributes  ~~~~~~~~~~~~~~~~~~~~

# one (initially empty) attribute section per AIS vessel type
oil_attrs['vessel_attributes'] = {}

for vessels in oil_attrs['categories']['all_vessels']:
    oil_attrs['vessel_attributes'].update({vessels: {}})

In [4]:
# Build a {facility name: region} lookup from the facilities spreadsheet
# and save it to the 'facility_region' yaml file listed in oil_attrs.
fdf = assign_facility_region(facilities_xlsx)
facility_regions = dict(zip(fdf['FacilityName'], fdf['Region']))
with open(oil_attrs['files']['facility_region'], 'w') as file:
    # safe_dump writes straight to the open stream and returns None in that
    # case, so its return value is not kept.
    yaml.safe_dump(facility_regions, file)

In [5]:
# Spot check: display the region assigned to one facility
facility_regions['Tesoro Port Angeles Terminal']

'Puget Sound'

## load data

In [6]:
# Every sheet below is read from the same oil capacity workbook
capacity_xlsx = oil_attrs['files']['oil_capacity']

#~~~ tanker traffic ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
tanker_df = pandas.read_excel(
    capacity_xlsx, sheet_name='tanker', usecols='B,C,D,E,F,G,H'
)

#~~~ atb traffic ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# fuel and cargo capacity information (first spreadsheet row is skipped)
# K: "Vessel length (AIS)"
# L: "Vessel Length (AIS + <50 m correction)"
# O: "Total Fuel Capacity (liters)"
# P: "Cargo Capacity (liters)"
atb_df = pandas.read_excel(
    capacity_xlsx, sheet_name='atb', usecols='K,L,O,P', skiprows=1
)

# AIS information
# B: COUNT,
# C: LENGTH,
# D: Median time
atb_data_df = pandas.read_excel(
    capacity_xlsx, sheet_name='atb data', usecols='B,C,D'
)

#~~~ cargo, cruise and ferry traffic (same column layout) ~~~~~~~~~~~~~~~~~~~
cargo_df = pandas.read_excel(
    capacity_xlsx, sheet_name='cargo', usecols='B,D,I,J'
)
cruise_df = pandas.read_excel(
    capacity_xlsx, sheet_name='cruise', usecols='B,D,I,J'
)
ferry_df = pandas.read_excel(
    capacity_xlsx, sheet_name='ferry', usecols='B,D,I,J'
)

#~~~ small passenger traffic ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
smallpass_df = pandas.read_excel(
    capacity_xlsx, sheet_name='smallpass', usecols='B,D'
)

#~~~ fishing traffic ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
fishing_df = pandas.read_excel(
    capacity_xlsx, sheet_name='fishing', usecols='A,C,I,J'
)

## Calculate barge ping-to-transfer ratio

#### Count ATB ship tracks in 2018 AIS data

In [7]:
# Total number of ATB ship tracks: read the per-vessel track counts and
# add them up.
# B: "Number of vessel tracks"
atb_track_counts = pandas.read_excel(
    oil_attrs['directories']['ais_ping_data'],
    sheet_name='Frequency of AIS Pings',
    usecols='B',
    skiprows=1,
)
# single-column frame -> one-element sum -> plain Python number
atb_ship_tracks = atb_track_counts.sum().item()
print(
    f'{atb_ship_tracks:.1f} ship tracks are associated with our selected ATB traffic'
)

588136.0 ship tracks are associated with our selected ATB traffic


#### Count cargo transfers in 2018 Dept. of Ecology database

In [8]:
# Cargo transfers to/from ATBs from all sources (vessel and marine terminal).
# For comparison, the number of transfers with facilities="selected" is 683
atb_transfer_options = dict(transfer_type='cargo', facilities='all')
transfers = get_DOE_atb_transfers(
    DOE_2018_xlsx, facilities_xlsx, **atb_transfer_options
)
print(f'{transfers} total cargo transfers to_from ATBs')

this code not yet tested with fac_xls as input
cargo
214 cargo transfers from all sources
cargo
471 cargo transfers from all sources
685 total cargo transfers to_from ATBs


In [9]:
# The number of transfers that I got before was 598,
# so I'm going to double check this calculation directly from the
# Dept. of Ecology spreadsheet.
DOE_df = pandas.read_excel(
    DOE_2018_xlsx, sheet_name='Vessel Oil Transfer', usecols="G,H,P,Q,R,W,X"
)

# cargo transfers only
is_cargo_transfer = DOE_df.TransferType == 'Cargo'
# deliverer or receiver name mentions ITB or ATB
names_atb_or_itb = (
    DOE_df.Deliverer.str.contains('ITB') |
    DOE_df.Receiver.str.contains('ITB') |
    DOE_df.Deliverer.str.contains('ATB') |
    DOE_df.Receiver.str.contains('ATB')
)

# Despite the `_df` suffix this is a scalar: the count of selected rows
# with a non-null Deliverer.
transfers_df = DOE_df.loc[
    is_cargo_transfer & names_atb_or_itb, 'Deliverer'
].count().item()
print(f'{transfers_df} ATB cargo transfers in WA in 2018')

684 ATB cargo transfers in WA in 2018


The two methods are off by one for reasons that I don't yet see.  
For now, I'm moving forward with the `transfers_df` estimate of 684 (the `transfers` estimate from the helper function is 685).

#### Calculate ping-to-transfer ratio for ATBs using the above estimates

In [10]:
# Number of ATB AIS ship tracks per Dept. of Ecology ATB cargo transfer.
# The denominator is the scalar count `transfers_df` computed directly from
# the spreadsheet, not the `transfers` value returned by the helper function.
ping_to_transfer = atb_ship_tracks/transfers_df
print(f'{ping_to_transfer:.2f} is the AIS ping to DOE cargo transfer ratio for ATB traffic')

859.85 is the AIS ping to DOE cargo transfer ratio for ATB traffic


#### Calculate the median time interval weighted by number of tracks for both ATBs and tank barges to estimate the number of AIS ship tracks that we would expect to see in AIS given the number of cargo transfers that we see in the Dept. of Ecology database

In [None]:
# Median time between AIS pings, one column per traffic type:
# F: ATB median time between AIS pings
# P: non-ATB median time between AIS pings
ais_df = pandas.read_excel(
    oil_attrs['directories']['ais_ping_data'],
    sheet_name='Frequency of AIS Pings',
    usecols='F,P',
    skiprows=1,
)
# Both spreadsheet columns share one header, so pandas suffixes the second
# with ".1"; give them short, distinct names instead.
ais_df = ais_df.rename(
    columns={
        'Median time between pings (minutes)': 'ATB',
        'Median time between pings (minutes).1': 'non-ATB',
    }
)


In [None]:
# Preview the first rows of the renamed ping-interval table
ais_df.head()

In [None]:
# NaN-ignoring median of the per-vessel ping intervals for each traffic type
atb_time, barge_time = (
    numpy.nanmedian(ais_df[traffic]) for traffic in ('ATB', 'non-ATB')
)
# how much longer (or shorter) the barge ping interval is relative to ATBs
barge2atb_ratio = barge_time / atb_time

print(
    f'ATB time = {atb_time:.2f}, barge time = {barge_time:.2f}, ratio = {barge2atb_ratio:.3f}'
)

#### Calculate barge transfers

In [None]:
# NOTE(review): this import sits mid-notebook; it would conventionally live
# with the other monte_carlo_utils imports in the first cell.
from monte_carlo_utils import get_DOE_barges

# Barge cargo transfers (both directions, all facilities) from the helper
barge_transfer_options = dict(
    direction='combined', facilities='all', transfer_type='cargo'
)
barge_transfers = get_DOE_barges(
    DOE_2018_xlsx, facilities_xlsx, **barge_transfer_options
)
print(f'{len(barge_transfers)} barge cargo transfers in WA in 2018')

In [None]:
# Now use the non-function method, for comparison.
# Keep cargo transfers where either party is typed TANK BARGE or TUGBOAT
# and neither party's name mentions ITB or ATB.
barge_or_tug_types = ['TANK BARGE','TUGBOAT']
barge_or_tug_involved = (
    DOE_df.ReceiverTypeDescription.isin(barge_or_tug_types) |
    DOE_df.DelivererTypeDescription.isin(barge_or_tug_types)
)
barge_cargo_rows = (
    (DOE_df.TransferType == 'Cargo') &
    barge_or_tug_involved &
    ~DOE_df.Receiver.str.contains('ITB') &
    ~DOE_df.Receiver.str.contains('ATB') &
    ~DOE_df.Deliverer.str.contains('ITB') &
    ~DOE_df.Deliverer.str.contains('ATB')
)
# Despite the `_df` suffix this is a scalar: the count of selected rows
# with a non-null Deliverer.
barge_transfers_df = DOE_df.loc[barge_cargo_rows, 'Deliverer'].count().item()
print(f'{barge_transfers_df} barge cargo transfers in WA in 2018')

Similar to ATB, these two calculations are slightly different for reasons that I 
need to understand.  For now, for consistency, I am sticking with the 2947 estimate

#### Count barge traffic AIS pings and estimate barge oil cargo traffic using the ATB ping-to-transfer ratio

In [None]:
# Total number of non-ATB barge ship tracks.
# K: "Number of vessel tracks" for *all* non-ATB barges
#     (not just those in Casey's sub-sample dataset.)
barge_track_counts = pandas.read_excel(
    oil_attrs['directories']['ais_ping_data'],
    sheet_name='Frequency of AIS Pings',
    usecols='K',
    skiprows=1,
)
barge_ship_tracks = barge_track_counts.sum().item()
print(f'{barge_ship_tracks:.2e} ship tracks associated with non-ATB barge traffic')

# Estimate non-ATB oil cargo traffic: barge transfers times the ATB
# ping-to-transfer ratio, scaled by the ratio of median sample time
# intervals for barge AIS ship tracks vs atb AIS ship tracks
barge_oilcargo_pings = (
    barge_transfers_df * ping_to_transfer * barge2atb_ratio
)
print(
    f'{barge_oilcargo_pings:.2e} estimated oil cargo tracks'
    ' associated with non-ATB barge traffic'
)

### Set Min/Max thresholds

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Probability that an incident is a fuel spill vs. a cargo spill for
#  oil-cargo vessels.  These values apply to cases that are identified as
#  oil cargo vessels and decide whether the spill from the oil cargo vessel
#  is a fuel spill or a cargo spill.  The barge attribution requires another
#  level of determining fuel vs. cargo spill because most of the barge
#  traffic is not oil cargo traffic.  We set all barges with marine terminal
#  origin/destination to oil cargo barges and use "probability_oilcargo" to
#  determine if the remaining barges are oil cargo barges or fuel barges.
for oil_cargo_vessel in ('tanker', 'atb', 'barge'):
    spill_attrs = oil_attrs['vessel_attributes'][oil_cargo_vessel]
    spill_attrs['probability_fuel'] = 0.2
    # the two probabilities are complementary
    spill_attrs['probability_cargo'] = 1 - spill_attrs['probability_fuel']

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Probability of a barge carrying oil-cargo for a ship track that is
#  identified as barge but that lacks origin-destination information.
# 57% of barge tracks were joined in the origin-destination analysis
# (and will be attributed as having oil cargo), hence the (1 - 0.57) factor.
# float() converts from object to a value that safe_yaml finds acceptable.
# This issue is apparently fixed in newer releases.
unjoined_oilcargo_fraction = (
    (1 - 0.57) * barge_oilcargo_pings/barge_ship_tracks
)
oil_attrs['vessel_attributes']['barge']['probability_oilcargo'] = float(
    unjoined_oilcargo_fraction
)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Minimum and maximum cargo capacities in liters for the tank vessels
cargo_limits_liters = (
    ('min_cargo', {
        'tanker': 7593628,  # small tanker size
        'atb':    4000000,  # smallest load of known ATBs (Island Trader: oil_capacity.xlsx)
        'barge':  1500000,  # 1,589,900: 3A-10 of [DOE report](https://fortress.wa.gov/ecy/publications/documents/96250.pdf)
    }),
    ('max_cargo', {
        'tanker': 185447000,  # Corresponding to tank capacity of Suez-Max tankers as AIS data shows tanker lengths up to 287 m
        'atb':     29413150,  # largest load of known ATBs (Kirby 185-01: oil_capacity.xlsx)
        'barge':   32000000,  # 31,797,444: 3A-10 of [DOE report](https://fortress.wa.gov/ecy/publications/documents/96250.pdf)
    }),
)
for cargo_limit_name, liters_by_vessel in cargo_limits_liters:
    for tank_vessel, cargo_liters in liters_by_vessel.items():
        oil_attrs['vessel_attributes'][tank_vessel][cargo_limit_name] = cargo_liters

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Fuel size classes
# Tankers: SuezMax (285 m), Aframax (245 m), Handymax (180 m), Small Tanker (116 m)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Minimum fuel capacities in liters
minimum_fuel_liters = {
    # PLACEHOLDER[Needs AIS verification]: Reflects small tanker in
    # https://silo.tips/download/suezmax-oil-tanker-tankers-166300-dwt
    # (taken from the first row of the tanker sheet)
    'tanker':    tanker_df['total fuel capacity (liters)'][0].tolist(),
    # End PLACEHOLDER
    'atb':       99000,    # Based on Island Raider and Island Regent tugs in Charles Costanzo's `ATB Information AWO Edits`
    'barge':     6000,     # 5678 (F43): Harbour Tug
    'cargo':     1500000,  # 1,514,164
    'cruise':    1000000,
    'ferry':     30000,    # 34,068
    'fishing':   150,
    'smallpass': 100,
    'other':     100,      # 22 (F52)
}
for fuel_vessel, fuel_liters in minimum_fuel_liters.items():
    oil_attrs['vessel_attributes'][fuel_vessel]['min_fuel'] = fuel_liters

## Minimum vessel lengths (same units as the AIS lengths used elsewhere;
#  presumably meters -- confirm)
minimum_lengths = {
    'tanker':    150,
    'atb':       22,
    'barge':     22,
    'cargo':     50,
    'cruise':    60,
    'ferry':     26,
    'fishing':   20,
    'smallpass': 10,
    'other':     10,
}
for length_vessel, shortest in minimum_lengths.items():
    oil_attrs['vessel_attributes'][length_vessel]['min_length'] = shortest

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Maximum fuel capacities in liters (to be filled in )
# NOTE: Max volume for ferries reflects WA ferries.  See "Other Ship Classifications"
# and Living Oceans information with reference to [BC ferries doc](http://ferriesbc.proboards.com/thread/7592/fueling-bcs-ferries)
# NOTE: Ocean-going tank barge listed as 52,995,000 liters max fuel capacity
# according to [NOAA](https://response.restoration.noaa.gov/about/media/how-much-oil-ship.html)
maximum_fuel_liters = {
    # taken from the fourth row of the tanker sheet
    'tanker':    tanker_df['total fuel capacity (liters)'][3].tolist(),
    'atb':       550000,    # Again, based on Dublin Sea Charles Costanzo's (note truncated in the original; source to be confirmed)
    'barge':     710000,    # 700,300: Ocean Tug (need to verify that we have them in S.S.)
    'cargo':     17000000,
    'cruise':    7570820,
    'ferry':     500000,
    'fishing':   300000,    # (see p. 3A-6 of [DOE report](https://fortress.wa.gov/ecy/publications/documents/96250.pdf))
    'smallpass': 12000,     # 11356 (F41)
    # NOTE(review): this equals the cargo maximum, while the annotation
    # cites a much smaller figure -- confirm which is intended.
    'other':     17000000,  # 3,028 (F77): Cruising, sport fishing, sailing
}
for fuel_vessel, fuel_liters in maximum_fuel_liters.items():
    oil_attrs['vessel_attributes'][fuel_vessel]['max_fuel'] = fuel_liters

## Maximum vessel lengths
maximum_lengths = {
    'tanker':    300,
    'atb':       209,
    'barge':     100,
    'cargo':     369,
    'cruise':    334,
    'ferry':     168,
    'fishing':   200,
    'smallpass': 75,
    'other':     75,
}
for length_vessel, longest in maximum_lengths.items():
    oil_attrs['vessel_attributes'][length_vessel]['max_length'] = longest

### Load Oil Capacity Data and Attribute Vessels

In [None]:
# The tanker size-class labels are reused as the tanker types
oil_attrs['vessel_attributes']['tanker']['tanker_types'] = (
    oil_attrs['categories']['tanker_size_classes']
)

# fuel parameters (one value per row of the tanker sheet)
oil_attrs['vessel_attributes']['tanker']['fuel_hfo_to_diesel'] = (
    tanker_df['hfo fuel capacity (liters)'].values.flatten()/
    tanker_df['diesel fuel capacity (liters)'].values.flatten()
).tolist()

oil_attrs['vessel_attributes']['tanker']['fuel_capacity'] = (
    tanker_df['total fuel capacity (liters)'].values.flatten() 
).tolist()

# cargo parameters
oil_attrs['vessel_attributes']['tanker']['cargo_capacity'] = (
    tanker_df['cargo capacity (liters)'].values.flatten() 
).tolist()

# These are the lengths of the vessels in the above website that correspond
# to the fuel ratios  and fuel capacities listed above
oil_attrs['vessel_attributes']['tanker']['vessel_lengths'] = (
    tanker_df['Silo ship size (m)'].values.flatten()
).tolist()

# create  bins for tanker categories
# Note: get_bin requires that adjacent bins share values, e.g.:
# [(0,150)(150,250)(250,300)]
# The sets are treated as: (0,150],(150,250], and (250,300]
# The upper edge of each bin is therefore the sheet's max length + 1.
bins = []
center_points = []
# One bin per row of the tanker sheet.  (The original wrote
# len(series - 1): the "- 1" sat inside len() and so never shortened the
# loop; it is dropped here without changing which rows are visited.)
for index in range(len(tanker_df['min length (m)'])):
    bins.append(
        (tanker_df['min length (m)'][index].tolist(), 
         tanker_df['max length (m)'][index].tolist()+1)
    )
    step_size = (
        tanker_df['max length (m)'][index].tolist() - 
        tanker_df['min length (m)'][index].tolist()
    )
    # mid-point of the (unpadded) length range; not used again in this section
    center_points.append(tanker_df['min length (m)'][index] + step_size/2)

oil_attrs['vessel_attributes']['tanker']['length_bins'] = tuple(map(tuple, bins))

#~~~ atb traffic ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Straight-line fits of ATB capacity against corrected vessel length:
# fit = coefficient[1] + coefficient[0] * length
# Only rows with a positive corrected length take part in the fits.
atb_fit_length = atb_df['Vessel Length (AIS + <50 m correction)']
good_values = atb_fit_length > 0

# cargo capacity vs. length
cargo_fit_coefs = numpy.polyfit(
    atb_fit_length[good_values],
    atb_df['Cargo Capacity (liters)'][good_values],
    1,
)
oil_attrs['vessel_attributes']['atb']['cargo_fit_coefs'] = cargo_fit_coefs.tolist()

# total fuel capacity vs. length
fuel_fit_coefs = numpy.polyfit(
    atb_fit_length[good_values],
    atb_df['Total Fuel Capacity (liters)'][good_values],
    1,
)
oil_attrs['vessel_attributes']['atb']['fuel_fit_coefs'] = fuel_fit_coefs.tolist()

# Capacity bins (liters) into which vessel traffic information is sorted
# to determine oil capacity weights.
cargo_bins, cargo_bin_centers = make_bins(
    lower_bound=4000000, upper_bound=30000000, step_size=2000000
)
fuel_bins, fuel_bin_centers = make_bins(
    lower_bound=75000, upper_bound=675000, step_size=25000
)

# We use the median time interval between AIS pings per MMSI vessel to scale
# the total AIS ship track count so that count isn't biased by ping interval:
# each vessel's interval is divided by the median interval over all vessels.
ping_interval_minutes = atb_data_df['Median time between pings (minutes)']
scaling_factor = ping_interval_minutes / numpy.median(ping_interval_minutes)
atb_scaled_count = atb_data_df['COUNT'] * scaling_factor


### cargo weights ###
# Sum the scaled AIS count within each cargo capacity bin, then divide by
# the total to get cargo capacity weights.
# NOTE(review): presumably rows of atb_df and atb_data_df describe the same
# vessels in the same order -- confirm.
binned_atb_count_cargo = place_into_bins(
    atb_df['Cargo Capacity (liters)'], atb_scaled_count, cargo_bins
)
numerator = binned_atb_count_cargo
denominator = binned_atb_count_cargo.sum()
cargo_weights = decimal_divide(numerator, denominator, precision)

print(f'ATB cargo weight sum: {sum(cargo_weights)}')

# define weighting for oil capacity attribution for atbs < 50 m
oil_attrs['vessel_attributes']['atb'].update({
    'cargo_capacity_probability': cargo_weights.tolist(),
    'cargo_capacity_bin_centers': cargo_bin_centers,
    'cargo_capacity_bins': cargo_bins,
})

### fuel weights ### 
# bin the scaled AIS count by fuel capacity and sum the counts for each 
# fuel capacity bin to create fuel capacity weights
binned_atb_count_by_fuel = place_into_bins(
    atb_df['Total Fuel Capacity (liters)'], 
    atb_scaled_count, 
    fuel_bins
)
numerator = binned_atb_count_by_fuel
# Normalize by the fuel-binned total.  (A misspelled variable name here
# previously left `denominator` holding the cargo-binned total.)
denominator = binned_atb_count_by_fuel.sum()
fuel_weights = decimal_divide(
    numerator, 
    denominator, 
    precision
)
print(f'ATB fuel weight sum: {sum(fuel_weights)}')

oil_attrs['vessel_attributes']['atb']['fuel_capacity_probability']  = (
    fuel_weights.tolist()
)
oil_attrs['vessel_attributes']['atb']['fuel_capacity_bin_centers']  = (
    fuel_bin_centers
)
oil_attrs['vessel_attributes']['atb']['fuel_capacity_bins'] = (
    fuel_bins
)

#~~~ barge traffic (set to ATB) ~~~
# We use ATB traffic to determine barge weights b/c the number of ATB
# vessels is manageable and has tugs paired with tank barges.  Every barge
# capacity entry is therefore just a pointer to the ATB section.
for barge_capacity_key in (
    'cargo_capacity_probability',
    'cargo_capacity_bin_centers',
    'cargo_capacity_bins',
    'fuel_capacity_probability',
    'fuel_capacity_bin_centers',
    'fuel_capacity_bins',
):
    oil_attrs['vessel_attributes']['barge'][barge_capacity_key] = 'See ATB'

#~~~ cargo traffic ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
vessel_type = 'cargo'
min_length = cargo_df['AIS min length'][0]
max_length = cargo_df['AIS max length'][0]
length = cargo_df['vessel length (m)']
capacity = cargo_df['fuel capacity (liters)']

# Exponential fit: a straight line through log(capacity) vs. length, so
# capacity = exp(C[1]) * exp(C[0] * length).
# NOTE(review): this rebinds `cargo_fit_coefs`, which earlier held the ATB
# cargo-capacity fit (already stored in oil_attrs by then).
cargo_fit_coefs = numpy.polyfit(length, numpy.log(capacity), 1)
oil_attrs['vessel_attributes']['cargo']['fuel_fit_coefs'] = cargo_fit_coefs.tolist()

# evaluate the fit over a range of lengths for plotting
C = cargo_fit_coefs
cargo_length = numpy.arange(150, 400, 10)
cargo_fit = numpy.exp(C[1]) * numpy.exp(C[0]*cargo_length)

print(f'Cargo volume fit for length 620 m: {numpy.exp(C[1]) *  numpy.exp(C[0]*620):1.5e}' )
print(f'Cargo volume fit for length Benjamin Franklin (400 m, 1.4e7 liters): {numpy.exp(C[1]) *  numpy.exp(C[0]*400):1.5e}' )

########### PLOT CARGO DATA FIT ###############
fig = plt.figure()
ax1 = fig.add_subplot(111)

# add ais-curated frequency data
good_data = length>0
ax1.scatter(
    length[good_data], capacity[good_data], 40,
    label='Fuel capacity data', color='tab:grey'
)

# fitted curve
ax1.plot(
    cargo_length, cargo_fit, c='black',
    label=f'exponential fit: {numpy.exp(C[1]):.02f}*e^({C[0]:.02f}*length)'
)

# horizontal threshold lines spanning the plotted length range
x_limits = [numpy.min(cargo_length), numpy.max(cargo_length)]
ax1.plot(
    x_limits,
    [oil_attrs['vessel_attributes']['cargo']['min_fuel']] * 2,
    c='tab:grey', label='Min/Max thresholds'
)
ax1.plot(
    x_limits,
    [oil_attrs['vessel_attributes']['cargo']['max_fuel']] * 2,
    c='tab:grey'
)
ax1.plot(x_limits, [2e6, 2e6], 'r:', label="Max threshold before correction")

# labels, legend and output file
ax1.set_xlabel('vessel length (m)', fontsize=12)
ax1.set_ylabel('fuel capacity (liters)', fontsize=12)
ax1.set_title(vessel_type, fontsize=16)
plt.tight_layout()
ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.savefig(graphics_directory/'cargo_fit.png', bbox_inches='tight')
plt.show()

#~~~ cruise traffic ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
vessel_type = 'cruise'
min_length = cruise_df['AIS min length'][0]
max_length = cruise_df['AIS max length'][0]
length = cruise_df['vessel length (m)']
capacity = cruise_df['fuel capacity (liters)']

# Linear fit: capacity = C[1] + C[0] * length
cruise_fit_coefs = numpy.polyfit(length, capacity, 1)
oil_attrs['vessel_attributes']['cruise']['fuel_fit_coefs'] = cruise_fit_coefs.tolist()

# evaluate the fit over a range of lengths for plotting
C = cruise_fit_coefs
cruise_length = numpy.arange(250, 350, 10)
cruise_fit = C[1] + C[0]*cruise_length

########### PLOT CRUISE DATA FIT ###############
fig = plt.figure()
ax1 = fig.add_subplot(111)

# add ais-curated frequency data
good_data = length>0
ax1.scatter(
    length[good_data], capacity[good_data], 40,
    label='Fuel capacity data', color='tab:grey'
)

# fitted line
ax1.plot(
    cruise_length, cruise_fit, c='black',
    label=f'linear fit: {C[1]:.02f} + {C[0]:.02f}*length'
)

# horizontal threshold lines spanning the plotted length range
x_limits = [numpy.min(cruise_length), numpy.max(cruise_length)]
ax1.plot(
    x_limits,
    [oil_attrs['vessel_attributes']['cruise']['min_fuel']] * 2,
    c='tab:grey', label='Min/Max thresholds'
)
ax1.plot(
    x_limits,
    [oil_attrs['vessel_attributes']['cruise']['max_fuel']] * 2,
    c='tab:grey'
)
ax1.plot(x_limits, [5500000, 5500000], 'r:', label='Max threshold before correction')

# labels, legend and output file
ax1.set_xlabel('vessel length (m)', fontsize=12)
ax1.set_ylabel('fuel capacity (liters)', fontsize=12)
ax1.set_title(vessel_type, fontsize=16)
plt.tight_layout()
ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.savefig(graphics_directory/'cruise_fit.png', bbox_inches='tight')
plt.show()

#~~~ ferry traffic ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
vessel_type = 'ferry'
min_length = ferry_df['AIS min length'][0]
max_length = ferry_df['AIS max length'][0]
length = ferry_df['vessel length (m)']
capacity = ferry_df['fuel capacity (liters)']

# Exponential fit: a straight line through log(capacity) vs. length, so
# capacity = exp(C[1]) * exp(C[0] * length)
ferry_fit_coefs = numpy.polyfit(length, numpy.log(capacity), 1)
oil_attrs['vessel_attributes']['ferry']['fuel_fit_coefs'] = ferry_fit_coefs.tolist()

# evaluate the fit over a range of lengths for plotting
C = ferry_fit_coefs
ferry_length = numpy.arange(20,160,10)
ferry_fit = numpy.exp(C[1]) * numpy.exp(C[0]*ferry_length)

print(f'Ferry tank volume for length of 75 m: {numpy.exp(C[1])* numpy.exp(C[0]*75):1.5e}')

########### PLOT FERRY DATA FIT ###############
fig = plt.figure()
ax1 = fig.add_subplot(111)

# add ais-curated frequency data
good_data = length>0
ax1.scatter(
    length[good_data], capacity[good_data], 40,
    label='Fuel capacity data', color='tab:grey'
)

# fitted curve
ax1.plot(
    ferry_length, ferry_fit, c='black',
    label=f'exponential fit: {numpy.exp(C[1]):.02f}*e^({C[0]:.02f}*length)'
)

# horizontal threshold lines spanning the plotted length range
x_limits = [numpy.min(ferry_length), numpy.max(ferry_length)]
ax1.plot(
    x_limits,
    [oil_attrs['vessel_attributes']['ferry']['min_fuel']] * 2,
    c='tab:grey', label='Min/Max thresholds'
)
ax1.plot(
    x_limits,
    [oil_attrs['vessel_attributes']['ferry']['max_fuel']] * 2,
    c='tab:grey'
)

# labels, legend and output file
ax1.set_xlabel('vessel length (m)', fontsize=12)
ax1.set_ylabel('fuel capacity (liters)', fontsize=12)
ax1.set_title(vessel_type, fontsize=16)
plt.tight_layout()
ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.savefig(graphics_directory/'ferry_fit.png', bbox_inches='tight')
plt.show()

#~~~ fishing traffic ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
vessel_type = 'fishing'
length = fishing_df['vessel length (m)']
capacity = fishing_df['fuel capacity (liters)']

# lengths at which both fits are evaluated
fishing_length = numpy.arange(0, 80, 1)

# Exponential fit (line through log(capacity)); computed for reference,
# it is neither stored nor plotted in this section.
fishing_exp_coefs = numpy.polyfit(length, numpy.log(capacity), 1)
Cexp = fishing_exp_coefs
fishing_exp_fit = numpy.exp(Cexp[1]) * numpy.exp(Cexp[0]*fishing_length)

# Quadratic fit: capacity = C[2] + C[1]*length + C[0]*length**2.
# This is the fit that is saved for fishing vessels.
fishing_fit_coefs = numpy.polyfit(length, capacity, 2)
oil_attrs['vessel_attributes']['fishing']['fuel_fit_coefs'] = fishing_fit_coefs.tolist()

C = fishing_fit_coefs
fishing_fit = C[2] + C[1]*fishing_length + C[0]*fishing_length**2

########### PLOT FISHING DATA FIT ###############
fig = plt.figure()
ax1 = fig.add_subplot(111)

# add ais-curated frequency data
good_data = length>0
ax1.scatter(
    length[good_data], capacity[good_data], 40,
    label='Fuel capacity data', color='tab:grey'
)

# fitted curve
ax1.plot(
    fishing_length, fishing_fit, c='black',
    label=f'quadratic fit: {C[0]:.02f}*(length)^2 + {C[1]:.02f}*(length) + {C[2]:.02f}'
)

# horizontal threshold lines spanning the plotted length range
x_limits = [numpy.min(fishing_length), numpy.max(fishing_length)]
ax1.plot(
    x_limits,
    [oil_attrs['vessel_attributes']['fishing']['min_fuel']] * 2,
    c='tab:grey', label='Min/Max thresholds'
)
ax1.plot(
    x_limits,
    [oil_attrs['vessel_attributes']['fishing']['max_fuel']] * 2,
    c='tab:grey'
)

# labels, legend and output file
ax1.set_xlabel('vessel length (m)', fontsize=12)
ax1.set_ylabel('fuel capacity (liters)', fontsize=12)
ax1.set_title(vessel_type, fontsize=16)
plt.tight_layout()
ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.savefig(graphics_directory/'fishing_fit.png', bbox_inches='tight')
plt.show()

#~~~ small passenger traffic ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Fit fuel capacity against length for small passenger vessels. The
# exponential (log-linear) coefficients are stored in oil_attrs; a quadratic
# fit is also computed, printed at a few lengths, and plotted for comparison.
vessel_type = 'smallpass'
length = smallpass_df['vessel length (m)']
capacity = smallpass_df['fuel capacity (liters)']

# Exponential fit: first-order polynomial through log(capacity) vs. length
smallpass_fit_coefs = numpy.polyfit(length, numpy.log(capacity), 1)
# Quadratic fit of capacity vs. length (highest-order coefficient first)
smallpass_fit_coefs_quad = numpy.polyfit(length, capacity, 2)

smallpass_attrs = oil_attrs['vessel_attributes']['smallpass']
smallpass_attrs['fuel_fit_coefs'] = smallpass_fit_coefs.tolist()

# Lengths (m) at which the fitted curves are evaluated
smallpass_length = numpy.arange(0, 24, 1)

C = smallpass_fit_coefs
smallpass_fit = numpy.exp(C[1]) * numpy.exp(C[0]*smallpass_length)

C_q = smallpass_fit_coefs_quad
smallpass_fit_q = C_q[2] + C_q[1]*smallpass_length + C_q[0]*smallpass_length**2

# Report the quadratic fit at selected lengths
print(f'Small pass quadratic fit to 620m length: {(C_q[0]*620**2 + C_q[1]*620 + C_q[2]):.02f}')
print(f'Small pass quadratic fit to 400m length: {(C_q[0]*400**2 + C_q[1]*400 + C_q[2]):.02f}')
print(f'Small pass quadratic fit to 14m length: {(C_q[0]*14**2 + C_q[1]*14 + C_q[2]):.02f}')
print(f'Small pass quadratic fit to 16m length: {(C_q[0]*16**2 + C_q[1]*16 + C_q[2]):.02f}')
print(f'Small pass quadratic fit to 75m length: {(C_q[0]*75**2 + C_q[1]*75 + C_q[2]):.02f}')

########### PLOT SMALL PASS DATA FIT ###############
fig, ax1 = plt.subplots()

# scatter the fuel capacity data, keeping only entries with positive length
good_data = length > 0
ax1.scatter(
    length[good_data],
    capacity[good_data],
    s=40,
    label='Fuel capacity data',
    color='tab:grey'
)

# exponential fit (dotted red) and quadratic fit (black)
ax1.plot(
    smallpass_length,
    smallpass_fit,
    'r:',
    label=f'exponential fit: {numpy.exp(C[1]):.02f}*e^({C[0]:.02f}*length)'
)
ax1.plot(
    smallpass_length,
    smallpass_fit_q,
    c='black',
    label=f'quadratic fit: {C_q[0]:.02f}*(length)^2 + {C_q[1]:.02f}*(length) + {C_q[2]:.02f}'
)

# horizontal lines marking the min and max fuel thresholds
length_span = [numpy.min(smallpass_length), numpy.max(smallpass_length)]
ax1.plot(
    length_span,
    [smallpass_attrs['min_fuel'], smallpass_attrs['min_fuel']],
    c='tab:grey',
    label='Min/Max thresholds'
)
ax1.plot(
    length_span,
    [smallpass_attrs['max_fuel'], smallpass_attrs['max_fuel']],
    c='tab:grey'
)

# labels, legend placed outside the axes, then save and display
ax1.set_xlabel('vessel length (m)', fontsize=12)
ax1.set_ylabel('fuel capacity (liters)', fontsize=12)
ax1.set_title(vessel_type, fontsize=16)
fig.tight_layout()
ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
fig.savefig(graphics_directory/'smallpass_fit.png', bbox_inches='tight')
plt.show()

#~~~ other ~~~
# "other" vessels reuse the small passenger log-linear fit coefficients
other_attrs = oil_attrs['vessel_attributes']['other']
other_attrs['fuel_fit_coefs'] = smallpass_fit_coefs.tolist()


### Save dictionary to .yaml file 

In [None]:
# Write the oil_attrs dictionary to the .yaml driver file.
# yaml.safe_dump returns None when it is given a stream, so the return
# value is not kept.
with open(output_file, 'w') as yaml_file:
    yaml.safe_dump(oil_attrs, yaml_file)

In [None]:
# Evaluate the small passenger quadratic fit at median(small pass),
# max(small pass) and max(other) lengths, using hard-coded coefficients
# (a*length^2 + b*length + c).
lengths = [16,75,620]
quad_a, quad_b, quad_c = 12.18041808, -98.77603759, 209.20779143
[quad_a*length**2 + quad_b*length + quad_c for length in lengths]

In [None]:
#~~~ small passenger traffic ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Recompute the small passenger exponential (log-linear) fit, store its
# coefficients in oil_attrs, and print the fit evaluated at a length of 20.
vessel_type = 'smallpass'
length = smallpass_df['vessel length (m)']
capacity = smallpass_df['fuel capacity (liters)']

# First-order polynomial through log(capacity) vs. length
smallpass_fit_coefs = numpy.polyfit(length, numpy.log(capacity), 1)
smallpass_attrs = oil_attrs['vessel_attributes']['smallpass']
smallpass_attrs['fuel_fit_coefs'] = smallpass_fit_coefs.tolist()

# Evaluate the fit over the plotted length range
smallpass_length = numpy.arange(0, 24, 1)
C = smallpass_fit_coefs
smallpass_fit = numpy.exp(C[1]) * numpy.exp(C[0]*smallpass_length)

# intercept, slope (both in log space) and the fitted value at length 20
print(C[1], C[0], numpy.exp(C[1]) * numpy.exp(C[0]*20))

### Graphics

In [None]:
## placeholder for tanker cargo capacity bins

In [None]:
#~~~~ ATB (> 50 m) fuel capacity line fit ~~~~
# Plot ATB fuel capacity against vessel length together with the linear fit
# stored in oil_attrs (C[0] is the slope, C[1] the intercept).
C = oil_attrs['vessel_attributes']['atb']['fuel_fit_coefs']
test_length = numpy.arange(60,230,5)
test_atb_fit = (
    C[1] +
    C[0]*test_length
)

atbfig = plt.figure()
ax1 = atbfig.add_subplot(111)
ax1.scatter(
    atb_df['Vessel Length (AIS + <50 m correction)'][good_values],
    atb_df['Total Fuel Capacity (liters)'][good_values],
    60,
    label='AIS curated data',
    color='royalblue'
)
ax1.plot(test_length, test_atb_fit, c='royalblue', label='linear fit')
plt.xlabel('ATB length (m)',fontsize=12)
plt.ylabel('ATB fuel capacity (liters)',fontsize=12)
plt.tight_layout()
ax1.legend()
# Save BEFORE plt.show(): once the figure has been shown, pyplot's savefig
# can end up writing a new, empty figure. Saving through the figure handle
# also avoids depending on which figure is "current".
atbfig.savefig(graphics_directory/'atb_fuel_fit.png')
plt.show()

In [None]:
#~~~~ ATB (< 50 m) and tank barge cargo capacity weighting ~~~~
# Bar chart of the binned counts by cargo capacity, overlaid with the
# per-vessel counts, saved to the graphics directory.
fig, ax2 = plt.subplots()

# binned counts, one bar per cargo-capacity bin center
ax2.bar(
    cargo_bin_centers,
    binned_atb_count_cargo,
    width=0.3e6,
    color='darkslateblue',
    label='AIS pings(MMSI) * (pings interval (MMSI))/median(ping interval (MMSI)) binned by MMSI cargo capacity'
)

# per-vessel counts against the first 18 cargo capacity entries
ax2.scatter(
    atb_df['Cargo Capacity (liters)'][0:18],
    atb_data_df['COUNT'],
    s=50,
    color='yellowgreen',
    label='AIS pings by cargo capacity for each ATB MMSI'
)

# axis labels, legend, save and display
ax2.set_xlabel('Oil cargo capacity (liters)', fontsize=12)
ax2.set_ylabel('AIS pings scaled by interval weights', fontsize=12)
fig.tight_layout()
ax2.legend()
fig.savefig(graphics_directory/'tank_barge_pings_binnedbycapacity.png')
plt.show()

In [None]:
# Scatter fuel capacity against AIS length for ATBs shorter than 50 m
small = atb_df['Vessel length (AIS)']<50
atbfig, ax1 = plt.subplots()
ax1.scatter(
    atb_df['Vessel length (AIS)'][small],
    atb_df['Total Fuel Capacity (liters)'][small],
    s=60,
    color='royalblue',
    label='AIS curated data'
)
ax1.set_xlabel('ATB length (m)', fontsize=12)
ax1.set_ylabel('ATB fuel capacity (liters)', fontsize=12)
atbfig.tight_layout()
ax1.legend()
plt.show()

In [None]:
#~~~~ plot  ais pings by vessel lengths  and ais pings by binned lengths, for comparison~~~~
# Bars show the binned counts by cargo capacity; points show the per-vessel
# counts for vessels with a positive cargo capacity.
aisfig, ax1 = plt.subplots()

# binned counts, one bar per cargo-capacity bin center
ax1.bar(
    cargo_bin_centers,
    binned_atb_count_cargo,
    width=0.3e6,
    color='darkslateblue',
    label='Binned distances'
)

# per-vessel values, restricted to entries with cargo capacity > 0
good_data = atb_df['Cargo Capacity (liters)']>0
ax1.scatter(
    atb_df['Cargo Capacity (liters)'][good_data],
    atb_data_df['COUNT'][good_data],
    s=50,
    color='yellowgreen',
    label='AIS total distance and capacity by ship'
)

# axis labels, legend and display (figure is not saved)
ax1.set_xlabel('ATB cargo capacity (liters)', fontsize=12)
ax1.set_ylabel('Distance traveled (km)', fontsize=12)
aisfig.tight_layout()
ax1.legend()
plt.show()

In [None]:
# Display the full tanker entry of oil_attrs for inspection
oil_attrs['vessel_attributes']['tanker']

In [None]:
# Display the tanker length bins stored in oil_attrs
oil_attrs['vessel_attributes']['tanker']['length_bins']

In [None]:
# Display the ATB fuel capacity bins stored in oil_attrs
oil_attrs['vessel_attributes']['atb']['fuel_capacity_bins']

In [None]:
# Build one (min length, max length + 1) bin per row of tanker_df, plus the
# midpoint between each row's min and max length.
#
# The columns are converted to plain Python lists and iterated directly, so
# every row is used exactly once regardless of the DataFrame's index labels.
# (The earlier `len(series - 1)` subtracted 1 from each value rather than
# from the row count, so it also looped over every row.)
min_lengths = tanker_df['min length (m)'].tolist()
max_lengths = tanker_df['max length (m)'].tolist()

bins = [
    (min_length, max_length + 1)
    for min_length, max_length in zip(min_lengths, max_lengths)
]
center_points = [
    min_length + (max_length - min_length) / 2
    for min_length, max_length in zip(min_lengths, max_lengths)
]



In [None]:
# Display the (min, max + 1) tanker length bins built in the cell above
bins

In [None]:
# Display the tanker minimum lengths as a plain Python list
tanker_df['min length (m)'].tolist()

In [None]:
# Sum the ATB fuel capacity probabilities
# NOTE(review): presumably a sanity check that the weights sum to 1.0 -- confirm
sum(oil_attrs['vessel_attributes']['atb']['fuel_capacity_probability'])

In [None]:
# Convert the binned ATB counts (by fuel capacity) into fractional weights,
# rounded to two decimals.
numerator = binned_atb_count_by_fuel
# Was misspelled `denomintor`, which left `denominator` undefined below
denominator = binned_atb_count_by_fuel.sum()
fuel_weights = numpy.around(
        numerator / denominator,
        decimals = 2
)

In [None]:
# Sum the rounded fuel weights
# NOTE(review): presumably expected to equal 1.0 -- confirm
sum(fuel_weights)