In [1]:
# import necessary packages
import os
import pandas
import numpy
import matplotlib.pyplot as plt
import geopandas as gpd
from pathlib import Path
# import function for querying DOE dataframes
from monte_carlo_utils import get_DOE_atb



# Locations of the ATB and barge track shapefiles
atb_shp_path = Path('/data/MIDOSS/shapefiles/atb_2018_01.shp')
barge_shp_path = Path('/data/MIDOSS/shapefiles/barge_2018_01.shp')

# Read each shapefile with geopandas; the results are stored per vessel type
# under the keys 'atb' and 'barge'
allTracks = {
    'atb': gpd.read_file(atb_shp_path),
    'barge': gpd.read_file(barge_shp_path),
}

# Show the first row of the ATB attribute table
allTracks['atb'].head(1)

Unnamed: 0,MMSI_NUM,ST_DATE,EN_DATE,LENGTH,TO,FROM_,geometry
0,316001223.0,2018-01-01 00:03:48,2018-01-01 00:05:58,41,US,US,"LINESTRING (-122.77574 48.33663, -122.77796 48..."


In [2]:
# TO values that are counted as a "generic" attribution
attribution = ['US', 'Canada', 'Pacific']

# Per-vessel-type containers (keyed by 'atb' / 'barge'); `generic` is filled
# here, `noNone` and `allNone` are filled by later cells
noNone, allNone, generic = {}, {}, {}

for vessel_type in ('atb', 'barge'):
    tracks = allTracks[vessel_type]
    # True for rows whose TO value is one of the labels in `attribution`
    is_generic = tracks['TO'].isin(attribution)
    generic[vessel_type] = tracks.loc[is_generic]

In [3]:
# Preview the first row of the ATB tracks whose TO value is in `attribution`
generic["atb"].head(1)

Unnamed: 0,MMSI_NUM,ST_DATE,EN_DATE,LENGTH,TO,FROM_,geometry
0,316001223.0,2018-01-01 00:03:48,2018-01-01 00:05:58,41,US,US,"LINESTRING (-122.77574 48.33663, -122.77796 48..."


## ATB

In [4]:
# Split the ATB tracks by completeness. dropna() and isnull() look at every
# column, so a row lands in the "None" group if ANY field is missing, not only
# the TO / FROM_ attribution columns.
atb_tracks = allTracks['atb']

# rows with no missing values, re-indexed from 0
noNone['atb'] = atb_tracks.dropna().reset_index(drop=True)

# rows with at least one missing value (original index retained)
shp_tmp = atb_tracks.isnull()
row_has_None = shp_tmp.any(axis=1)
allNone['atb'] = atb_tracks.loc[row_has_None]

# print results: total rows, complete rows, rows with a missing value, and
# the sum of the two subsets (which should equal the total)
n_all = len(atb_tracks)
n_noNone = len(noNone['atb'])
n_allNone = len(allNone['atb'])
print(n_all)
print(n_noNone)
print(n_allNone)
print(f'None + NoNone = {n_allNone + n_noNone}')

27221
16324
10897
None + NoNone = 27221


In [5]:
# Express each ATB subset as a fraction of all ATB tracks
atb_total = allTracks["atb"].shape[0]
atb_unattributed = allNone["atb"].shape[0] / atb_total
atb_attributed = noNone["atb"].shape[0] / atb_total
atb_generic = generic["atb"].shape[0] / atb_total

print(f'Fraction of ATB tracks that are unattributed: {atb_unattributed}')
print(f'Fraction of ATB tracks that are attributed: {atb_attributed}')
print(f'Fraction of ATB tracks that are generic attribution: {atb_generic}')

Fraction of ATB tracks that are unattributed: 0.40031593255207376
Fraction of ATB tracks that are attributed: 0.5996840674479262
Fraction of ATB tracks that are generic attribution: 0.3468278167591198


## Barges

In [8]:
vessel_type = "barge"
vessel_tracks = allTracks[vessel_type]

# Split the tracks by completeness. dropna() and isnull() look at every
# column, so a row lands in the "None" group if ANY field is missing, not only
# the TO / FROM_ attribution columns.

# rows with no missing values, re-indexed from 0
noNone[vessel_type] = vessel_tracks.dropna().reset_index(drop=True)

# rows with at least one missing value (original index retained)
shp_tmp = vessel_tracks.isnull()
row_has_None = shp_tmp.any(axis=1)
allNone[vessel_type] = vessel_tracks.loc[row_has_None]

# print results: row counts first, then each subset as a fraction of the total
n_all = len(vessel_tracks)
n_noNone = len(noNone[vessel_type])
n_allNone = len(allNone[vessel_type])
n_generic = len(generic[vessel_type])
print(n_all)
print(n_noNone)
print(n_allNone)
print(f'None + NoNone = {n_allNone + n_noNone}')
print(f'Fraction of {vessel_type} tracks that are unattributed: {n_allNone / n_all}')
print(f'Fraction of {vessel_type} tracks that are attributed: {n_noNone / n_all}')
print(f'Fraction of {vessel_type} tracks that are generic attribution: {n_generic / n_all}')

624426
372124
252302
None + NoNone = 624426
Fraction of barge tracks that are unattributed: 0.4040542834539241
Fraction of barge tracks that are attributed: 0.5959457165460759
Fraction of barge tracks that are generic attribution: 0.5009192442339044
