# Working with Geospatial Data in Python

**Data Sources**

- [National Oceanic and Atmospheric Administration's Wrecks and Obstructions Database](https://nauticalcharts.noaa.gov/data/wrecks-and-obstructions.html): collection of known wrecks and obstructions in US coastal waters courtesy of the Coast Survey's Automated Wreck and Obstruction Information System (AWOIS) and the Electronic Navigational Chart (ENC) data


## Import Python Packages and Data

In [1]:
# Package imports
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely

import matplotlib.pyplot as plt
# import contextily as ctx
import folium

%matplotlib inline

In [2]:
# Read AWOIS Wreck shapefile into GeoDataFrames
awois_wrecks = gpd.read_file('./data/AWOIS_Wrecks/AWOIS_Wrecks.shp', driver='shapefile')

# Keep only rows in geographic regions B and C (Southern MA to Northern NJ)
awois_wrecks = awois_wrecks[awois_wrecks['AREA_ID'].str.contains('B|C')]

awois_wrecks.head()

Unnamed: 0,RECRD,VESSLTERMS,AREA_ID,CHART,LATDEC,LONDEC,GP_QUALITY,GP_SOURCE,DEPTH,SOUNDING_T,YEARSUNK,HISTORY,REFERENCE,geometry
1093,15129,WRECK,C,12402,40.567114,-74.047717,High,Direct,4,Feet and tenths,,"LNM09/12, USCG District 1-- Added ""4"" wreck an...",,POINT (-74.04772 40.56711)
1094,8909,UNKNOWN,C,12214,38.845972,-74.835139,High,Direct,28,Feet and tenths,,H-10241/94-- OPR-D368-WH; UNCHARTED WRECKAGE A...,,POINT (-74.83514 38.84597)
1095,11992,UNKNOWN,C,12353,40.618333,-73.08025,High,Direct,50,Feet and tenths,,\r\n HISTORY\r\n LNM28/90 (7/11/90)-- ADD SYM...,,POINT (-73.08025 40.61833)
1096,12021,UNKNOWN,C,12214,38.928942,-74.855206,High,Direct,35,Feet and tenths,,H11104/02--OPR-C303-KR; FOUND A SUNKEN WRECK ...,,POINT (-74.85521 38.92894)
1097,12026,UNKNOWN,C,12214,38.903281,-74.814119,High,Direct,34,Feet and tenths,,H11104/02--OPR-C303-KR; FOUND A SUNKEN WRECK ...,,POINT (-74.81412 38.90328)


In [3]:
awois_wrecks.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1771 entries, 1093 to 5346
Data columns (total 14 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   RECRD       1771 non-null   object  
 1   VESSLTERMS  1771 non-null   object  
 2   AREA_ID     1771 non-null   object  
 3   CHART       1765 non-null   object  
 4   LATDEC      1771 non-null   object  
 5   LONDEC      1771 non-null   object  
 6   GP_QUALITY  1750 non-null   object  
 7   GP_SOURCE   1477 non-null   object  
 8   DEPTH       1591 non-null   object  
 9   SOUNDING_T  845 non-null    object  
 10  YEARSUNK    16 non-null     object  
 11  HISTORY     1725 non-null   object  
 12  REFERENCE   28 non-null     object  
 13  geometry    1771 non-null   geometry
dtypes: geometry(1), object(13)
memory usage: 207.5+ KB


In [4]:
# Read AWOIS Obstructions shapefile into GeoDataFrames
awois_obs = gpd.read_file('./data/AWOIS_Obstructions/AWOIS_Obstructions.shp', driver='shapefile')

# Keep only rows in geographic regions B and C (Southern MA to Northern NJ)
awois_obs = awois_obs[awois_obs['AREA_ID'].str.contains('B|C')]

awois_obs.head()

Unnamed: 0,RECRD,VESSLTERMS,AREA_ID,CHART,LATDEC,LONDEC,GP_QUALITY,GP_SOURCE,DEPTH,SOUNDING_T,YEARSUNK,HISTORY,REFERENCE,geometry
1441,15204,OBSTRUCTION,C,12326,40.338361,-73.699722,,Not Provided,24.7,Meters and tenths,,H12627/OPR-B310-FH-13: New wreck identified at...,,POINT (-73.69972 40.33836)
1442,8910,OBSTRUCTION,C,12214,38.821772,-74.829433,High,Direct,0.0,,,HISTORY\r\n H-10241/94-- OPR-D368-WH; UNCHART...,,POINT (-74.82943 38.82177)
1443,8911,OBSTRUCTION,C,12214,38.840908,-74.837733,High,Direct,12.4,Meters and tenths,,HISTORY\r\n H-10241/94-- OPR-D368-WH; UNCHART...,,POINT (-74.83773 38.84091)
1444,8777,OBSTRUCTION,C,12214,38.803025,-74.947608,High,Direct,11.9,Meters and tenths,,HISTORY\r\n H10444/92-93; FE-387/93-- OPR-D36...,,POINT (-74.94761 38.80302)
1445,8778,OBSTRUCTION,C,12214,38.805506,-74.919508,High,Direct,11.5,Meters and tenths,,HISTORY\r\n H10444/92-93; FE-387/93-- OPR-D36...,,POINT (-74.91951 38.80551)


In [5]:
awois_obs.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1218 entries, 1441 to 5274
Data columns (total 14 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   RECRD       1218 non-null   object  
 1   VESSLTERMS  1218 non-null   object  
 2   AREA_ID     1218 non-null   object  
 3   CHART       1211 non-null   object  
 4   LATDEC      1218 non-null   object  
 5   LONDEC      1218 non-null   object  
 6   GP_QUALITY  1203 non-null   object  
 7   GP_SOURCE   1203 non-null   object  
 8   DEPTH       1074 non-null   object  
 9   SOUNDING_T  759 non-null    object  
 10  YEARSUNK    2 non-null      object  
 11  HISTORY     1208 non-null   object  
 12  REFERENCE   0 non-null      object  
 13  geometry    1218 non-null   geometry
dtypes: geometry(1), object(13)
memory usage: 142.7+ KB


In [6]:
# Import ENC info from CSV file into temp DataFrame
tmp_enc = pd.read_csv('./data/ENC_Wrecks/ENC_Wrecks.csv')

tmp_enc.head()

Unnamed: 0,recrd,vesslterms,feature_type,chart,latdec,londec,gp_quality,depth,sounding_type,yearsunk,history,quasou,watlev
0,,,Wrecks - Visible,"US,US,reprt,L-1218/15",9.569,-79.037834,,,,,,,always dry
1,,,Wrecks - Visible,"US,US,reprt,L-1218/15",9.557486,-78.879013,,,,,,,always dry
2,,,Wrecks - Visible,"US,US,reprt,L-1218/15",9.554478,-78.943573,,,,,,,always dry
3,,,Wrecks - Visible,"US,US,reprt,L-1453/14",18.231279,-72.541992,,,,,,,always dry
4,,,Wrecks - Visible,"US,US,reprt,L-1453/14",18.228279,-72.53418,,,,,,,always dry


In [7]:
# Convert into a GeoDataFrame
enc_wrecks = gpd.GeoDataFrame(tmp_enc,
                              geometry=gpd.points_from_xy(
                                  tmp_enc.londec,
                                  tmp_enc.latdec),
                              crs='EPSG:4326')

# Alternative way to set CRS if not done with `crs` parameter
#    The ENC_Wrecks shapefile used WGS84, which is EPSG code 4326
# enc_wrecks.crs = 'EPSG:4326'

enc_wrecks.head()

Unnamed: 0,recrd,vesslterms,feature_type,chart,latdec,londec,gp_quality,depth,sounding_type,yearsunk,history,quasou,watlev,geometry
0,,,Wrecks - Visible,"US,US,reprt,L-1218/15",9.569,-79.037834,,,,,,,always dry,POINT (-79.03783 9.56900)
1,,,Wrecks - Visible,"US,US,reprt,L-1218/15",9.557486,-78.879013,,,,,,,always dry,POINT (-78.87901 9.55749)
2,,,Wrecks - Visible,"US,US,reprt,L-1218/15",9.554478,-78.943573,,,,,,,always dry,POINT (-78.94357 9.55448)
3,,,Wrecks - Visible,"US,US,reprt,L-1453/14",18.231279,-72.541992,,,,,,,always dry,POINT (-72.54199 18.23128)
4,,,Wrecks - Visible,"US,US,reprt,L-1453/14",18.228279,-72.53418,,,,,,,always dry,POINT (-72.53418 18.22828)


In [8]:
enc_wrecks.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 11495 entries, 0 to 11494
Data columns (total 14 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   recrd          0 non-null      float64 
 1   vesslterms     86 non-null     object  
 2   feature_type   11462 non-null  object  
 3   chart          11490 non-null  object  
 4   latdec         11495 non-null  float64 
 5   londec         11495 non-null  float64 
 6   gp_quality     0 non-null      float64 
 7   depth          2971 non-null   float64 
 8   sounding_type  148 non-null    object  
 9   yearsunk       0 non-null      float64 
 10  history        361 non-null    object  
 11  quasou         8816 non-null   object  
 12  watlev         11478 non-null  object  
 13  geometry       11495 non-null  geometry
dtypes: float64(6), geometry(1), object(7)
memory usage: 1.2+ MB


In [9]:
# Remove points outside the general area of interest (southern MA to northern NJ)
#   Longitude extent for area of interest = [-74.95, -63.6]
#   Latitude extent for area of interest = [38.8, 41.8]

# Create a polygon using the lat/lon values
enc_extent = shapely.geometry.box(-74.95, 38.8, -63.6, 41.8, ccw=True)

In [10]:
# Create mask for points of ENC wrecks that fall in polygon
in_extent = enc_wrecks['geometry'].within(enc_extent)

# Update GeoDataFrame keeping only those points
enc_wrecks = enc_wrecks[in_extent]
enc_wrecks.head()

Unnamed: 0,recrd,vesslterms,feature_type,chart,latdec,londec,gp_quality,depth,sounding_type,yearsunk,history,quasou,watlev,geometry
921,,,"Wrecks - Submerged, nondangerous","US,US,graph,Chart 12364",41.022946,-73.185735,,34.1,,,,least depth known,always under water/submerged,POINT (-73.18573 41.02295)
922,,,"Wrecks - Submerged, nondangerous","US,US,graph,Chart 12363",41.029144,-73.175032,,31.3,,,,least depth known,always under water/submerged,POINT (-73.17503 41.02914)
923,,,"Wrecks - Submerged, dangerous","US,US,graph,DD-22759",40.975046,-73.26062,,18.8,,,,least depth known,always under water/submerged,POINT (-73.26062 40.97505)
924,,,"Wrecks - Submerged, dangerous","US,US,reprt,DD-24912",40.948703,-73.202178,,10.3,,,,least depth known,always under water/submerged,POINT (-73.20218 40.94870)
925,,,"Wrecks - Submerged, nondangerous","US,US,graph,BP-191410",41.029722,-73.171556,,31.0,,,,least depth known,always under water/submerged,POINT (-73.17156 41.02972)


In [11]:
enc_wrecks.shape

(1989, 14)

In [12]:
enc_wrecks['vesslterms'].value_counts()

Hen And Chickens         1
Kerry Anne               1
USCSS Robert J Walker    1
F/V Creole Belle         1
Name: vesslterms, dtype: int64

In [13]:
# Read Biela shapefile into GeoDataFrames
biela = gpd.read_file('./data/Biela/Biela.shp', driver='shapefile')

biela.head()

Unnamed: 0,id,Name,descriptio,timestamp,begin,end,altitudeMo,tessellate,extrude,visibility,drawOrder,icon,gx_media_l,geometry
0,73,BIELA,"<img src=""https://doc-08-10-mymaps.googleuserc...",,,,,-1,0,-1,,,https://doc-08-10-mymaps.googleusercontent.com...,POINT Z (-70.91667 40.15000 0.00000)


In [14]:
# Convert geometry to 2D point to conform with other datasets
#   Extra Z dimension is common when data originate from KML files
biela.geometry = biela.geometry.map(lambda polygon: shapely.ops.transform(lambda x, y, z: (x, y), polygon))

biela.head()

Unnamed: 0,id,Name,descriptio,timestamp,begin,end,altitudeMo,tessellate,extrude,visibility,drawOrder,icon,gx_media_l,geometry
0,73,BIELA,"<img src=""https://doc-08-10-mymaps.googleuserc...",,,,,-1,0,-1,,,https://doc-08-10-mymaps.googleusercontent.com...,POINT (-70.91667 40.15000)


## Check and Convert Coordinate Reference Systems

When combining geospatial datasets, the coordinate reference systems for each set must match (otherwise, you'll introduce error). Geopandas makes checking the CRS easy with the `.crs` attribute, which displays the EPSG code for that GeoDataFrame's CRS. The AWOIS datasets both have coordinates in NAD83 (EPSG code `4269`) and the ENC dataset uses WGS84 (EPSG `4326`), which is a common CRS for web data.

All analysis done in the rest of the notebook will be mapped with folium, which assumes datasets are in WGS84, so all GeoDataFrames are converted to this CRS. Web tile providers typically use the spherical mercator (aka web mercator, EPSG code `3857`) projection, but folium does the projection conversion under the hood automatically for you, so it's not necessary to layer the projection onto the dataset coordinates.

The [Spatial Reference website](www.spatialreference.org) is a good resource to look up EPSG codes.

In [15]:
awois_wrecks.crs

<Geographic 2D CRS: EPSG:4269>
Name: NAD83
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: North America - NAD83
- bounds: (167.65, 14.92, -47.74, 86.46)
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [16]:
# Check the units the CRS uses
awois_wrecks.crs.axis_info[0].unit_name

'degree'

In [17]:
awois_obs.crs

<Geographic 2D CRS: EPSG:4269>
Name: NAD83
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: North America - NAD83
- bounds: (167.65, 14.92, -47.74, 86.46)
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [18]:
enc_wrecks.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [19]:
# Check the units the CRS uses
enc_wrecks.crs.axis_info[0].unit_name

'degree'

In [20]:
biela.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [21]:
# Convert AWOIS datasets from NAD83 to WGS84

# Alternatives: gdf.crs = 'EPSG:4326' or gdf.to_crs('EPSG:4326', inplace=True)
awois_wrecks.to_crs(epsg=4326, inplace=True)
awois_obs.to_crs(epsg=4326, inplace=True)

# Confirm the conversion worked and all CRSs are same
print('AWOIS Wrecks CRS: {}'.format(awois_wrecks.crs))
print('AWOIS Obstructions CRS: {}'.format(awois_obs.crs))
print('ENC Wrecks CRS: {}'.format(enc_wrecks.crs))
print('Biela Wreck CRS: {}'.format(biela.crs))

AWOIS Wrecks CRS: epsg:4326
AWOIS Obstructions CRS: epsg:4326
ENC Wrecks CRS: EPSG:4326
Biela Wreck CRS: epsg:4326


## Combine and Clean Datasets

Some light data cleaning to create a feature that categorizes each type of object in order to color the data points in the map. It also separates all the data into two GeoDataFrames by whether the object is identified / visible or not.

- Consolidate the `VESSLTERMS` values in the AWOIS datasets into new column `OBJTYPE`, adding a `KNOWN WRECK` label for low count values that appear to be a name of a vessel
- Add `OBJTYPE` column to ENC dataset and label visible wrecks as `KNOWN WRECK`, everything else as `UNKNOWN`
- Save all known/visible wrecks into separate GeoDataFrame
- Combine everything else into an unknown object GeoDataFrame

In [22]:
awois_wrecks['VESSLTERMS'].value_counts()

UNKNOWN                 1012
WRECK                    160
OBSTRUCTION                5
SHINNECOCK                 4
YANKEE                     3
                        ... 
DUAL BARGES WRECK          1
BERANGER                   1
BIG O                      1
ORMOND                     1
CHARLIES LAST CHANCE       1
Name: VESSLTERMS, Length: 567, dtype: int64

In [23]:
awois_obs['VESSLTERMS'].value_counts()

OBSTRUCTION    1207
FISH HAVEN        6
UNKNOWN           4
DUMP SITE         1
Name: VESSLTERMS, dtype: int64

In [24]:
# Helper function to consolidate categories for AWOIS datasets
def obj_cat_awois(row):
    """
    Retains `VESSLTERMS` values for main categories, replaces low-count
        ones with `KNOWN WRECK` (assumes the value was the name of
        the vessel).

    :param row: row of GeoDataFrame with a `VESSLTERMS` column that
        contains a string describing the vessel
    :return: str of what that row's new, consolidated category is
    
    >>>gdf['consol_cats'] = gdf.apply(obj_cat_awois, axis=1)
    """
    cat = row['VESSLTERMS']
    
    if cat in ['UNKNOWN', 'WRECK', 'OBSTRUCTION', 'FISH HAVEN', 'DUMP SITE']:
        return cat
    else:
        return 'KNOWN WRECK'

awois_wrecks['OBJTYPE'] = awois_wrecks.apply(obj_cat_awois, axis=1)
awois_obs['OBJTYPE'] = awois_obs.apply(obj_cat_awois, axis=1)

In [25]:
awois_wrecks['OBJTYPE'].value_counts()

UNKNOWN        1012
KNOWN WRECK     593
WRECK           160
OBSTRUCTION       5
FISH HAVEN        1
Name: OBJTYPE, dtype: int64

In [26]:
awois_obs['OBJTYPE'].value_counts()

OBSTRUCTION    1207
FISH HAVEN        6
UNKNOWN           4
DUMP SITE         1
Name: OBJTYPE, dtype: int64

In [27]:
enc_wrecks['feature_type'].value_counts()

Wrecks - Submerged, dangerous       1204
Wrecks - Submerged, nondangerous     519
Wrecks - Visible                     257
distributed remains of wreck           5
Name: feature_type, dtype: int64

In [28]:
enc_wrecks['vesslterms'].value_counts()

Hen And Chickens         1
Kerry Anne               1
USCSS Robert J Walker    1
F/V Creole Belle         1
Name: vesslterms, dtype: int64

In [29]:
# Helper function to create categories for ENC dataset
def obj_cat_enc(row):
    """
    Creates two categories of vessel: `KNOWN WRECK` is for visible
        wrecks or for ones that have a name in the `vesslterms` column,
        `WRECK` is for everything else.
        
    :param row: row of GeoDataFrame with both `feature_type` and`vesslterms`
        columns
    :return: str of what that row's new category is
    
    >>>gdf['consol_cats'] = gdf.apply(obj_cat_enc, axis=1)
    """
    if (row['feature_type'] == 'Wrecks - Visible') or (type(row['vesslterms']) is str):
        return 'KNOWN WRECK'
    else:
        return 'WRECK'

enc_wrecks['OBJTYPE'] = enc_wrecks.apply(obj_cat_enc, axis=1)

In [30]:
enc_wrecks['OBJTYPE'].value_counts()

WRECK          1728
KNOWN WRECK     261
Name: OBJTYPE, dtype: int64

In [31]:
# Add key column to track where rows came from before combining
awois_wrecks['IDX'] = ['AWW_{}'.format(n) for n in range(1, awois_wrecks.shape[0] + 1)]
awois_obs['IDX'] = ['AWO_{}'.format(n) for n in range(1, awois_obs.shape[0] + 1)]
enc_wrecks['IDX'] = ['ENC_{}'.format(n) for n in range(1, enc_wrecks.shape[0] + 1)]

In [32]:
print(awois_wrecks.columns)
print(awois_obs.columns)
print(enc_wrecks.columns)

Index(['RECRD', 'VESSLTERMS', 'AREA_ID', 'CHART', 'LATDEC', 'LONDEC',
       'GP_QUALITY', 'GP_SOURCE', 'DEPTH', 'SOUNDING_T', 'YEARSUNK', 'HISTORY',
       'REFERENCE', 'geometry', 'OBJTYPE', 'IDX'],
      dtype='object')
Index(['RECRD', 'VESSLTERMS', 'AREA_ID', 'CHART', 'LATDEC', 'LONDEC',
       'GP_QUALITY', 'GP_SOURCE', 'DEPTH', 'SOUNDING_T', 'YEARSUNK', 'HISTORY',
       'REFERENCE', 'geometry', 'OBJTYPE', 'IDX'],
      dtype='object')
Index(['recrd', 'vesslterms', 'feature_type', 'chart', 'latdec', 'londec',
       'gp_quality', 'depth', 'sounding_type', 'yearsunk', 'history', 'quasou',
       'watlev', 'geometry', 'OBJTYPE', 'IDX'],
      dtype='object')


In [33]:
# Combine known wrecks into one dataset
cols = ['IDX', 'OBJTYPE', 'geometry']

known_wrecks = awois_wrecks[awois_wrecks['OBJTYPE'] == 'KNOWN WRECK'][cols].append(
                   enc_wrecks[enc_wrecks['OBJTYPE'] == 'KNOWN WRECK'][cols],
                   ignore_index=True)

known_wrecks.head()

Unnamed: 0,IDX,OBJTYPE,geometry
0,AWW_28,KNOWN WRECK,POINT (-74.21987 39.32651)
1,AWW_30,KNOWN WRECK,POINT (-72.48330 40.83331)
2,AWW_31,KNOWN WRECK,POINT (-74.29959 39.25012)
3,AWW_40,KNOWN WRECK,POINT (-74.56627 38.90012)
4,AWW_43,KNOWN WRECK,POINT (-74.50172 39.11655)


In [34]:
known_wrecks['OBJTYPE'].value_counts()

KNOWN WRECK    854
Name: OBJTYPE, dtype: int64

In [35]:
# Combine everything unknown into another dataset
unknown = awois_wrecks[awois_wrecks['OBJTYPE'] != 'KNOWN WRECK'][cols].append(
              awois_obs[cols],
              ignore_index=True).append(
                  enc_wrecks[enc_wrecks['OBJTYPE'] != 'KNOWN WRECK'][cols],
                  ignore_index=True)

unknown.head()

Unnamed: 0,IDX,OBJTYPE,geometry
0,AWW_1,WRECK,POINT (-74.04772 40.56711)
1,AWW_2,UNKNOWN,POINT (-74.83514 38.84597)
2,AWW_3,UNKNOWN,POINT (-73.08025 40.61833)
3,AWW_4,UNKNOWN,POINT (-74.85520 38.92895)
4,AWW_5,UNKNOWN,POINT (-74.81412 38.90329)


In [36]:
unknown['OBJTYPE'].value_counts()

WRECK          1888
OBSTRUCTION    1212
UNKNOWN        1016
FISH HAVEN        7
DUMP SITE         1
Name: OBJTYPE, dtype: int64

In [37]:
# Check that re-configured GDFs have same shape as originals
awois_wrecks.shape[0] + awois_obs.shape[0] + enc_wrecks.shape[0] == known_wrecks.shape[0] + unknown.shape[0]

True

## Generate Simulated Data for Other Obstructions

In [38]:
# Set random seed
np.random.seed(42)

n = 100

# Generate uniformly distributed x, y values over different areas
lon_vals = np.concatenate((np.random.uniform(-72.9, -71.9, size=n),
                           np.random.uniform(-71.98, -70.57, size=2*n),
                           np.random.uniform(-71.22, -70.62, size=n),
                           np.random.uniform(-70.58, -69.6, size=n)))
lat_vals = np.concatenate((np.random.uniform(39.6, 40.7, size=n),
                           np.random.uniform(39.8, 40.97, size=2*n),
                           np.random.uniform(39.85, 40.45, size=n),
                           np.random.uniform(39.9, 41.05, size=n)))

# Zip coordinates together as Points
simu_points = [shapely.geometry.Point(lon, lat) for lon, lat in zip(lon_vals, lat_vals)]

# Insert actual ship location
simu_points.insert(9, biela.loc[0, 'geometry'])

In [39]:
# Create GeoDataFrame of simulated points
simu_obs = gpd.GeoDataFrame(simu_points, columns=['geometry'], crs='EPSG:4326')

# Add columns to match other GeoDataFrames
simu_obs['IDX'] = ['SIM_{}'.format(n) for n in range(1, simu_obs.shape[0] + 1)]
simu_obs['OBJTYPE'] = 'UNKNOWN'

print(simu_obs.shape)
simu_obs.head()

(501, 3)


Unnamed: 0,geometry,IDX,OBJTYPE
0,POINT (-72.52546 40.36798),SIM_1,UNKNOWN
1,POINT (-71.94929 40.18971),SIM_2,UNKNOWN
2,POINT (-72.16801 39.94048),SIM_3,UNKNOWN
3,POINT (-72.30134 40.49517),SIM_4,UNKNOWN
4,POINT (-72.74398 40.35320),SIM_5,UNKNOWN


## Helper Functions for Mapping Data

In [40]:
# Helper function to display a folium map in Jupyter notebook
def display_map(m, filename):
    """
    Helper code to ensure the folium map will display
        in different browsers when viewing the Jupyter
        notebook.
    Side effect: saves a local copy of the HTML version
        of the map.
    
    :param m: a folium map
    :param filename: str for map filename to save a copy locally
    :return: IFrame object displaying the saved map
    """
    from IPython.display import IFrame
    m.save(filename)
    return (IFrame(filename,
                   width='100%',
                   height='500px'))


In [59]:
# Helper function to instantiate a generic folium.Map

def make_map(layers=None, location=[40.5, -71.5], zoom_start=8):
    """
    Creates a folium.Map object with consistent parameters
        to use as basemap for all maps in notebook

    :param layers: list or None, layers to add to map. Default
        is None
    :return: folium.Map object
    """

    m = folium.Map(location=location,
                   tiles="stamenterrain",
                   zoom_start=zoom_start)
    
    if layers is not None:
        for l in layers:
            m.add_child(l)

    return m

In [42]:
# Helper function to color map points differently by object type
def color_obj(objtype):
    """
    Assigns color brewer qualitative scheme to objects
        depending on the type.
    :param obj: str, category of object type
    :return: str, hexcode for map icon color
    """
    labels = ['KNOWN WRECK',
              'WRECK',
              'OBSTRUCTION',
              'FISH HAVEN',
              'DUMP SITE',
              'UNKNOWN']

    colors = ['#bf5b17',  # brown
              '#bf5b17',  # brown
              '#ffff99',  # yellow
              '#7fc97f',  # green
              '#f0027f',  # hot pink
              '#666666']  # grey

    color_dict = dict(zip(labels, colors))
    return color_dict.get(objtype, '#666666')

In [43]:
# Helper function to create and return a FeatureGroup of circles
#    (This enables functionality on folium map to toggle whether
#    a FeatureGroup layer is visible or not)
def create_featuregroup(gdf, fgname):
    """
    Creates a folium FeatureGroup with given `fgname`, then iterates
        over given GeoDataFrame (`gdf`) to create and add folium.Circle
        markers to the FeatureGroup. Returns the FeatureGroup
    
    :param gdf: a GeoDataFrame, must have `OBJTYPE` column for color
        function to work
    :param fgname: str, the name for the FeatureGroup
    :return: folium.FeatureGroup object
    """
    # Create the FeatureGroup
    fg = folium.FeatureGroup(name=fgname,
                             overlay=True,
                             control=True,
                             show=True)

    # Iterate over GeoDataFrame, create circles for each row, add to FeatureGroup
    for idx, row in gdf.iterrows():
        lat = row['geometry'].y
        lon = row['geometry'].x
        folium.Circle(location=[lat, lon],
                      radius=10,
                      color=color_obj(row['OBJTYPE'])).add_to(fg)

    return fg

In [44]:
# Helper function for meter-nautical mile conversions

NM_CONVERSION = 0.0005399565

def nm_to_m(nm):
    """
    Converts nautical miles to meters
    
    :param nm: float, nautical miles
    :return: float
    """
    return nm / NM_CONVERSION

def m_to_nm(m):
    """
    Converts nautical miles to meters
    
    :param nm: float, nautical miles
    :return: float
    """
    return m * NM_CONVERSION

## The Search Begins

In [45]:
# Create a FeatureGroup of colored circles for known wrecks
#    and uknown objects that will be plotted for all maps
unk_layer = create_featuregroup(unknown, 'Unknown Objs')
kwn_layer = create_featuregroup(known_wrecks, 'Known Wrecks')
simu_layer = create_featuregroup(simu_obs, 'Simulated Objs')
layers_all = [unk_layer, kwn_layer, simu_layer]

In [60]:
# Create a map of obstructions

# Default latitude and longitude to center the map
start_lat = 40.5
start_lon = -71.5

m = make_map(layers=layers_all, location=[start_lat, start_lon])

# Add LayerControl to toggle layers on/off 
folium.LayerControl(collapsed=False).add_to(m)

# Display map
display_map(m, 'm_1.html')

### Establish Search Area Given Two Estimated Locations of Collision

The original estimate of where the collision happened came from insurance archival holdings, but conflicting press reports left a lot of uncertainty. This resulted in an excessively large search area, and combined with the collision happening in deep waters, made the search difficult for a long time. However, further research uncovered a legal claim where the captains of both vessels gave location information about the collision, which were within 11 nautical miles of each other, thus greatly reducing the search area.

The new search area looked at all known 'hangs' in a ~700 square nautical mile area and reviewed them for likely candidates.

In [47]:
# Initial estimate of the coordinates of where the collision happened
b_lat_1 = 40.094
b_lon_1 = -70.986
coord_1 = shapely.geometry.Point(b_lon_1, b_lat_1)

b_lat_2 = 40.184
b_lon_2 = -70.838
coord_2 = shapely.geometry.Point(b_lon_2, b_lat_2)

# Combine into a linestring oject
coords_ls = gpd.GeoSeries(shapely.geometry.LineString([coord_1, coord_2]),
                          crs='EPSG:4326')

# Convert CRS into projected coordinates to calculate distances in meters
coords_ls = coords_ls.to_crs('epsg:3857')

# Check the CRS unit of measurement
print(coords_ls.crs.axis_info[0].unit_name)

metre


In [48]:
# Calculate the distance between the two coordinates
dist_bn_coords = coords_ls.length

print('Distance between coordinates in meters: {0:,.0f}'.format(dist_bn_coords[0]))
print('Distance between coordinates in nautical miles: {0:.1f}'.format(m_to_nm(dist_bn_coords[0])))

Distance between coordinates in meters: 21,052
Distance between coordinates in nautical miles: 11.4


In [49]:
# Calculate the centroid between the two points
centroid = coords_ls.centroid

# Create a buffer around the centroid of ~700 squared nautical miles
buff = centroid.buffer(27645)
buff

0    POLYGON ((-7866242.731 4886164.158, -7866375.8...
dtype: geometry

In [68]:
# Create basemap
centroid_lon = centroid.to_crs('epsg:4326')[0].x
centroid_lat = centroid.to_crs('epsg:4326')[0].y

m_2 = make_map(layers=layers_all, 
               location=[centroid_lat, centroid_lon],
               zoom_start=9)

# Plot the 700nm^2 buffer around the centroid between the coordinates
folium.GeoJson(buff.to_crs('epsg:4326').to_json(), name="Search Area").add_to(m_2)

# Plot the approximate coordinates for the Biela, color green
folium.Circle([b_lat_1, b_lon_1], radius=10, color='#cfff04').add_to(m_2)
folium.Circle([b_lat_2, b_lon_2], radius=10, color='#cfff04').add_to(m_2)

# Add LayerControl to toggle layers on/off 
folium.LayerControl(collapsed=False).add_to(m_2)

# Display map
display_map(m_2, 'm_2.html')

### Find All un-Identified Hangs that are Within the Search Area

In [51]:
# Combine all unknown items into one GeoDataFrame
all_unknown = unknown.append(simu_obs, ignore_index=True)

# Convert to projected CRS
all_unknown.to_crs('EPSG:3857', inplace=True)

print(all_unknown.shape)
all_unknown.head()

(4625, 3)


Unnamed: 0,IDX,OBJTYPE,geometry
0,AWW_1,WRECK,POINT (-8242954.142 4948699.211)
1,AWW_2,UNKNOWN,POINT (-8330609.567 4699632.337)
2,AWW_3,UNKNOWN,POINT (-8135256.194 4956207.612)
3,AWW_4,UNKNOWN,POINT (-8332843.241 4711499.019)
4,AWW_5,UNKNOWN,POINT (-8328269.471 4707827.719)


In [52]:
s_idx = all_unknown['geometry'].within(buff.loc[0])
sum(s_idx)

57

In [53]:
# Calculate and filter for points within the search area buffer
# gdf_within = gdfdata.loc[gdfdata.geometry.within(gdfselbuff.unary_union)]
search_points = all_unknown.loc[s_idx]

print(search_points.shape)
search_points.head()

(57, 3)


Unnamed: 0,IDX,OBJTYPE,geometry
4066,ENC_1932,WRECK,POINT (-7870134.768 4895310.027)
4087,ENC_1953,WRECK,POINT (-7903718.188 4881611.603)
4088,ENC_1954,WRECK,POINT (-7887047.983 4868362.645)
4133,SIM_10,UNKNOWN,POINT (-7894407.222 4887763.873)
4226,SIM_103,UNKNOWN,POINT (-7912885.662 4884262.007)


In [54]:
# Narrow down search to top 5
top_5 = search_points.loc[(4087, 4133, 4321, 4451, 4482), :]
top_5

Unnamed: 0,IDX,OBJTYPE,geometry
4087,ENC_1953,WRECK,POINT (-7903718.188 4881611.603)
4133,SIM_10,UNKNOWN,POINT (-7894407.222 4887763.873)
4321,SIM_198,UNKNOWN,POINT (-7898830.594 4885972.363)
4451,SIM_328,UNKNOWN,POINT (-7891147.888 4874346.281)
4482,SIM_359,UNKNOWN,POINT (-7889049.191 4885323.310)


In [55]:
# Combine points into one geometry to find convex hull
union = top_5['geometry'].unary_union

# Create convex hull and calculate new search area
cvxhull = gpd.GeoSeries(union.convex_hull, crs='EPSG:3857')

print('Original search area in square meters: {0: ,.0f}'.format(buff[0].area))
print('New search area in square meters: {0:,.0f}'.format(cvxhull[0].area))

Original search area in square meters:  2,397,094,716
New search area in square meters: 109,726,705


In [71]:
# Map the top points and reduced search area
top_layer = create_featuregroup(top_5.to_crs('EPSG:4326'), 'Top Points')

# Create basemap
m_3 = make_map(layers=[top_layer],
               location=[centroid_lat, centroid_lon],
               zoom_start=11)

# Plot the reduced search area
folium.GeoJson(cvxhull.to_crs('epsg:4326').to_json(), name="Search Area").add_to(m_3)

# Plot the approximate coordinates for the Biela, color green
folium.Circle([b_lat_1, b_lon_1], radius=10, color='#cfff04').add_to(m_3)
folium.Circle([b_lat_2, b_lon_2], radius=10, color='#cfff04').add_to(m_3)

# Add a popup to show lat, lon where user clicks
folium.features.LatLngPopup().add_to(m_3)

# Add LayerControl to toggle layers on/off 
folium.LayerControl(collapsed=False).add_to(m_3)

# Display map
display_map(m_3, 'm_3.html')

## Wreckhunting Success!

In [73]:
# Get actual coordinates for where wreck was found
b_lat = biela['geometry'].y[0]
b_lon = biela['geometry'].x[0]

# Create basemap
m_final = make_map(layers=layers_all,
                   location=[centroid_lat, centroid_lon],
                   zoom_start=10)

# Plot the reduced search area
folium.GeoJson(cvxhull.to_crs('epsg:4326').to_json(), name="Search Area").add_to(m_final)

# Plot the approximate coordinates for the Biela, color 
folium.Circle([b_lat_1, b_lon_1], radius=10, color='#cfff04').add_to(m_final)
folium.Circle([b_lat_2, b_lon_2], radius=10, color='#cfff04').add_to(m_final)

# Create and plot a marker for the Biela
folium.Marker([b_lat, b_lon],
              popup='<b>BIELA</b><br/>Lat: {0:.2f}<br/>Lon: {1:.2f}'.format(b_lat, b_lon),
              icon=folium.map.Icon(icon='ship',
                                   prefix='fa')).add_to(m_final)

# Add LayerControl to toggle layers on/off 
folium.LayerControl(collapsed=False).add_to(m_final)

# Display map
display_map(m_final, 'm_final.html')

In [58]:
# Calculate the distance to Montauk, NY (source: Google maps)
montauk_coords = gpd.GeoSeries(shapely.geometry.Point(-71.94990647252806,
                                                      41.03664232633056), 
                               crs='EPSG:4326')

# Convert to projected coordinates to calculate distances accurately
montauk_coords = montauk_coords.to_crs('epsg:3857')

dist_to_wreck = montauk_coords.distance(biela['geometry'].to_crs('EPSG:3857'))
print('Distance in meters: {0:,.0f}'.format(dist_to_wreck[0]))
print('Distance in nautical miles: {0:.1f}'.format(m_to_nm(dist_to_wreck[0])))

Distance in meters: 173,567
Distance in nautical miles: 93.7
