# Working with Geospatial Data in Python

**Data Sources**

- [National Oceanic and Atmospheric Administration's Wrecks and Obstructions Database](https://nauticalcharts.noaa.gov/data/wrecks-and-obstructions.html): a collection of known wrecks and obstructions in U.S. coastal waters, courtesy of the Coast Survey's Automated Wreck and Obstruction Information System (AWOIS).


In [1]:
# Package imports
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely

import matplotlib.pyplot as plt
import contextily as ctx
import folium

%matplotlib inline

In [5]:
# Read the AWOIS Wrecks shapefile into a GeoDataFrame.
# No `driver` argument is passed: the format is detected from the .shp
# extension, and 'shapefile' is not a GDAL driver name (the shapefile driver
# is called 'ESRI Shapefile').
awois_wrecks_all = gpd.read_file('./data/AWOIS_Wrecks/AWOIS_Wrecks.shp')

# Keep only rows in geographic regions B and C (Southern MA to Northern NJ).
# 'B|C' is a regex substring match, so any AREA_ID containing "B" or "C" is kept.
# na=False treats a missing AREA_ID as "not in region" instead of producing a
# NaN in the mask, which would make the boolean indexing below raise.
in_regions_b_c = awois_wrecks_all['AREA_ID'].str.contains('B|C', na=False)
awois_wrecks = awois_wrecks_all[in_regions_b_c]

awois_wrecks.head()

Unnamed: 0,RECRD,VESSLTERMS,AREA_ID,CHART,LATDEC,LONDEC,GP_QUALITY,GP_SOURCE,DEPTH,SOUNDING_T,YEARSUNK,HISTORY,REFERENCE,geometry
1093,15129,WRECK,C,12402,40.567114,-74.047717,High,Direct,4,Feet and tenths,,"LNM09/12, USCG District 1-- Added ""4"" wreck an...",,POINT (-74.04772 40.56711)
1094,8909,UNKNOWN,C,12214,38.845972,-74.835139,High,Direct,28,Feet and tenths,,H-10241/94-- OPR-D368-WH; UNCHARTED WRECKAGE A...,,POINT (-74.83514 38.84597)
1095,11992,UNKNOWN,C,12353,40.618333,-73.08025,High,Direct,50,Feet and tenths,,\r\n HISTORY\r\n LNM28/90 (7/11/90)-- ADD SYM...,,POINT (-73.08025 40.61833)
1096,12021,UNKNOWN,C,12214,38.928942,-74.855206,High,Direct,35,Feet and tenths,,H11104/02--OPR-C303-KR; FOUND A SUNKEN WRECK ...,,POINT (-74.85521 38.92894)
1097,12026,UNKNOWN,C,12214,38.903281,-74.814119,High,Direct,34,Feet and tenths,,H11104/02--OPR-C303-KR; FOUND A SUNKEN WRECK ...,,POINT (-74.81412 38.90328)


In [6]:
# Summarize the filtered AWOIS wrecks frame: index range, per-column non-null counts and dtypes.
awois_wrecks.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1771 entries, 1093 to 5346
Data columns (total 14 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   RECRD       1771 non-null   object  
 1   VESSLTERMS  1771 non-null   object  
 2   AREA_ID     1771 non-null   object  
 3   CHART       1765 non-null   object  
 4   LATDEC      1771 non-null   object  
 5   LONDEC      1771 non-null   object  
 6   GP_QUALITY  1750 non-null   object  
 7   GP_SOURCE   1477 non-null   object  
 8   DEPTH       1591 non-null   object  
 9   SOUNDING_T  845 non-null    object  
 10  YEARSUNK    16 non-null     object  
 11  HISTORY     1725 non-null   object  
 12  REFERENCE   28 non-null     object  
 13  geometry    1771 non-null   geometry
dtypes: geometry(1), object(13)
memory usage: 207.5+ KB


In [7]:
# Count how many rows share each VESSLTERMS value (most frequent first).
awois_wrecks['VESSLTERMS'].value_counts()

UNKNOWN              1012
WRECK                 160
OBSTRUCTION             5
SHINNECOCK              4
SAN DIEGO               3
                     ... 
VALIANT                 1
MARIE & KATHERINE       1
TEXAS                   1
JACOB HASKELL           1
AYRESHIRE               1
Name: VESSLTERMS, Length: 567, dtype: int64

In [9]:
# Read the AWOIS Obstructions shapefile into a GeoDataFrame.
# No `driver` argument is passed: the format is detected from the .shp
# extension, and 'shapefile' is not a GDAL driver name (the shapefile driver
# is called 'ESRI Shapefile').
awois_obs_all = gpd.read_file('./data/AWOIS_Obstructions/AWOIS_Obstructions.shp')

# Keep only rows in geographic regions B and C (Southern MA to Northern NJ).
# 'B|C' is a regex substring match, so any AREA_ID containing "B" or "C" is kept.
# na=False treats a missing AREA_ID as "not in region" instead of producing a
# NaN in the mask, which would make the boolean indexing below raise.
in_regions_b_c = awois_obs_all['AREA_ID'].str.contains('B|C', na=False)
awois_obs = awois_obs_all[in_regions_b_c]

awois_obs.head()

Unnamed: 0,RECRD,VESSLTERMS,AREA_ID,CHART,LATDEC,LONDEC,GP_QUALITY,GP_SOURCE,DEPTH,SOUNDING_T,YEARSUNK,HISTORY,REFERENCE,geometry
1441,15204,OBSTRUCTION,C,12326,40.338361,-73.699722,,Not Provided,24.7,Meters and tenths,,H12627/OPR-B310-FH-13: New wreck identified at...,,POINT (-73.69972 40.33836)
1442,8910,OBSTRUCTION,C,12214,38.821772,-74.829433,High,Direct,0.0,,,HISTORY\r\n H-10241/94-- OPR-D368-WH; UNCHART...,,POINT (-74.82943 38.82177)
1443,8911,OBSTRUCTION,C,12214,38.840908,-74.837733,High,Direct,12.4,Meters and tenths,,HISTORY\r\n H-10241/94-- OPR-D368-WH; UNCHART...,,POINT (-74.83773 38.84091)
1444,8777,OBSTRUCTION,C,12214,38.803025,-74.947608,High,Direct,11.9,Meters and tenths,,HISTORY\r\n H10444/92-93; FE-387/93-- OPR-D36...,,POINT (-74.94761 38.80302)
1445,8778,OBSTRUCTION,C,12214,38.805506,-74.919508,High,Direct,11.5,Meters and tenths,,HISTORY\r\n H10444/92-93; FE-387/93-- OPR-D36...,,POINT (-74.91951 38.80551)


In [10]:
# Summarize the filtered AWOIS obstructions frame: index range, per-column non-null counts and dtypes.
awois_obs.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1218 entries, 1441 to 5274
Data columns (total 14 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   RECRD       1218 non-null   object  
 1   VESSLTERMS  1218 non-null   object  
 2   AREA_ID     1218 non-null   object  
 3   CHART       1211 non-null   object  
 4   LATDEC      1218 non-null   object  
 5   LONDEC      1218 non-null   object  
 6   GP_QUALITY  1203 non-null   object  
 7   GP_SOURCE   1203 non-null   object  
 8   DEPTH       1074 non-null   object  
 9   SOUNDING_T  759 non-null    object  
 10  YEARSUNK    2 non-null      object  
 11  HISTORY     1208 non-null   object  
 12  REFERENCE   0 non-null      object  
 13  geometry    1218 non-null   geometry
dtypes: geometry(1), object(13)
memory usage: 142.7+ KB


In [11]:
# Read the ENC Wrecks shapefile into a GeoDataFrame.
# No `driver` argument is passed: the format is detected from the .shp
# extension, and 'shapefile' is not a GDAL driver name (the shapefile driver
# is called 'ESRI Shapefile').
enc_wrecks = gpd.read_file('./data/ENC_Wrecks/ENC_Wrecks.shp')

# NOTE: unlike the AWOIS layers, this layer has no AREA_ID column, so the
# region B/C filter applied to the AWOIS frames cannot be used here.
# The frame is left unfiltered at this point.
# TODO: restrict it to the study area another way (e.g. by coordinates).

enc_wrecks.head()

Unnamed: 0,OBJL,CATWRK,CONRAD,CONVIS,EXPSOU,HEIGHT,OBJNAM,QUASOU,SOUACC,TECSOU,...,VERACC,VERDAT,VERLEN,WATLEV,INFORM,SCAMIN,SORDAT,SORIND,DSNM,geometry
0,159.0,5,,,,,,,,,...,,,,2.0,,105000.0,20150717,,US509890.000,POINT (-79.03783 9.56900)
1,159.0,5,,,,,,,,,...,,,,2.0,,105000.0,20150717,,US509890.000,POINT (-78.87901 9.55749)
2,159.0,5,,,,,,,,,...,,,,2.0,,105000.0,20150717,,US509890.000,POINT (-78.94357 9.55448)
3,159.0,5,,,,,,,,,...,,,,2.0,,37500.0,20140603,,US510820.000,POINT (-72.54199 18.23128)
4,159.0,5,,,,,,,,,...,,,,2.0,,37500.0,20140603,,US510820.000,POINT (-72.53418 18.22828)


In [12]:
# Summarize the ENC wrecks frame: index range, per-column non-null counts and dtypes.
enc_wrecks.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 11384 entries, 0 to 11383
Data columns (total 21 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   OBJL      11384 non-null  float64 
 1   CATWRK    11342 non-null  object  
 2   CONRAD    4 non-null      float64 
 3   CONVIS    9 non-null      float64 
 4   EXPSOU    3998 non-null   float64 
 5   HEIGHT    4 non-null      float64 
 6   OBJNAM    0 non-null      object  
 7   QUASOU    8707 non-null   object  
 8   SOUACC    0 non-null      object  
 9   TECSOU    85 non-null     object  
 10  VALSOU    0 non-null      object  
 11  VERACC    0 non-null      object  
 12  VERDAT    0 non-null      object  
 13  VERLEN    0 non-null      object  
 14  WATLEV    11384 non-null  float64 
 15  INFORM    0 non-null      object  
 16  SCAMIN    1918 non-null   float64 
 17  SORDAT    11383 non-null  object  
 18  SORIND    0 non-null      object  
 19  DSNM      11384 non-null  object  
 20

In [13]:
# lat_extent = [38.8, 41.8]
# lon_extent = [-74.95, -63.6]