In [None]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
import geopandas as gpd
import contextily as cx

In [None]:
# from maritime_clean.py
def _parse_elev_table(raw_text, header_marker, n_fields, columns):
    """Turn the text of one <pre> table into a DataFrame of string tokens.

    Parameters
    ----------
    raw_text : str
        Full text of the <pre> element, header lines included.
    header_marker : str
        Last word of the header; everything after its final occurrence is
        treated as data.
    n_fields : int
        Number of whitespace-separated tokens per data row.
    columns : list of str
        Names for the leading ``len(columns)`` fields; extra trailing fields
        in each row are discarded.

    Returns
    -------
    pandas.DataFrame
        One row per ``n_fields`` tokens, values kept as strings.

    Raises
    ------
    IndexError
        If ``header_marker`` does not occur in ``raw_text``.
    """
    # rsplit(..., 1) cuts at the LAST occurrence, so header words that repeat
    # the marker earlier on do not matter.
    tokens = raw_text.rsplit(header_marker, 1)[1].split()
    rows = [tokens[start : start + n_fields] for start in range(0, len(tokens), n_fields)]
    frame = pd.DataFrame(rows).iloc[:, : len(columns)]
    frame.columns = columns
    return frame


def get_elevs(url):
    """Scrape the three <pre> elevation tables at ``url`` into one DataFrame.

    The page is expected to contain at least three <pre> blocks:

    * table 1: 5 fields per row (no tide reference), header ends in "ELEVATION"
    * table 2: 6 fields per row, header ends in "ELEVATION"
    * table 3: 9 fields per row (only the first 6 are kept), header ends in
      "CIRCLE"

    Parameters
    ----------
    url : str
        Address of the page to download.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: Station_ID, Site_Elevation, Air_Temp_Elevation,
        Anemometer_Elevation, Barometer_Elevation, Tide_Reference. Values are
        the raw string tokens from the page; Tide_Reference is NaN for rows
        coming from table 1. The index is a fresh 0..n-1 range.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    IndexError
        If fewer than three <pre> tables are found, or a table lacks its
        expected header marker.
    """
    five_cols = [
        "Station_ID",
        "Site_Elevation",
        "Air_Temp_Elevation",
        "Anemometer_Elevation",
        "Barometer_Elevation",
    ]
    six_cols = [
        "Station_ID",
        "Site_Elevation",
        "Air_Temp_Elevation",
        "Anemometer_Elevation",
        "Tide_Reference",
        "Barometer_Elevation",
    ]

    # A timeout keeps the notebook from hanging forever on a stalled server;
    # raise_for_status surfaces HTTP errors instead of parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    tables = BeautifulSoup(response.text, "html.parser").find_all("pre")
    if len(tables) < 3:
        raise IndexError(
            f"expected at least 3 <pre> tables at {url}, found {len(tables)}"
        )

    # Table 1 has no tide-reference field; add it as an all-NaN last column.
    # np.nan (lowercase) is used because the np.NAN alias was removed in NumPy 2.0.
    first = _parse_elev_table(tables[0].get_text(), "ELEVATION", 5, five_cols)
    first["Tide_Reference"] = np.nan

    second = _parse_elev_table(tables[1].get_text(), "ELEVATION", 6, six_cols)

    # Table 3 carries three extra trailing fields per row that are dropped.
    third = _parse_elev_table(tables[2].get_text(), "CIRCLE", 9, six_cols)

    # concat aligns on column names, so the result keeps table 1's column order.
    return pd.concat([first, second, third], ignore_index=True)

In [None]:
# Page whose <pre> tables get_elevs() parses into one elevation table.
url = "https://www.ndbc.noaa.gov/bmanht.shtml"
elevs_df = get_elevs(url=url)
# Bare last expression so the notebook renders the scraped frame.
elevs_df

In [None]:
# Load each network's station list, keeping only the columns from position 3
# onward (the leading three columns are discarded as empty).
mar_stns = pd.read_csv("stationlist_MARITIME.csv").iloc[:, 3:]
ndbc_stns = pd.read_csv("stationlist_NDBC.csv").iloc[:, 3:]

In [None]:
# Stations whose scraped site elevation contains "NA", i.e. that do NOT have a
# valid elevation ...
na_elevs = elevs_df.loc[elevs_df["Site_Elevation"].str.contains("NA")]
# ... split by which network's station list the station ID appears in.
ndbc_elevs = na_elevs.loc[na_elevs["Station_ID"].isin(ndbc_stns["STATION_ID"])]
mar_elevs = na_elevs.loc[na_elevs["Station_ID"].isin(mar_stns["STATION_ID"])]

In [None]:
# Buoys identified as producing valid weather data by running the
# maritime_clean script.
mar_valid_wx_buoys = [
    "chao3", "pxac1", "smoc1", "agxc1", "baxc1", "mlto3", "ohbc1",
    "okxc1", "omhc1", "pfdc1", "pfxc1", "ppxc1", "prjc1", "psxc1",
]
ndbc_valid_wx_buoys = [
    "46089", "46109", "46110", "46111",
    "46112", "46113", "46274", "46235",
]

# Rows of each network's station list whose station has no valid elevation
# in elevs_df.
empty_ndbc = ndbc_stns.loc[ndbc_stns["STATION_ID"].isin(ndbc_elevs["Station_ID"])]
empty_mar = mar_stns.loc[mar_stns["STATION_ID"].isin(mar_elevs["Station_ID"])]

# Of those, keep only the buoys known to report valid weather data.
mar_wx_buoys = empty_mar.loc[empty_mar["STATION_ID"].isin(mar_valid_wx_buoys)]
ndbc_wx_buoys = empty_ndbc.loc[empty_ndbc["STATION_ID"].isin(ndbc_valid_wx_buoys)]

# Stack both networks row-wise into a single table and display it.
nan_elev_buoys = pd.concat([mar_wx_buoys, ndbc_wx_buoys])
nan_elev_buoys

In [None]:
# Build point geometries from the buoy coordinates. The CRS (EPSG:4326,
# lon/lat) is declared in the constructor instead of via an in-place set_crs,
# so re-running the cell never mutates an existing object.
gdf = gpd.GeoDataFrame(
    nan_elev_buoys,
    geometry=gpd.points_from_xy(nan_elev_buoys.LONGITUDE, nan_elev_buoys.LATITUDE),
    crs="EPSG:4326",
)
gdf_wm = gdf.to_crs(epsg=3857)  # Web Mercator, the projection of the tile basemap

# Clip the buoy points to the US state polygons.
# NOTE(review): gdf_us is not used by the plot below, which draws every buoy in
# gdf_wm; it is kept in case another cell relies on it -- confirm intent.
shapepath = "tl_2021_us_state.shp"
us = gpd.read_file(shapepath).to_crs(epsg=3857)
gdf_us = gdf_wm.clip(us)

# Plot one colour per station on a light basemap.
ax = gdf_wm.plot(
    "STATION_ID", figsize=(10, 10), markersize=20, legend=True, cmap="tab20"
)
# The Stamen providers were removed from xyzservices/contextily when the
# Stamen-hosted tiles were shut down, so cx.providers.Stamen.TonerLite now
# raises AttributeError. CartoDB Positron is a comparable light basemap that
# needs no API key.
cx.add_basemap(ax, source=cx.providers.CartoDB.Positron)
ax.set_axis_off()

# ax.figure.savefig("empty_buoys_all_fnl.pdf", format='pdf', bbox_inches='tight')