In [22]:
## import needed libraries
import pandas as pd
import requests
import os
import arcpy
import zipfile
from arcpy import env
from arcgis.features import GeoAccessor, GeoSeriesAccessor
from arcpy.sa import *
from bs4 import BeautifulSoup
from io import StringIO
from arcgis.gis import GIS
from arcgis.features import FeatureLayerCollection

arcpy.env.overwriteOutput = True

station_list = 'https://www.ndbc.noaa.gov/activestations.xml'
test_station = "apqf1"
ndbc_rt_source = "https://www.ndbc.noaa.gov/data/realtime2/"
station_page = 'https://www.ndbc.noaa.gov/station_page.php?station='+ test_station.capitalize()
gdb_path = r"E:\gis_projects\GOMA NERRS"
gdb_name = "NERRS.gdb"
gdb = os.path.join(gdb_path, gdb_name)
item_id = 'e94461a2ab9042f9bd5d637fb820eb71'

In [23]:
## Create the Station List Feature Class ##
# if the gdb does not exist, create it
if not arcpy.Exists(gdb):
    arcpy.management.CreateFileGDB(gdb_path, gdb_name)

# Fetch the XML data from the station_list URL
response = requests.get(station_list)
xml_data = response.content

# Parse the XML data using BeautifulSoup
soup = BeautifulSoup(xml_data, 'xml')

# Extract station information
stations = []
for station in soup.find_all('station'):
    station_info = {
        'id': station.get('id'),
        'name': station.get('name'),
        'lat': station.get('lat'),
        'lon': station.get('lon'),
        'owner': station.get('owner'),
        'pgm': station.get('pgm'),
        'type': station.get('type')
    }
    stations.append(station_info)

# Create a dataframe from the extracted station information
df_stations = pd.DataFrame(stations)
print(df_stations)
print(df_stations.count())

# Convert the df_stations to a spatial dataframe using GeoAccessor
# Assuming df_stations has 'lat' and 'lon' columns for latitude and longitude
spatial_stations_df = GeoAccessor.from_xy(df_stations, x_column='lon', y_column='lat')

# Save the spatial dataframe to the feature class in the file geodatabase
spatial_stations_df.spatial.to_featureclass(location=os.path.join(gdb, "ndbc_stations"))
print("Feature class 'stations' created in the geodatabase.")
# Filter the dataframe to show only the records where owner is 'National Estuarine Research Reserve System'
nerrs_stations_df = df_stations[df_stations['owner'] == 'National Estuarine Research Reserve System']

# Convert the filtered dataframe to a spatial dataframe
nerrs_spatial_df = GeoAccessor.from_xy(nerrs_stations_df, x_column='lon', y_column='lat')

# Save the spatial dataframe to the feature class in the file geodatabase
nerrs_spatial_df.spatial.to_featureclass(location=os.path.join(gdb, "nerrs_Stations"))
print("Feature class 'NERRS stations' created in the geodatabase.")

         id                                         name     lat       lon  \
0     0y2w3                  Sturgeon Bay CG Station, WI  44.794   -87.313   
1     13001                                 NE Extension      12       -23   
2     13002                                 NE Extension      21       -23   
3     13008                                       Reggae      15       -38   
4     13009                                      Lambada       8       -38   
...     ...                                          ...     ...       ...   
1344  yata2           9453220 - Yakutat, Yakutat Bay, AK  59.548  -139.733   
1345  ygnn6              Niagara Coast Guard Station, NY  43.262   -79.064   
1346  yktv2  8637689 - Yorktown USCG Training Center, VA  37.227   -76.479   
1347  yrsv2   Taskinas Creek, Chesapeake Bay Reserve, VA  37.414   -76.712   
1348  zbqn7                 Zeke's Basin, North Carolina  33.955   -77.935   

                                                  owner  \
0   

In [24]:
# Fetch the HTML content from the URL
realtime_data_url = "https://www.ndbc.noaa.gov/data/realtime2/"
response = requests.get(realtime_data_url)

# Check if the request was successful
if response.status_code == 200:
    html_content = response.content
    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Find all the links in the page
    links = soup.find_all('a')
    
    # Extract the file types from the links
    file_types = set()
    for link in links:
        href = link.get('href')
        if href and '.' in href:
            file_type = href.split('.')[-1]
            file_types.add(file_type)
    
    print("Available file types:", file_types)
else:
    print(f"Failed to fetch data from {realtime_data_url}. Status code: {response.status_code}")

Available file types: {'swdir2', 'cwind', 'data_spec', 'ocean', 'supl', 'adcp', 'spec', 'srad', 'txt', 'swdir', 'swr2', 'rain', 'swr1', 'drift', 'dart'}


In [25]:
# For each id in the stations feature class, create a URL using the ndbc_rt_source + id.upper()
urls_txt = [ndbc_rt_source + station_id.upper() + ".txt" for station_id in nerrs_spatial_df['id']]
urls_ocean = [ndbc_rt_source + station_id.upper() + ".ocean" for station_id in nerrs_spatial_df['id']]

responses_txt = [requests.get(url) for url in urls_txt]
responses_ocean = [requests.get(url) for url in urls_ocean]

# Check if we get a response for each URL
for i, (response_txt, response_ocean) in enumerate(zip(responses_txt, responses_ocean)):
    if response_txt.status_code == 200:
        print(f"Response received for TXT URL: {urls_txt[i]}")
        # Convert the fetched data to a dataframe
        station_data_txt = response_txt.text
        station_df_txt = pd.read_csv(StringIO(station_data_txt), delim_whitespace=True, skiprows=[1])
        # Convert the date columns to a datetime object
        station_df_txt['datetime'] = pd.to_datetime(station_df_txt[['#YY', 'MM', 'DD', 'hh', 'mm']].astype(str).agg('-'.join, axis=1), format='%Y-%m-%d-%H-%M')
        # Add a column to the dataframe called 'id' and populate it with the station id
        station_df_txt.drop(columns=['#YY', 'MM', 'DD', 'hh', 'mm'], inplace=True)
        station_df_txt['station'] = nerrs_spatial_df['id'].iloc[i]
        # Merge the station information with the station data
        station_df_txt = station_df_txt.merge(df_stations, left_on='station', right_on='id', how='left')
        # Convert the dataframe to a spatial dataframe using GeoAccessor
        spatial_station_df_txt = GeoAccessor.from_xy(station_df_txt, x_column='lon', y_column='lat')
        
        # Save the spatial dataframe to the feature class in the file geodatabase
        spatial_station_df_txt.spatial.to_featureclass(location=os.path.join(gdb, f"{nerrs_spatial_df['id'].iloc[i]}_txt"))
        print(f"Feature class '{nerrs_spatial_df['id'].iloc[i]}_txt' created in the geodatabase.")
    else:
        print(f"Failed to receive response for TXT URL: {urls_txt[i]}")
    
    if response_ocean.status_code == 200:
        print(f"Response received for OCEAN URL: {urls_ocean[i]}")
        # Convert the fetched data to a dataframe
        station_data_ocean = response_ocean.text
        station_df_ocean = pd.read_csv(StringIO(station_data_ocean), delim_whitespace=True, skiprows=[1])
        # Convert the date columns to a datetime object
        station_df_ocean['datetime'] = pd.to_datetime(station_df_ocean[['#YY', 'MM', 'DD', 'hh', 'mm']].astype(str).agg('-'.join, axis=1), format='%Y-%m-%d-%H-%M')
        # Add a column to the dataframe called 'id' and populate it with the station id
        station_df_ocean.drop(columns=['#YY', 'MM', 'DD', 'hh', 'mm'], inplace=True)
        station_df_ocean['station'] = nerrs_spatial_df['id'].iloc[i]
        # Merge the station information with the station data
        station_df_ocean = station_df_ocean.merge(df_stations, left_on='station', right_on='id', how='left')
        # Convert the dataframe to a spatial dataframe using GeoAccessor
        spatial_station_df_ocean = GeoAccessor.from_xy(station_df_ocean, x_column='lon', y_column='lat')

        # Save the spatial dataframe to the feature class in the file geodatabase
        spatial_station_df_ocean.spatial.to_featureclass(location=os.path.join(gdb, f"{nerrs_spatial_df['id'].iloc[i]}_ocean"))
        print(f"Feature class '{nerrs_spatial_df['id'].iloc[i]}_ocean' created in the geodatabase.")
    else:
        print(f"Failed to receive response for OCEAN URL: {urls_ocean[i]}")

arcpy.management.ClearWorkspaceCache()   


## download the station related measurements ##
# ndbc_rt_source + test_station + ".txt"



Failed to receive response for TXT URL: https://www.ndbc.noaa.gov/data/realtime2/ACFS1.txt
Response received for OCEAN URL: https://www.ndbc.noaa.gov/data/realtime2/ACFS1.ocean
Feature class 'acfs1_ocean' created in the geodatabase.
Failed to receive response for TXT URL: https://www.ndbc.noaa.gov/data/realtime2/ACQS1.txt
Response received for OCEAN URL: https://www.ndbc.noaa.gov/data/realtime2/ACQS1.ocean
Feature class 'acqs1_ocean' created in the geodatabase.
Response received for TXT URL: https://www.ndbc.noaa.gov/data/realtime2/ACXS1.txt
Feature class 'acxs1_txt' created in the geodatabase.
Failed to receive response for OCEAN URL: https://www.ndbc.noaa.gov/data/realtime2/ACXS1.ocean
Failed to receive response for TXT URL: https://www.ndbc.noaa.gov/data/realtime2/ADBF1.txt
Response received for OCEAN URL: https://www.ndbc.noaa.gov/data/realtime2/ADBF1.ocean
Feature class 'adbf1_ocean' created in the geodatabase.
Response received for TXT URL: https://www.ndbc.noaa.gov/data/realtime

In [26]:


# Connect to ArcGIS Online
gis = GIS("https://www.arcgis.com", "kvangraafeiland_oceans", "Esri8223")

# Create a new item in ArcGIS Online
station_properties = {
    "title": "NERRS Stations",
    "tags": "NERRS, Stations, NOAA",
    "type": "File Geodatabase",
    "description": "This geodatabase contains NERRS station data."
}

# Zip the geodatabase
gdb_zip_path = os.path.join(gdb_path, gdb_name + ".zip")
if os.path.exists(gdb_zip_path):
    os.remove(gdb_zip_path)
with zipfile.ZipFile(gdb_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for root, dirs, files in os.walk(gdb):
        for file in files:
            zipf.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), gdb_path))

print(f"Geodatabase zipped at {gdb_zip_path}")


Geodatabase zipped at E:\gis_projects\GOMA NERRS\NERRS.gdb.zip


In [27]:

# # Publish the geodatabase
# gdb_item = gis.content.add(
#     item_properties = station_properties,
#     data = gdb_zip_path
# )


In [28]:
# Get the item to update
item_to_update = gis.content.get(item_id)

# Update the item with the new data
item_to_update.update(data=gdb_zip_path)

print(f"Item {item_id} updated with new data from {gdb_zip_path}")

Item e94461a2ab9042f9bd5d637fb820eb71 updated with new data from E:\gis_projects\GOMA NERRS\NERRS.gdb.zip


In [29]:

# published_item = gdb_item.publish()
# published_item.share(everyone=True)
# display(published_item)

# print(f"Geodatabase published to ArcGIS Online. Item ID: {published_item.id}")