In [6]:
# check_available_data.py

from erddapy import ERDDAP
import pandas as pd
import requests

# Connect to Irish Marine Institute ERDDAP
e = ERDDAP(
    server="https://erddap.marine.ie/erddap",
    protocol="tabledap"
)
e.dataset_id = "IWBNetwork"

print("="*70)
print("AVAILABLE VARIABLES IN IRISH BUOY NETWORK")
print("="*70)

# Fetch dataset info as CSV
info_url = e.get_info_url(response='csv')
response = requests.get(info_url)

# Parse CSV properly
from io import StringIO
df_info = pd.read_csv(StringIO(response.text))

# Filter for variables only
variables = df_info[df_info['Row Type'] == 'variable']

print(f"\nFound {len(variables)} variables:\n")

# Group by category
met_vars = []
wave_vars = []
water_vars = []
other_vars = []

for idx, row in variables.iterrows():
    var_name = row['Variable Name']
    
    # Try to get description
    attrs = df_info[(df_info['Variable Name'] == var_name) & 
                    (df_info['Attribute Name'] == 'long_name')]
    
    description = attrs['Value'].iloc[0] if not attrs.empty else 'No description'
    
    # Try to get units
    unit_attrs = df_info[(df_info['Variable Name'] == var_name) & 
                        (df_info['Attribute Name'] == 'units')]
    
    units = unit_attrs['Value'].iloc[0] if not unit_attrs.empty else ''
    
    # Categorize
    var_lower = var_name.lower()
    
    if any(x in var_lower for x in ['wind', 'air', 'temp', 'pressure', 'humidity', 'dew']):
        met_vars.append((var_name, description, units))
    elif any(x in var_lower for x in ['wave', 'hmax', 'tp', 'direction', 'period']):
        wave_vars.append((var_name, description, units))
    elif any(x in var_lower for x in ['sea', 'salinity', 'water']):
        water_vars.append((var_name, description, units))
    else:
        other_vars.append((var_name, description, units))

# Display categorized
def print_category(title, vars_list):
    if vars_list:
        print(f"\n{title}")
        print("-" * 70)
        for var_name, desc, units in vars_list:
            unit_str = f" ({units})" if units else ""
            print(f"  • {var_name}{unit_str}")
            print(f"    {desc}")

print_category("METEOROLOGICAL VARIABLES", met_vars)
print_category("WAVE VARIABLES", wave_vars)
print_category("WATER VARIABLES", water_vars)
print_category("OTHER VARIABLES", other_vars)

# Create a summary DataFrame
print("\n" + "="*70)
print("SUMMARY TABLE")
print("="*70)

summary_data = []
for var_name, desc, units in (met_vars + wave_vars + water_vars + other_vars):
    summary_data.append({
        'Variable': var_name,
        'Units': units,
        'Description': desc
    })

summary_df = pd.DataFrame(summary_data)
print(summary_df.to_string(index=False))

# Save to CSV
summary_df.to_csv('irish_buoy_variables.csv', index=False)
print("\n✓ Saved variable list to 'irish_buoy_variables.csv'")

# Now test fetching data with discovered variables
print("\n" + "="*70)
print("TESTING DATA FETCH")
print("="*70)

# Get list of variable names
var_names = [row['Variable Name'] for idx, row in variables.iterrows() 
             if row['Variable Name'] not in ['time', 'station_id', 'latitude', 'longitude']]

print(f"\nAttempting to fetch data with {len(var_names)} variables...")

try:
    e.variables = ["time", "station_id"] + var_names
    e.constraints = {
        "time>=": "now-7days",
        "station_id=": "M5"
    }
    
    df_test = e.to_pandas(index_col="time (UTC)", parse_dates=True)
    
    print(f"✓ Successfully fetched {len(df_test)} records")
    print(f"✓ Columns: {len(df_test.columns)}")
    print(f"\nAvailable columns:")
    for col in df_test.columns:
        print(f"  • {col}")
    
    print("\n" + "="*70)
    print("SAMPLE DATA (Latest 5 records)")
    print("="*70)
    print(df_test.tail().to_string())
    
    # Save sample
    df_test.to_csv('irish_buoy_sample_data.csv')
    print("\n✓ Saved sample data to 'irish_buoy_sample_data.csv'")
    
except Exception as e:
    print(f"✗ Error: {e}")
    print("\nTrying with basic variables only...")
    
    e.variables = ["time", "station_id", "WindSpeed", "AirTemperature", 
                   "AtmosphericPressure", "WaveHeight"]
    
    df_basic = e.to_pandas(index_col="time (UTC)", parse_dates=True)
    print(f"✓ Basic fetch succeeded: {len(df_basic)} records")
    print(df_basic.tail())

AVAILABLE VARIABLES IN IRISH BUOY NETWORK

Found 22 variables:


METEOROLOGICAL VARIABLES
----------------------------------------------------------------------
  • AtmosphericPressure (millibars)
    Air pressure
  • WindDirection (degrees true)
    Wind direction
  • WindSpeed (knots)
    Wind speed
  • AirTemperature (degrees_C)
    Air temperature
  • DewPoint (degrees_C)
    Dew point temperature
  • SeaTemperature (degrees_C)
    Sea surface temperature
  • RelativeHumidity (percent)
    Relative humidity

WAVE VARIABLES
----------------------------------------------------------------------
  • WaveHeight (meters)
    Wave height
  • WavePeriod (seconds)
    Wave period
  • MeanWaveDirection (degrees_true)
    Mean wave direction
  • Hmax (meters)
    Maximum wave height
  • SprTp (degrees)
    Directional spreading at the spectral peak period
  • ThTp (degrees_true)
    Directional spreading at the spectral peak period
  • Tp (seconds)
    Period of peak wave

WATER VARIABLES
--