# Monitored Volcanoes Filter Demo

This notebook loads `../reference/monitored_volcanoes.json` and filters to actual volcanoes using the presence of `vnum` (Smithsonian number).


In [1]:
from pathlib import Path
import json

REFERENCE_JSON = Path('../reference/monitored_volcanoes.json')

with open(REFERENCE_JSON, 'r') as f:
    data = json.load(f)

len(data), data[0].get('volcano_name')


(68, 'Akutan')

In [2]:
def filter_actual_volcanoes(volcano_list):
    """Remove regional aggregate entries, keep only specific volcanoes"""
    return [v for v in volcano_list if v.get('vnum') is not None and v.get('vnum') != ""]

actual_volcanoes = filter_actual_volcanoes(data)
len(actual_volcanoes)


66

In [3]:
# Preview first 15 actual volcanoes
for v in actual_volcanoes[:100]:
    print(f"{v['volcano_name']}\t{v['vnum']}\t{v['color_code']}\t{v['alert_level']}")


Akutan	311320	GREEN	NORMAL
Aniakchak	312090	GREEN	NORMAL
Atka volcanic complex	311160	GREEN	NORMAL
Augustine	313010	GREEN	NORMAL
Cleveland	311240	GREEN	NORMAL
Dutton	312011	GREEN	NORMAL
Edgecumbe	315040	GREEN	NORMAL
Fisher	311350	GREEN	NORMAL
Gareloi	311070	GREEN	NORMAL
Great Sitkin	311120	ORANGE	WATCH
Griggs	312190	GREEN	NORMAL
Iliamna	313020	GREEN	NORMAL
Isanotski	311370	GREEN	NORMAL
Kanaga	311110	GREEN	NORMAL
Katmai	312170	GREEN	NORMAL
Korovin	311161	GREEN	NORMAL
Little Sitkin	311050	GREEN	NORMAL
Mageik	312150	GREEN	NORMAL
Makushin	311310	GREEN	NORMAL
Martin	312140	GREEN	NORMAL
Novarupta	312180	GREEN	NORMAL
Okmok	311290	GREEN	NORMAL
Pavlof	312030	GREEN	NORMAL
Redoubt	313030	GREEN	NORMAL
Semisopochnoi	311060	GREEN	NORMAL
Shishaldin	311360	YELLOW	ADVISORY
Snowy Mountain	312200	GREEN	NORMAL
Spurr	313040	GREEN	NORMAL
Takawangha	311090	GREEN	NORMAL
Tanaga	311080	GREEN	NORMAL
Trident	312160	GREEN	NORMAL
Ugashik-Peulik	312130	GREEN	NORMAL
Ukinrek Maars	312131	GREEN	NORMAL
Veniaminof	312070

In [4]:
import sys
from pathlib import Path

# Add repo root to sys.path so we can import python_code.*
repo_root = Path('..').resolve()
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))

from python_code.data_management import get_monitored_volcanoes

volcanoes = get_monitored_volcanoes()
print(len(volcanoes))
volcanoes[:3]


66


[{'volcano_name': 'Akutan',
  'vnum': '311320',
  'sent_utc': '2019-09-06 18:29:39',
  'sent_unixtime': 1567794579,
  'alert_level': 'NORMAL',
  'color_code': 'GREEN',
  'volcano_cd': 'ak6',
  'obs_fullname': 'Alaska Volcano Observatory',
  'obs_abbr': 'avo',
  'notice_type_cd': 'IS',
  'notice_identifier': 'DOI-USGS-AVO-2019-09-04T15:47:55-08:00',
  'notice_url': 'https://volcanoes.usgs.gov/hans-public/notice/DOI-USGS-AVO-2019-09-04T15:47:55-08:00',
  'notice_data': 'https://volcanoes.usgs.gov/hans-public/api/notice/getNotice/DOI-USGS-AVO-2019-09-04T15:47:55-08:00'},
 {'volcano_name': 'Aniakchak',
  'vnum': '312090',
  'sent_utc': '2024-07-08 20:52:29',
  'sent_unixtime': 1720471949,
  'alert_level': 'NORMAL',
  'color_code': 'GREEN',
  'volcano_cd': 'ak14',
  'obs_fullname': 'Alaska Volcano Observatory',
  'obs_abbr': 'avo',
  'notice_type_cd': 'VV',
  'notice_identifier': 'DOI-USGS-AVO-2024-07-08T20:02:22+00:00',
  'notice_url': 'https://volcanoes.usgs.gov/hans-public/notice/DOI-USG

In [None]:
# # Quick audit with detailed output
# import sys
# from pathlib import Path
# repo_root = Path('..').resolve()
# if str(repo_root) not in sys.path:
#     sys.path.insert(0, str(repo_root))

# from python_code.audit_station_availability import audit_volcanoes

# # Run audit on 2 volcanoes
# sample_results = audit_volcanoes(limit=100, radius_km=50)

# # Show detailed results including distance
# print("\n" + "="*80)
# print("DETAILED RESULTS WITH DISTANCE")
# print("="*80)

# for volcano in sample_results:
#     print(f"\n{volcano['name']}:")
#     print(f"  Location: {volcano['lat']:.3f}, {volcano['lon']:.3f}")
    
#     # Show closest seismic stations
#     if volcano.get('seismic_channels'):
#         seismic_sorted = sorted(volcano['seismic_channels'], key=lambda x: x.get('distance_km', 999))[:3]
#         print(f"  Closest seismic stations:")
#         for ch in seismic_sorted:
#             print(f"    {ch['station']} - {ch['channel']} @ {ch.get('distance_km', 'N/A'):.1f}km, {ch.get('sample_rate', 0):.0f}Hz")
    
#     # Show closest infrasound stations
#     if volcano.get('infrasound_channels'):
#         infra_sorted = sorted(volcano['infrasound_channels'], key=lambda x: x.get('distance_km', 999))[:3]
#         print(f"  Closest infrasound stations:")
#         for ch in infra_sorted:
#             print(f"    {ch['station']} - {ch['channel']} @ {ch.get('distance_km', 'N/A'):.1f}km, {ch.get('sample_rate', 0):.0f}Hz")

# len(sample_results), sample_results[0]['name'] if sample_results else None


Processing 66 volcanoes...
--------------------------------------------------------------------------------

[1/66] Akutan (311320)
  📍 Location: 54.133, -165.986
  🌊 Seismic: 40 stations @ [100.0, 50.0] Hz
  🔊 Infrasound: 1 stations @ [50.0] Hz

[2/66] Aniakchak (312090)
  📍 Location: 56.906, -158.209
  🌊 Seismic: 16 stations @ [100.0, 50.0] Hz
  🔊 Infrasound: 1 stations @ [50.0] Hz

[3/66] Atka volcanic complex (311160)
  📍 Location: 52.331, -174.139
  🌊 Seismic: 8 stations @ [100.0, 50.0] Hz
  🔊 Infrasound: 4 stations @ [50.0] Hz

[4/66] Augustine (313010)
  📍 Location: 59.363, -153.435
  🌊 Seismic: 30 stations @ [100.0, 50.0, 40.0] Hz
  🔊 Infrasound: 5 stations @ [50.0, 40.0, 20.0] Hz

[5/66] Cleveland (311240)
  📍 Location: 52.822, -169.945
  🌊 Seismic: 23 stations @ [50.0] Hz
  🔊 Infrasound: 9 stations @ [50.0] Hz

[6/66] Dutton (312011)
  📍 Location: 55.187, -162.274
  🌊 Seismic: 28 stations @ [100.0, 50.0] Hz
  🔊 Infrasound: 6 stations @ [50.0] Hz

[7/66] Edgecumbe (315040)
  📍

(66, 'Akutan')

In [None]:
# # Interactive Audify UI (seismic or infrasound)
# import sys, json
# from pathlib import Path
# from datetime import datetime, timedelta, timezone
# import ipywidgets as widgets
# from IPython.display import display, clear_output

# # Ensure repo root import
# repo_root = Path('..').resolve()
# if str(repo_root) not in sys.path:
#     sys.path.insert(0, str(repo_root))

# from python_code.seismic_utils import compute_time_window, fetch_seismic_data
# from python_code.audio_utils import create_audio_file, open_audio_file

# ACTIVE_PATH = repo_root / 'data' / 'reference' / 'active_volcano_stations.json'
# with open(ACTIVE_PATH, 'r') as f:
#     active = json.load(f)

# # Build volcano list
# volcano_names = sorted({a['volcano'] for a in active})
# volcano_dd = widgets.Dropdown(options=volcano_names, description='Volcano:')

# type_dd = widgets.Dropdown(options=['seismic','infrasound'], value='seismic', description='Type:')
# hours_int = widgets.IntSlider(value=3, min=1, max=48, step=1, description='Last hours:')

# channel_dd = widgets.Dropdown(options=[], description='Channel:')
# button = widgets.Button(description='Audify', button_style='primary')
# log_out = widgets.Output()

# # Helper: pick best channels (prefer Z, then N/E)
# def channels_for(volcano, kind):
#     items = [a for a in active if a['volcano']==volcano and a['type']==kind]
#     # Prefer Z first
#     def score(ch):
#         c = ch['channel']
#         if len(c)>=3 and c[2]=='Z':
#             return 0
#         if len(c)>=3 and c[2] in ('N','E'):
#             return 1
#         return 2
#     items.sort(key=score)
#     # Display label: NET.STA.LOC.CHAN (sr)
#     options = []
#     for ch in items:
#         loc = ch.get('location') or ''
#         label = f"{ch['network']}.{ch['station']}.{loc or '--'}.{ch['channel']} ({int(ch.get('sample_rate',0))} Hz)"
#         options.append((label, ch))
#     return options

# # Update channel list when volcano or type changes
# def update_channels(*args):
#     opts = channels_for(volcano_dd.value, type_dd.value)
#     channel_dd.options = opts

# volcano_dd.observe(update_channels, names='value')
# type_dd.observe(update_channels, names='value')
# update_channels()

# @log_out.capture(clear_output=True)
# def on_click(b):
#     ch = channel_dd.value
#     if not ch:
#         print('Select a channel')
#         return
#     # Time window: last N hours
#     end_dt = datetime.now(timezone.utc)
#     start_dt = end_dt - timedelta(hours=hours_int.value)
#     start_str = start_dt.strftime('%Y-%m-%dT%H:%M:%S')
#     end_str = end_dt.strftime('%Y-%m-%dT%H:%M:%S')

#     # Fetch and audify
#     n,s,l,c = ch['network'], ch['station'], (ch.get('location') or ''), ch['channel']
#     fname = repo_root / 'Audio_Files' / f"{volcano_dd.value}_{n}_{s}_{l or '--'}_{c}_{hours_int.value}h.mseed"
#     wav = repo_root / 'Audio_Files' / f"{volcano_dd.value}_{n}_{s}_{l or '--'}_{c}_{hours_int.value}h.wav"
#     try:
#         st = fetch_seismic_data(start_str, end_str, str(fname), network=n, station=s, channel=c, location=l)
#         create_audio_file(st, sampling_rate=44100, audio_filename=str(wav))
#         open_audio_file(str(wav))
#     except Exception as e:
#         print('Error:', e)

# button.on_click(on_click)

# ui = widgets.VBox([
#     widgets.HBox([volcano_dd, type_dd, hours_int]),
#     channel_dd,
#     button,
#     log_out
# ])
# ui


VBox(children=(HBox(children=(Dropdown(description='Volcano:', options=('Akutan', 'Aniakchak', 'Atka volcanic …

In [7]:
# # Active-volcano-only Audify UI
# import sys, json
# from pathlib import Path
# from datetime import datetime, timedelta, timezone
# import ipywidgets as widgets
# from IPython.display import display, clear_output

# # Ensure repo root import
# repo_root = Path('..').resolve()
# if str(repo_root) not in sys.path:
#     sys.path.insert(0, str(repo_root))

# from python_code.seismic_utils import fetch_seismic_data
# from python_code.audio_utils import create_audio_file, open_audio_file

# ACTIVE_PATH = repo_root / 'data' / 'reference' / 'active_volcano_stations.json'
# MONITORED_PATH = repo_root / 'reference' / 'monitored_volcanoes.json'
# with open(ACTIVE_PATH, 'r') as f:
#     active = json.load(f)
# with open(MONITORED_PATH, 'r') as f:
#     monitored = json.load(f)

# # Currently active per USGS (non-GREEN or non-NORMAL)
# def is_alerting(v):
#     return (v.get('color_code') and v['color_code'] != 'GREEN') or (v.get('alert_level') and v['alert_level'] != 'NORMAL')

# active_names_with_channels = {a['volcano'] for a in active}
# active_usgs_names = {v['volcano_name'] for v in monitored if v.get('vnum') and is_alerting(v)}
# volcano_names_active = sorted(active_names_with_channels & active_usgs_names)

# volcano_dd2 = widgets.Dropdown(options=volcano_names_active, description='Volcano:')
# type_dd2 = widgets.Dropdown(options=['seismic','infrasound'], value='seismic', description='Type:')
# hours_int2 = widgets.IntSlider(value=6, min=1, max=48, step=1, description='Last hours:')

# channel_dd2 = widgets.Dropdown(options=[], description='Channel:')
# button2 = widgets.Button(description='Audify', button_style='primary')
# button2.disabled = True
# log_out2 = widgets.Output()

# # Helper to build channel options, prefer Z then N/E
# def channels_for(volcano, kind):
#     items = [a for a in active if a['volcano']==volcano and a['type']==kind]
#     def score(ch):
#         c = ch['channel']
#         if len(c)>=3 and c[2]=='Z':
#             return 0
#         if len(c)>=3 and c[2] in ('N','E'):
#             return 1
#         return 2
#     items.sort(key=score)
#     options = []
#     for ch in items:
#         loc = ch.get('location') or ''
#         label = f"{ch['network']}.{ch['station']}.{loc or '--'}.{ch['channel']} ({int(ch.get('sample_rate',0))} Hz)"
#         options.append((label, ch))
#     return options

# # Update on selection change
# def update_channels2(*args):
#     opts = channels_for(volcano_dd2.value, type_dd2.value)
#     channel_dd2.options = opts
#     if opts:
#         channel_dd2.value = opts[0][1]
#         button2.disabled = False
#     else:
#         channel_dd2.value = None
#         button2.disabled = True

# volcano_dd2.observe(update_channels2, names='value')
# type_dd2.observe(update_channels2, names='value')
# update_channels2()

# @log_out2.capture(clear_output=True)
# def on_click2(b):
#     ch = channel_dd2.value
#     if not ch:
#         print('Select a channel')
#         return
#     end_dt = datetime.now(timezone.utc)
#     start_dt = end_dt - timedelta(hours=hours_int2.value)
#     start_str = start_dt.strftime('%Y-%m-%dT%H:%M:%S')
#     end_str = end_dt.strftime('%Y-%m-%dT%H:%M:%S')

#     n,s,l,c = ch['network'], ch['station'], (ch.get('location') or ''), ch['channel']
#     fname = repo_root / 'Audio_Files' / f"{volcano_dd2.value}_{n}_{s}_{l or '--'}_{c}_{hours_int2.value}h.mseed"
#     wav = repo_root / 'Audio_Files' / f"{volcano_dd2.value}_{n}_{s}_{l or '--'}_{c}_{hours_int2.value}h.wav"
#     try:
#         st = fetch_seismic_data(start_str, end_str, str(fname), network=n, station=s, channel=c, location=l)
#         if not st or len(st)==0:
#             print('No data returned for this channel/time window.')
#             return
#         create_audio_file(st, sampling_rate=44100, audio_filename=str(wav))
#         open_audio_file(str(wav))
#     except Exception as e:
#         print('Error:', e)

# button2.on_click(on_click2)

# ui_active = widgets.VBox([
#     widgets.HBox([volcano_dd2, type_dd2, hours_int2]),
#     channel_dd2,
#     button2,
#     log_out2
# ])
# ui_active


In [11]:
# Kilauea-Focused UI (sorted by distance)
import sys, json
from pathlib import Path
from datetime import datetime, timedelta, timezone
import ipywidgets as widgets
from IPython.display import display, clear_output

# Ensure repo root import
repo_root = Path('..').resolve()
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))

from python_code.seismic_utils import fetch_seismic_data
from python_code.audio_utils import create_audio_file, open_audio_file

# Load from volcano_station_availability.json (has distance_km field)
AVAIL_PATH = repo_root / 'data' / 'reference' / 'volcano_station_availability.json'
with open(AVAIL_PATH, 'r') as f:
    availability_data = json.load(f)

# Find Kilauea data
kilauea_volcano = [v for v in availability_data if v['name'] == 'Kilauea'][0]

# Flatten seismic and infrasound channels with type label
kilauea_data = []
for ch in kilauea_volcano.get('seismic_channels', []):
    ch_copy = ch.copy()
    ch_copy['type'] = 'seismic'
    ch_copy['volcano'] = 'Kilauea'
    kilauea_data.append(ch_copy)
    
for ch in kilauea_volcano.get('infrasound_channels', []):
    ch_copy = ch.copy()
    ch_copy['type'] = 'infrasound'
    ch_copy['volcano'] = 'Kilauea'
    kilauea_data.append(ch_copy)

type_dd3 = widgets.Dropdown(options=['seismic','infrasound'], value='seismic', description='Type:')
hours_int3 = widgets.IntSlider(value=6, min=1, max=48, step=1, description='Last hours:')
channel_dd3 = widgets.Dropdown(options=[], description='Channel:')
button3 = widgets.Button(description='Audify', button_style='success')
button3.disabled = True
log_out3 = widgets.Output()

# Verified stations with real-time data
VERIFIED_STATIONS = {
    'POHA',  # IU.POHA.32.BDF - verified infrasound
    'OBL',   # HV.OBL - verified seismic
}

# Helper: get unique stations with distance, sorted by proximity
def stations_by_distance(kind):
    items = [a for a in kilauea_data if a['type']==kind]
    
    # FILTER: Only active channels (empty end_time)
    active_items = [ch for ch in items if not ch.get('end_time')]
    
    # Group by station, keep closest channel
    stations = {}
    for ch in active_items:
        station_key = (ch['network'], ch['station'])
        dist = ch.get('distance_km', 999)
        
        if station_key not in stations or dist < stations[station_key]['distance_km']:
            stations[station_key] = {
                'network': ch['network'],
                'station': ch['station'],
                'distance_km': dist,
                'channels': []
            }
    
    # Collect all channels for each station
    for ch in active_items:
        station_key = (ch['network'], ch['station'])
        if station_key in stations:
            stations[station_key]['channels'].append(ch)
    
    # Sort by distance
    sorted_stations = sorted(stations.values(), key=lambda x: x['distance_km'])
    
    # Build dropdown options with distance labels
    options = []
    for st in sorted_stations:
        # Prefer Z channels
        channels = sorted(st['channels'], key=lambda x: 0 if 'Z' in x['channel'] else 1)
        for ch in channels:
            loc = ch.get('location') or ''
            dist_str = f"{st['distance_km']:.1f}km"
            # Add ⭐ to Z channels
            star = " ⭐" if 'Z' in ch['channel'] else ""
            # Add 🟢 to verified stations
            verified = " 🟢" if ch['station'] in VERIFIED_STATIONS else ""
            label = f"{st['station']} ({dist_str}) - {ch['channel']}{star}{verified} @ {int(ch.get('sample_rate',0))}Hz"
            options.append((label, ch))
    
    return options

# Update channel list when type changes
def update_channels3(*args):
    opts = stations_by_distance(type_dd3.value)
    channel_dd3.options = opts
    if opts:
        channel_dd3.value = opts[0][1]
        button3.disabled = False
    else:
        channel_dd3.value = None
        button3.disabled = True

type_dd3.observe(update_channels3, names='value')
update_channels3()

@log_out3.capture(clear_output=True)
def on_click3(b):
    ch = channel_dd3.value
    if not ch:
        print('Select a channel')
        return
    
    end_dt = datetime.now(timezone.utc) - timedelta(minutes=5)  # Account for latency
    start_dt = end_dt - timedelta(hours=hours_int3.value)
    start_str = start_dt.strftime('%Y-%m-%dT%H:%M:%S')
    end_str = end_dt.strftime('%Y-%m-%dT%H:%M:%S')

    n,s,l,c = ch['network'], ch['station'], (ch.get('location') or ''), ch['channel']
    fname = repo_root / 'Audio_Files' / f"Kilauea_{s}_{c}_{hours_int3.value}h.mseed"
    wav = repo_root / 'Audio_Files' / f"Kilauea_{s}_{c}_{hours_int3.value}h.wav"
    
    print(f"Fetching {n}.{s}.{l or '--'}.{c} ({ch.get('distance_km',0):.1f}km from Kilauea)")
    
    try:
        st = fetch_seismic_data(start_str, end_str, str(fname), network=n, station=s, channel=c, location=l)
        if not st or len(st)==0:
            print('No data returned for this channel/time window.')
            return
        create_audio_file(st, sampling_rate=44100, audio_filename=str(wav))
        print(f'✓ Created {wav.name}')
        open_audio_file(str(wav))
    except Exception as e:
        print('Error:', e)

button3.on_click(on_click3)

ui_kilauea = widgets.VBox([
    widgets.HTML("<h3>🌋 Kilauea Stations (sorted by distance)</h3>"),
    widgets.HBox([type_dd3, hours_int3]),
    channel_dd3,
    button3,
    log_out3
])
ui_kilauea


VBox(children=(HTML(value='<h3>🌋 Kilauea Stations (sorted by distance)</h3>'), HBox(children=(Dropdown(descrip…

In [9]:
# Preview summary CSV written by the audit script
from pathlib import Path
import pandas as pd

csv_path = Path('../data/reference/volcano_station_summary.csv')
if csv_path.exists():
    df = pd.read_csv(csv_path)
    df.head(10)
else:
    print('Summary CSV not found; run the audit cell first.')
