In [None]:
# 1) Install dependencies if missing
import sys, subprocess, importlib

def ensure(pkg, import_name=None):
    """Import a package, installing it with pip first if the import fails.

    Parameters
    ----------
    pkg : str
        Name passed to ``pip install``.
    import_name : str, optional
        Module name to import when it differs from ``pkg``; defaults to ``pkg``.

    Raises
    ------
    subprocess.CalledProcessError
        If the ``pip install`` command exits with a non-zero status.
    ImportError
        If the module still cannot be imported after the installation.
    """
    name = import_name or pkg
    try:
        importlib.import_module(name)
        print(f"[ok] {pkg} already installed")
    except Exception:
        # Any import-time failure (not only ImportError) triggers an install attempt.
        print(f"[info] Installing {pkg} ...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", pkg])
        # The import system caches directory listings; refresh them so a module
        # installed while this interpreter is running can be found.
        importlib.invalidate_caches()
        importlib.import_module(name)
        print(f"[ok] {pkg} installed")


# Modules checked by this cell; ensure() is called once per name, in list order.
pkgs = [
    "os", "re", "requests", "zipfile",
    "geopandas", "matplotlib", "shapely", "cartopy",
    "obspy", "folium", "osmnx", "numpy", "pandas",
]

for pkg in pkgs:
    ensure(pkg)

## Exercise 2: PGA from Seismological Data

In this exercise, we will analyze seismic data from a significant earthquake event near Japan using Python and ObsPy.

The workflow includes querying earthquake events, retrieving waveform data from a seismic station, processing the raw signals, and calculating Peak Ground Acceleration (PGA) for different sensor components.

Visualization tools such as Folium and Matplotlib will be used to map event locations and inspect waveform characteristics. This notebook demonstrates the data commonly used to analyze and derive information about seismic hazards and risks.

To begin, we will import the required packages and check their versions.

In [None]:
#%% Import required libraries
from obspy import UTCDateTime, Stream
from obspy.clients.fdsn import Client
from obspy.geodetics import locations2degrees
from obspy.signal.filter import bandpass
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML

# Folium is optional and only used for the simple map (nice in Colab).
# Comment this out if the map is not wanted.
try:
    import folium
except Exception:
    HAVE_FOLIUM = False
else:
    HAVE_FOLIUM = True


import obspy, sys

# Report the versions in use and whether the optional map dependency was found.
print("ObsPy and friends imported. Version checks:")
print("  obspy:", obspy.__version__)
print("  python:", sys.version.split()[0])
print("Folium available:", HAVE_FOLIUM)

# Get the earthquake catalogue for Japan

In the next step, we will query the USGS earthquake catalogue for events in the Japan region over the past five years. We will filter for significant earthquakes (magnitude ≥ 6) and select the largest event for further analysis. This catalogue will provide the event information needed for waveform retrieval and ground motion calculations.

In [None]:
# FDSN clients: USGS for events, IRIS for waveforms/responses (IU network is served by IRIS)
client_evt = Client("USGS")
client_wf = Client("IRIS")

# Time window: last 5 years, counted back from the moment this cell runs
t_end = UTCDateTime()                 # "now"
t_start = t_end - 5 * 365 * 24 * 3600 # ~5 years

# Japan-ish box
minlat, maxlat = 24.0, 46.0
minlon, maxlon = 122.0, 153.0

# Query events (raise magnitude if you get too many)
cat = client_evt.get_events(
    starttime=t_start, endtime=t_end,
    minlatitude=minlat, maxlatitude=maxlat,
    minlongitude=minlon, maxlongitude=maxlon,
    minmagnitude=6, orderby="magnitude"
)

print(f"Found {len(cat)} event(s).")

# Quick map of events
# cat.plot(projection="local", continent_fill_color="lightgray",
#          water_fill_color="lightblue", resolution="h", color='depth',
#          title="Event epicenters colored by depth")

# Scan the catalogue for the event with the largest magnitude value.
max_mag = None
max_mag_ev = None
for ev in cat:
    # Same fallback as used below: preferred magnitude first, else the first listed one.
    mag = ev.preferred_magnitude() or (ev.magnitudes[0] if ev.magnitudes else None)
    if mag is None or mag.mag is None:
        continue  # nothing to compare for this event
    if max_mag is None or mag.mag > max_mag:
        max_mag = mag.mag
        max_mag_ev = ev

# Fail with a clear message instead of an AttributeError on None further down.
if max_mag_ev is None:
    raise RuntimeError(
        "No event with a usable magnitude was returned; "
        "widen the time window/region or lower minmagnitude."
    )

origin = max_mag_ev.preferred_origin() or max_mag_ev.origins[0]
magnitude = max_mag_ev.preferred_magnitude() or max_mag_ev.magnitudes[0]

ev_time = origin.time
# origin.depth is divided by 1000 to report kilometres; a missing depth is shown as 0
ev_lat, ev_lon, ev_depth_km = origin.latitude, origin.longitude, (origin.depth or 0)/1000.0
ev_mag = magnitude.mag
ev_mag_type = magnitude.magnitude_type

print("\nChosen event:")
print(f"  Time (UTC): {ev_time}")
print(f"  Location  : lat {ev_lat:.3f}, lon {ev_lon:.3f}, depth {ev_depth_km:.1f} km")
print(f"  Magnitude : {ev_mag:.1f} {ev_mag_type}")

# Get the station data

In this step, we select a seismic station near the earthquake epicenter to retrieve waveform data. We use the IRIS FDSN client to request station metadata and waveform recordings for the IU.MAJO station, covering a time window around the event. The station's coordinates are extracted for mapping and distance calculations. This data will be used to analyze ground motion and compute peak ground acceleration (PGA) in subsequent steps.

In [None]:
net, sta = "IU", "MAJO"
loc = "*"   # accept any location code (e.g., 00 or 10)
cha = "*"   # request every channel; BH* and HN* subsets are selected in later cells

# Request window: 60 s before to 600 s after the origin time (adjust if needed)
t1, t2 = ev_time - 60, ev_time + 600

# Keyword arguments shared by the metadata request and the waveform request
fdsn_query = dict(network=net, station=sta, location=loc, channel=cha,
                  starttime=t1, endtime=t2)

print(f"Requesting stations and responses for {net}.{sta} {cha} {t1} → {t2}")
inv = client_wf.get_stations(level="response", **fdsn_query)
print(inv)

print(f"\nRequesting waveforms for {net}.{sta}.{loc}.{cha}  {t1} → {t2}")
st_raw = client_wf.get_waveforms(attach_response=False, **fdsn_query)
st_raw.sort()
st_raw.merge(method=1, fill_value="interpolate")

# Station coordinates: first station of the first network in the inventory
net0 = inv.networks[0]
sta0 = net0.stations[0]
sta_lat = sta0.latitude
sta_lon = sta0.longitude
print(f"Station coordinates: lat {sta_lat:.3f}, lon {sta_lon:.3f}")

# Visualizing Earthquake and Station Locations

In the next step, we will create an interactive map using Folium to visualize the locations of the earthquake epicenter, the selected seismic station, and all catalogued events in the Japan region. This map provides spatial context for the seismic analysis, helping us understand the proximity of the station to the event and the distribution of recent significant earthquakes.

In [None]:
# The map is optional: only build it when the folium import succeeded.
if HAVE_FOLIUM:
    m = folium.Map(location=[(minlat+maxlat)/2, (minlon+maxlon)/2], zoom_start=4, tiles="CartoDB positron")

    # Plot all events as blue markers
    for ev in cat:
        o = ev.preferred_origin() or ev.origins[0]
        m_ev = ev.preferred_magnitude() or ev.magnitudes[0]
        # Popup text is rendered as HTML, so use <br> for the line break.
        folium.CircleMarker([o.latitude, o.longitude], radius=3, color="blue",
                            popup=f"M{m_ev.mag:.1f} {m_ev.magnitude_type}<br>{o.time.isoformat()}",
                            fill=True, fill_opacity=0.7).add_to(m)

    # Outline of the search box used for the catalogue query
    folium.Rectangle(bounds=[(minlat,minlon),(maxlat,maxlon)], color="blue", fill=False).add_to(m)
    # Chosen event (red) and recording station (green)
    folium.Marker([ev_lat, ev_lon], popup=f"M{ev_mag:.1f} {ev_mag_type}<br>{ev_time.isoformat()}",
                    icon=folium.Icon(color="red")).add_to(m)
    folium.Marker([sta_lat, sta_lon], popup=f"{net}.{sta}",
                    icon=folium.Icon(color="green", icon="info-sign")).add_to(m)

    # A bare expression inside an if-block is not shown as cell output; display it explicitly.
    display(m)
else:
    print("Folium is not available; skipping the interactive map.")

# Calculating Epicentral Distance

In this step, we calculate the epicentral distance between the selected earthquake event and the seismic station. The distance is computed in degrees using the geographic coordinates of both locations. This value is important for understanding the relationship between ground motion measurements and the proximity to the earthquake source, which is a key factor in seismic hazard analysis.

In [None]:
# Epicentral distance from the chosen event to the station.
# Station coordinates are read from the first station of the first network in the inventory.
net0 = inv.networks[0]
sta0 = net0.stations[0]
station_coords = (sta0.latitude, sta0.longitude)
dist_deg = locations2degrees(ev_lat, ev_lon, *station_coords)
dist_km = dist_deg * 111  # rough conversion: 111 km per degree
print(f"Epicentral distance to {net}.{sta}: {dist_deg:.2f}°  (~{dist_km:.0f} km)")

## Processing and Analyzing Seismic Waveforms from Seismometers (BH*)

In the following steps, we process the raw waveform data from the selected seismic station to prepare it for ground motion analysis. This involves selecting the appropriate seismometer channels, applying detrending and tapering to remove baseline offsets and edge effects, and removing the instrument response to obtain acceleration in physical units (m/s²). The processed traces are then rotated to standard ZNE (vertical, north, east) components for consistent analysis. Visualizing these processed waveforms allows us to inspect the quality and characteristics of the seismic signals before calculating peak ground acceleration (PGA).

In [None]:
#%% Select seismometer BH* and process it
# Select BH* channels (broadband seismometer) recorded under location code "00".
# Work on a copy so st_raw stays untouched for the later cells.
st_sei = st_raw.copy().select(channel="BH*", location="00")
if len(st_sei) == 0:
    raise RuntimeError("No BH* traces with location code '00' found in st_raw.")
st_sei.plot(size=(800, 300 * len(st_sei)), title="Raw BH* traces")

# Detrend / taper
st_sei.detrend("demean")
st_sei.detrend("linear")
st_sei.taper(max_percentage=0.02, type="hann")

# A conservative pre-filter for deconvolution (in Hz):
# (low-cut corner1, low-cut corner2, high-cut corner1, high-cut corner2)
# Choose within instrument bandwidth. Adjust if traces look noisy or clipped.
# The four corners must be increasing, so the high-cut start is tied to the
# high-cut end, which is kept below the Nyquist frequency of the slowest trace.
nyquist = min(tr.stats.sampling_rate for tr in st_sei) / 2.0
high2 = min(25.0, nyquist * 0.95)
high1 = min(20.0, 0.8 * high2)
pre_filt = (0.05, 0.1, high1, high2)

print("Using pre_filt =", pre_filt)

# Remove instrument response to get acceleration in m/s^2
st_sei.remove_response(inventory=inv, output="ACC", pre_filt=pre_filt, 
                       water_level=60)

# Rotate to ZNE because the horizontals may be recorded as 1/2 components
st_sei.rotate(method='->ZNE', inventory=inv)
print(st_sei)

# output="ACC" above means these traces are acceleration, not velocity
st_sei.plot(size=(800, 300 * len(st_sei)), 
            title="Processed BH* acceleration (m/s²)")

# Compute PGA from seismometer data

In this cell, we compute the Peak Ground Acceleration (PGA) from the processed broadband seismometer traces (BH* components). The code calculates the maximum absolute acceleration for each component (vertical, north, east) and presents the results in both m/s² and as a fraction of gravity (g). Additionally, it computes the horizontal vector PGA by combining the north and east components, providing a measure of the strongest horizontal shaking experienced at the station during the earthquake. This analysis is essential for understanding the intensity of ground motion and its potential impact on structures and infrastructure.

In [None]:
#%% Compute PGA from BH* components
def component_tag(tr):
    """Return the 'NET.STA.CHA' label of a Trace, e.g. 'IU.MAJO.BHZ'.

    The label is built from the network, station and channel entries of
    ``tr.stats``; the location code is not part of it.
    """
    stats = tr.stats
    return "{}.{}.{}".format(stats.network, stats.station, stats.channel)

# Per-component PGA (m/s^2): largest absolute sample of each trace, keyed by its tag
pga_comp = {
    component_tag(tr): float(np.abs(tr.data).max())
    for tr in st_sei
}

# Compute horizontal vector PGA of the E/N components

def pga_horizontal_pair(tr_e, tr_n):
    """Return the peak of the sample-wise vector amplitude of two horizontal traces.

    Both ``.data`` arrays are truncated to the shorter length, combined as
    sqrt(e**2 + n**2) sample by sample, and the maximum is returned as a float.
    """
    east, north = tr_e.data, tr_n.data
    common = min(len(east), len(north))
    resultant = np.sqrt(east[:common] ** 2 + north[:common] ** 2)
    return float(np.abs(resultant).max())

# Look up the rotated horizontal components; either selection may be empty
# (for example if the stream does not contain traces named BHE/BHN).
sel_e = st_sei.select(channel="BHE")
sel_n = st_sei.select(channel="BHN")

if len(sel_e) > 0 and len(sel_n) > 0:
    tr_e, tr_n = sel_e[0], sel_n[0]
    pga_H = pga_horizontal_pair(tr_e, tr_n)
    pair_used = (component_tag(tr_e), component_tag(tr_n))
else:
    # No E/N pair: leave the result unset so the report below says so
    tr_e = tr_n = None
    pga_H = None
    pair_used = None

# Present results
g = 9.80665  # standard gravity in m/s^2, used to express PGA as a fraction of g
print("PGA per component:")
for k, v in sorted(pga_comp.items()):
    print(f"  {k:>12s}: {v:8.5f} m/s²  ({v/g:7.5f} g)")

if pga_H is not None:
    print(f"\nHorizontal vector PGA using {pair_used[0]} + {pair_used[1]}:")
    print(f"  PGA_H: {pga_H:8.5f} m/s²  ({pga_H/g:7.5f} g)")
else:
    print("\nHorizontal vector PGA: not computed (E/N pair not found).")

## Processing Accelerometer (HN*) Data and Computing PGA

In this section, we process the strong-motion accelerometer traces (HN* channels) from the selected seismic station. The workflow mirrors the previous steps for broadband seismometer data: we select the relevant channels, apply detrending and tapering, remove the instrument response to obtain acceleration in physical units, and rotate the traces to standard ZNE components. After processing, we compute the Peak Ground Acceleration (PGA) for each component and the horizontal vector PGA, providing insight into the strongest shaking recorded by the accelerometer during the earthquake. Comparing PGA values from both seismometer and accelerometer data helps assess ground motion characteristics and instrument performance.

In [None]:
#%% Process HN* traces the same way as before
# Work on a copy so st_raw stays untouched.
st_hn = st_raw.copy().select(channel="HN*", location=loc) # pick a location code if multiple are present
if len(st_hn) == 0:
    raise RuntimeError(f"No HN* traces found in st_raw for location code '{loc}'.")
for tr in st_hn:
    tr.stats.backup = dict(tr.stats)  # keep a copy of original stats for reference 
st_hn.detrend("demean")
st_hn.detrend("linear")
st_hn.taper(max_percentage=0.02, type="hann")

# Pre-filter corners (Hz) derived from the HN* sampling rate itself, so this
# cell does not depend on the filter chosen for another channel group.
# The four corners must be increasing and stay below the Nyquist frequency.
nyquist_hn = min(tr.stats.sampling_rate for tr in st_hn) / 2.0
high2_hn = min(25.0, nyquist_hn * 0.95)
high1_hn = min(20.0, 0.8 * high2_hn)
pre_filt_hn = (0.05, 0.1, high1_hn, high2_hn)
print("Using pre_filt =", pre_filt_hn)

# Remove instrument response to get acceleration in m/s^2
st_hn.remove_response(inventory=inv, output="ACC", pre_filt=pre_filt_hn, water_level=60)

print(st_hn)

# Rotate to ZNE in case the horizontals are recorded as 1/2 components
st_hn.rotate(method='->ZNE', inventory=inv)

st_hn.plot(size=(800, 300 * len(st_hn)), title="Processed HN* acceleration (m/s²)")

## Compute PGA from Accelerometer Data

In this cell, we calculate the Peak Ground Acceleration (PGA) from the processed strong-motion accelerometer traces (HN* components). The code determines the maximum absolute acceleration for each component (vertical, north, east) and presents the results in both m/s² and as a fraction of gravity (g). It also computes the horizontal vector PGA by combining the north and east components, providing a measure of the strongest horizontal shaking recorded by the accelerometer during the earthquake. This analysis is useful for comparing ground motion intensity between broadband seismometer and accelerometer data.

In [None]:
#%% Compute PGA from HN* components
# Per-component PGA (m/s^2): largest absolute sample of each trace
pga_comp_hn = {}
for tr in st_hn:
    tag = component_tag(tr)
    pga_comp_hn[tag] = float(np.max(np.abs(tr.data)))

# Compute horizontal vector PGA only if both E and N components are present;
# either selection may be empty (for example if no traces are named HNE/HNN).
sel_e_hn = st_hn.select(channel="HNE")
sel_n_hn = st_hn.select(channel="HNN")

if len(sel_e_hn) > 0 and len(sel_n_hn) > 0:
    tr_e, tr_n = sel_e_hn[0], sel_n_hn[0]
    pga_H_hn = pga_horizontal_pair(tr_e, tr_n)
    pair_used_hn = (component_tag(tr_e), component_tag(tr_n))
else:
    # No E/N pair: leave the result unset so the report below says so
    tr_e = tr_n = None
    pga_H_hn = None
    pair_used_hn = None

# Present results
# g in m/s^2
g = 9.80665
print("PGA per HN* component:")
for k, v in sorted(pga_comp_hn.items()):
    print(f"  {k:>12s}: {v:8.5f} m/s²  ({v/g:7.5f} g)")

if pga_H_hn is not None:
    print(f"\nHorizontal vector PGA using {pair_used_hn[0]} + {pair_used_hn[1]}:")
    print(f"  PGA_H: {pga_H_hn:8.5f} m/s²  ({pga_H_hn/g:7.5f} g)")
else:
    print("\nHorizontal vector PGA: not computed (E/N pair not found).")

# Extra tasks:
* Did you observe the difference between PGA from seismometer and accelerometer data?
* Why is there a difference?
* Can you explain the difference between seismometer and accelerometer measurements?
* If you have a site near faults with expected high-magnitude earthquakes, would you install a seismometer nearby?
* In the inventory, you will find different location identifiers (e.g., 00, 10, 60) for the seismometer. Calculate PGA for other locations.
* Is there a difference between the PGA from different locations? Why?
