In [None]:
# !pip install dascore --upgrade --quiet
# !pip install PyQt5 --upgrade --quiet

In [None]:
import os
import numpy as np
from scipy.interpolate import CubicSpline

import dascore as dc
from dascore.units import Hz

from matplotlib import pyplot as plt

# Use Qt backend for interactive plots
%matplotlib qt

### Reading and Preprocessing DAS Data

*Note: You can skip this section if you're using the provided example dataset (`example.h5`).*

In [None]:
# Define file path and filename for DAS data.
# Both values below are placeholders and must be replaced before running this cell.
folder_path = 'Path/To/Your/DAS/Folder'  # Replace with your actual folder path
filename = 'data.hdf5'  # Replace with your actual DAS file name

full_path = os.path.join(folder_path, filename)

# Read and preprocess data.
# `pa` is whatever dc.spool returns for the file; only its first element (pa[0])
# goes through the processing chain, and the result is stored in `pa_fil`.
pa = dc.spool(full_path)
pa_fil = (
    pa[0]
    .set_units("1/s", distance="m", time="s")  # Set physical units (data: 1/s, distance: m, time: s)
    .detrend("time")                           # Remove trend along time (presumably linear by default; confirm)
    .decimate(time=20)                         # Downsample in time (value 20 is passed to decimate)
    .taper(time=0.05)                          # Apply taper in time
    .pass_filter(time=(1 * Hz, 49 * Hz))       # Bandpass filter between 1 Hz and 49 Hz
    .select(distance=(102, 222), samples=True) # Select a distance range; samples=True presumably means sample indices (confirm)
    .transpose('distance', 'time')             # Order the dimensions as (distance, time)
)

### Using the Example Preprocessed Data

The file `example.h5` contains data that has already been preprocessed using the steps described above.


In [None]:
# Read the preprocessed example data and visualize it.
# NOTE: here `pa_fil` is the object returned by dc.spool (it is indexed with [0]
# below), whereas the optional preprocessing cell assigns a single processed
# patch to the same name.
pa_fil = dc.spool('./example.h5')

# Plot the preprocessed data: waterfall view of the first element on `ax`.
fig, ax = plt.subplots(figsize=(7, 5))
pa_fil[0].viz.waterfall(ax=ax,scale=0.0001)

### Velocity Spectral (p-f) Analysis

This section transforms the DAS data into the slowness-frequency (p-f) domain.  
The workflow includes:
- Chunking the data into overlapping time windows.
- Applying tapers in both time and distance to reduce edge effects.
- Performing a tau-p (slant stack) transform to map the data into the slowness domain.
- Computing the discrete Fourier transform (DFT) along the time axis to obtain the frequency content for each slowness.
- The resulting spectra are used for subsequent stacking, normalization, and dispersion curve picking.

In [None]:
# Define processing functions for each step
def taper(patch, time, window_type):
    """Apply a taper along the time axis.

    Parameters
    ----------
    patch
        Object exposing a ``taper`` method (a DASCore patch in this notebook).
    time
        Taper amount forwarded as the ``time`` keyword of ``patch.taper``.
        Previously this argument was ignored and 0.1 was always used.
    window_type
        Window name forwarded as ``window_type`` to ``patch.taper``.

    Returns
    -------
    Whatever ``patch.taper`` returns.
    """
    return patch.taper(time=time, window_type=window_type)

def taper_d(patch, distance, window_type):
    """Apply a taper along the distance axis.

    Parameters
    ----------
    patch
        Object exposing a ``taper`` method (a DASCore patch in this notebook).
    distance
        Taper amount forwarded as the ``distance`` keyword of ``patch.taper``.
        Previously this argument was ignored and 0.1 was always used.
    window_type
        Window name forwarded as ``window_type`` to ``patch.taper``.

    Returns
    -------
    Whatever ``patch.taper`` returns.
    """
    return patch.taper(distance=distance, window_type=window_type)

def taup(patch, velocities):
    """Run the tau-p (slant stack) transform of ``patch`` for ``velocities``.

    Thin wrapper so the transform can be used with ``spool.map``: it calls
    ``patch.tau_p`` with ``velocities`` as the only argument and hands back
    the result unchanged.
    """
    transformed = patch.tau_p(velocities)
    return transformed

def dft(patch, dim="time"):
    """Apply a real-input discrete Fourier transform along ``dim``.

    Parameters
    ----------
    patch
        Object exposing a ``dft`` method (a DASCore patch in this notebook).
    dim
        Name of the dimension to transform; defaults to ``"time"``.
        Previously this argument was ignored and ``"time"`` was always used.

    Returns
    -------
    Whatever ``patch.dft(dim=dim, real=True)`` returns.
    """
    return patch.dft(dim=dim, real=True)


# Process the data step by step.
# Each stage wraps the result of `.map(...)` in dc.spool again so the next stage
# can call `.map` on it; the final name is the one read by the later cells.
sp_fil = dc.spool(pa_fil)

# Chunk the data into segments of 30 seconds with 10 seconds overlap
sp_fil_chunked = sp_fil.chunk(time=30, overlap=10)

# Apply tapering in time.
# `time` and `window_type` are given to map as keyword arguments for `taper`.
sp_fil_chunked_taper = dc.spool(
    sp_fil_chunked.map(taper, time=0.1, window_type="hann")
)

# Apply tapering in distance.
# `distance` and `window_type` are given to map as keyword arguments for `taper_d`.
sp_fil_chunked_taper_d = dc.spool(
    sp_fil_chunked_taper.map(taper_d, distance=0.1, window_type="hann")
)

# Apply tau-p transform with specified velocities:
# 100, 120, ..., 780 (np.arange excludes the stop value 800); presumably m/s.
sp_fil_chunked_taper_taup = dc.spool(
    sp_fil_chunked_taper_d.map(taup, velocities=np.arange(100, 800, 20))
)

# Apply discrete Fourier transform along the time dimension
sp_fil_chunked_taper_taup_dft = dc.spool(
    sp_fil_chunked_taper_taup.map(dft, dim="time")
)

### Spectral normalization and stacking

This section computes the normalized and stacked power spectrum across all data chunks.  
For each chunk, the code:
- Calculates the power spectrum (magnitude squared of the DFT).
- Splits the spectrum into positive and negative slowness components and sums them to enforce symmetry.
- Normalizes each frequency by the average power across slowness.
- Stacks (sums) the normalized spectra from all chunks to enhance coherent features and suppress noise.

The result, `summed_avg_stack`, is a 2D array holding the normalized, stacked power, with slowness along the first axis (rows) and frequency along the second axis (columns), ready for visualization and dispersion analysis.

In [None]:
# Size of the first axis of the DFT output and its midpoint.
# NOTE: despite its name, `n_freq` counts the rows along axis 0, which the
# plotting cells index with the 'slowness' coordinate (axis 1 is indexed with
# 'ft_time').  The name is kept so that any cell relying on it keeps working.
first_spectrum = sp_fil_chunked_taper_taup_dft[0].data
n_freq = first_spectrum.shape[0]
half = n_freq // 2

# Number of rows left after folding the first half onto the second half:
# `half` rows for an even count, `half + 1` for an odd count (the centre row
# has no mirror partner and is kept as is).
n_rows_folded = n_freq - half
center_offset = n_freq - 2 * half  # 0 for an even row count, 1 for an odd one

# Initialize the stack for the averaged, normalized spectra
summed_avg_stack = np.zeros((n_rows_folded, first_spectrum.shape[1]))

# Loop through each chunked patch and sum normalized power spectra
for patch in sp_fil_chunked_taper_taup_dft:

    # Compute power spectrum (|X|^2)
    power_spectrum = (patch.data * np.conj(patch.data)).real

    # Split along axis 0: the first `half` rows are treated as the negative
    # slowness part and the remaining rows as the positive part (plus the centre
    # row when the count is odd), matching the `[half:]` slice that the plotting
    # cells apply to the slowness coordinate.
    neg = power_spectrum[:half, :]
    pos = power_spectrum[half:, :]

    # Fold: mirror the first half and add it to its counterpart.  For an odd row
    # count the centre row is skipped by `center_offset`; the previous odd
    # branch added arrays of `half + 1` and `half` rows and could not broadcast.
    summed = pos.copy()
    summed[center_offset:, :] += np.flipud(neg)

    # Normalize by the average across slowness for each frequency.
    # Columns whose average is exactly zero are left at zero instead of
    # producing NaN/inf that would contaminate the whole stack.
    avg = np.sum(summed, axis=0) / len(summed)
    summed_avg = np.divide(summed, avg, out=np.zeros_like(summed), where=avg != 0)

    # Accumulate the normalized spectra
    summed_avg_stack = summed_avg_stack + summed_avg

### Rayleigh Phase-Velocity Dispersion Picking

This section enables interactive picking of the Rayleigh wave dispersion curve from the stacked slowness-frequency (p-f) spectrum.  

- The normalized and stacked power spectrum is displayed as an image, with frequency on the x-axis and apparent velocity on the y-axis.
- The user manually selects points along the visible dispersion curve by clicking on the plot; pressing Enter finishes the selection.
- The picked points are sorted and interpolated using a cubic spline to produce a smooth dispersion curve.
- The result is visualized by overlaying the picked points and the interpolated curve on the power spectrum image.

This process allows for extraction of the fundamental mode Rayleigh wave phase velocity dispersion curve for subsequent 1D shear-wave velocity inversion.

In [None]:
def _draw_pf_spectrum(ax, spectrum, extent, title):
    """Draw the squared stacked p-f spectrum on ``ax`` and return the image.

    ``spectrum`` is squared before display and the colour scale is clipped at
    2 % of the squared maximum for better visibility.  Axis labels, limits and
    the colorbar are identical for every figure that shows this spectrum.
    """
    squared = spectrum**2
    image = ax.imshow(
        squared,
        aspect='auto',
        origin='lower',
        extent=extent,
        cmap='plasma',
        vmin=0,
        vmax=np.max(squared)*0.02  # Adjust for better visibility
    )
    ax.set_xlabel('Frequency (Hz)', fontsize=14)
    ax.set_ylabel('Apparent Velocity (m/s)', fontsize=14)
    ax.set_title(title, fontsize=14)
    ax.set_xlim(1, 20)
    ax.set_ylim(100, 600)
    ax.figure.colorbar(image, ax=ax, label='Amplitude')
    return image


# Get frequency and slowness arrays.
# The frequency axis is sized from the stacked spectrum itself rather than from
# a variable left over from the stacking loop.
freq = sp_fil_chunked_taper_taup_dft[0].get_coord('ft_time')[:summed_avg_stack.shape[1]]
slowness = sp_fil_chunked_taper_taup_dft[0].get_coord('slowness')[half:]

# Create figure
fig, ax = plt.subplots(figsize=(10, 5))
extent = [freq[0], freq[-1], 1/slowness[0], 1/slowness[-1]] # Frequency and apparent velocity extent
im = _draw_pf_spectrum(ax, summed_avg_stack, extent, 'Click to pick points, then press Enter')

# Pick points on that same figure
print("Click to pick points along the dispersion curve. Press Enter when done.")
picked = fig.ginput(n=-1, timeout=0)  # block until Enter

plt.close(fig)

# Always end up with an (n_points, 2) float array, even when nothing was picked.
picked = np.asarray(picked, dtype=float).reshape(-1, 2)

# Sort by frequency and drop picks that share a frequency with an earlier pick:
# CubicSpline requires strictly increasing x values.
_, first_occurrence = np.unique(picked[:, 0], return_index=True)
picked = picked[first_occurrence]

if picked.shape[0] < 2:
    print("Not enough points picked for interpolation.")
    # Define empty results so later cells can still run (they plot nothing).
    freq_interp = np.empty(0)
    velocity_interp = np.empty(0)
else:
    # Interpolate the sorted picks with a cubic spline
    cs = CubicSpline(picked[:, 0], picked[:, 1])
    freq_interp = np.linspace(picked[:, 0].min(), picked[:, 0].max(), 200)
    velocity_interp = cs(freq_interp)

    # Plot result with cubic spline interpolation
    fig2, ax2 = plt.subplots(figsize=(10, 5))
    im2 = _draw_pf_spectrum(
        ax2,
        summed_avg_stack,
        extent,
        'Manual Picked Dispersion and Cubic Spline Interpolation',
    )
    ax2.plot(picked[:, 0], picked[:, 1], 'ro', label='Picked Points')
    ax2.plot(freq_interp, velocity_interp, 'b-', linewidth=2, label='Cubic Spline')
    ax2.legend()
    plt.show()


In [None]:
# Plot static image of the summed power spectrum
# Get frequency and slowness arrays.
# The frequency axis is sized from the stacked spectrum itself rather than from
# a variable left over from the stacking loop.
freq = sp_fil_chunked_taper_taup_dft[0].get_coord('ft_time')[:summed_avg_stack.shape[1]]
slowness = sp_fil_chunked_taper_taup_dft[0].get_coord('slowness')[half:]

# View the picks as an (n_points, 2) array so an empty selection does not break
# the column indexing below.
picked_points = np.asarray(picked, dtype=float).reshape(-1, 2)

fig_static, ax_static = plt.subplots(figsize=(10, 5))
extent = [freq[0], freq[-1],  1/slowness[0], 1/slowness[-1],]
stack_squared = summed_avg_stack**2
im_static = ax_static.imshow(
    stack_squared,
    aspect='auto',
    origin='lower',
    extent=extent,
    cmap='plasma',
    vmin=0,
    vmax=np.max(stack_squared)*0.02  # clip the colour scale for better visibility
)

# Overlay the picks, and the spline only when enough points were picked for
# the picking cell to have produced one.
if picked_points.shape[0] > 0:
    ax_static.plot(picked_points[:, 0], picked_points[:, 1], 'ro', label='Picked Points')
if picked_points.shape[0] >= 2:
    ax_static.plot(freq_interp, velocity_interp, 'b-', linewidth=2, label='Cubic Spline')

ax_static.set_xlabel('Frequency (Hz)',fontsize=14)
ax_static.set_ylabel('Apparent Velocity (m/s)',fontsize=14)
ax_static.set_title('Summed Power Spectrum',fontsize=14)
fig_static.colorbar(im_static, ax=ax_static, label='Amplitude')
ax_static.set_xlim(1,20)
ax_static.set_ylim(100,600)
if picked_points.shape[0] > 0:
    # The overlays carry labels, so show them.
    ax_static.legend()
plt.show()