# SPECFEM3D Simulation for Seismic Data Generation

This notebook demonstrates how to set up and run SPECFEM3D simulations to generate synthetic seismic data for the interpolation project.

In [None]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import shutil

# Add the project root to path for imports.
# '..' is resolved relative to the current working directory, so this relies on
# the kernel having been started in the notebook's own folder.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate '..' entry each time.
if '..' not in sys.path:
    sys.path.append('..')

# Import project modules
from src.simulation.specfem_runner import SpecfemSimulation
from src.utils.logging_utils import setup_logging
from src.utils.plot_utils import plot_seismic_trace, plot_seismic_gather

# Set up logging
# setup_logging is the project helper imported from src.utils.logging_utils;
# it is called once here at INFO level and its return value is kept as `logger`.
logger = setup_logging(level='INFO')

## Configuration

Define paths and parameters for the simulation.

In [None]:
# Paths
specfem_dir = os.path.expanduser("~/specfem3d")  # Path to SPECFEM3D installation
# The notebook directory is taken to be the kernel's working directory.
# The previous dirname(abspath("__file__")) expression used the *quoted* string
# "__file__", so it never looked at a real file location and always evaluated
# to the working directory as well; os.getcwd() states that directly.
notebook_dir = os.getcwd()
# The project root is one level above the notebook directory.
project_root = os.path.abspath(os.path.join(notebook_dir, ".."))
data_dir = os.path.join(project_root, "data/synthetic/raw/simulation1")
templates_dir = os.path.join(project_root, "specfem_simulations")

# Ensure directories exist (no error if they are already present)
Path(data_dir).mkdir(parents=True, exist_ok=True)
Path(templates_dir).mkdir(parents=True, exist_ok=True)

In [None]:
# Define simulation parameters
# Keys are the parameter names handed to the simulation runner; on/off switches
# are written as the strings ".true." / ".false.".
simulation_params = dict(
    # --- General simulation parameters ---
    NUMBER_OF_SIMULTANEOUS_RUNS=1,  # one simulation at a time
    NPROC=4,  # total MPI processes (equals NPROC_XI * NPROC_ETA below)
    NPROC_XI=2,  # processes along X
    NPROC_ETA=2,  # processes along Y
    SIMULATION_TYPE=1,  # 1 = forward simulation
    NSTEP=4000,  # number of time steps
    DT=0.001,  # time step in seconds (NSTEP * DT = 4 s in total)
    MODEL="default",  # model type
    SAVE_FORWARD=".false.",  # forward wavefield is not saved
    USE_OLSEN_ATTENUATION=".false.",  # Olsen attenuation switched off
    NGNOD=8,  # nodes per element
    ABSORBING_CONDITIONS=".true.",  # absorbing boundaries enabled
    STACEY_ABSORBING_CONDITIONS=".true.",  # Stacey absorbing boundaries enabled
    ATTENUATION=".false.",  # attenuation switched off
    USE_RICKER_TIME_FUNCTION=".true.",  # Ricker wavelet source time function

    # --- Output parameters ---
    SAVE_SEISMOGRAMS_DISPLACEMENT=".true.",  # output displacement seismograms
    NTSTEP_BETWEEN_OUTPUT_SEISMOS=10,  # seismogram output interval, in time steps
    USE_BINARY_FOR_SEISMOGRAMS=".false.",  # ASCII seismograms
    # NOTE(review): the single-precision binary switch below is on while
    # USE_BINARY_FOR_SEISMOGRAMS above is off -- confirm which of these keys
    # the installed SPECFEM3D version actually reads.
    SAVE_BINARY_SEISMOGRAMS_SINGLE=".true.",
    SAVE_BINARY_SEISMOGRAMS_DOUBLE=".false.",  # no double-precision binary output
    USE_EXISTING_STATIONS=".true.",  # use the STATIONS file prepared for this run

    # --- Cartesian mesh parameters ---
    LATITUDE_MIN=0.0,  # min X coordinate of mesh
    LATITUDE_MAX=2000.0,  # max X coordinate of mesh
    LONGITUDE_MIN=0.0,  # min Y coordinate of mesh
    LONGITUDE_MAX=2000.0,  # max Y coordinate of mesh
    DEPTH_MIN=0.0,  # min Z coordinate of mesh
    DEPTH_MAX=2000.0,  # max Z coordinate of mesh
    NEX_XI=40,  # elements along X
    NEX_ETA=40,  # elements along Y
    NEX_ZETA=40,  # elements along Z
)

In [None]:
# Define source parameters
# A single source described by its position, type, time function and
# (for the moment-tensor case) the six tensor components.
source_params = dict(
    source_surf=0,  # source is inside the medium
    # Position in meters (zs: depth positive downward)
    xs=1000.0,
    ys=1000.0,
    zs=500.0,
    source_type=1,  # 1 = force, 2 = moment tensor
    # Intended as a Ricker wavelet.
    # NOTE(review): SPECFEM2D-style SOURCE files use 1 for Ricker and 2 for the
    # first derivative of a Gaussian -- confirm which code the runner expects.
    time_function_type=2,
    name_of_source_file="",  # not used for a simple source
    burst_band_width=0.0,  # not used for a Ricker wavelet
    f0=10.0,  # central frequency in Hz
    tshift=0.0,  # time shift
    anglesource=0.0,  # angle of the force source (source_type = 1)
    # Moment tensor components (source_type = 2)
    Mxx=1.0,
    Mxy=0.0,
    Mxz=0.0,
    Myy=1.0,
    Myz=0.0,
    Mzz=1.0,
)

## Set Up Station Locations

Define the locations of the receivers (geophones).

In [None]:
# Create station locations
# Receivers are evenly spaced from X = 500 to X = 1500 at a fixed Y = 1000,
# all with zero elevation and zero burial.
n_stations = 20
stations = pd.DataFrame(
    {
        'name': ['ST{:03d}'.format(idx) for idx in range(1, n_stations + 1)],
        'network': ['GE' for _ in range(n_stations)],
        'lat': np.linspace(500, 1500, num=n_stations),  # X coordinates
        'lon': np.full(n_stations, 1000.0),  # Y coordinates
        'elevation': np.zeros(n_stations),
        'burial': np.zeros(n_stations),
    }
)

# Save stations to file: one space-separated row per station, with no header
# line and no index column
stations_file = os.path.join(data_dir, 'STATIONS')
stations.to_csv(stations_file, sep=' ', header=False, index=False)
print(f"Created {n_stations} stations in {stations_file}")

Created 20 stations in /home/masa/ml_interpolation/data/synthetic/raw/simulation1/STATIONS


## Run SPECFEM3D Simulation

Now we'll run the simulation using our configured parameters.

In [None]:
# Add the remaining switches to the simulation parameters.
# NUMBER_OF_SIMULTANEOUS_RUNS is not touched here; it is already part of
# simulation_params where that dict is first defined.
# update() overwrites keys in place, so re-running this cell is harmless.
simulation_params.update({
    "BROADCAST_SAME_MESH_AND_MODEL": ".true.",
    "USE_REGULAR_MESH": ".true.",
    "STACEY_INSTEAD_OF_FREE_SURFACE": ".false.",
    "ROTATE_PML_ACTIVATE": ".false.",
    "PRINT_SOURCE_TIME_FUNCTION": ".false.",
    "GPU_MODE": ".false.",
    "SAVE_MESH_FILES": ".false.",
    "SUPPRESS_UTM_PROJECTION": ".true."  # Use Cartesian coordinates directly
})

# Initialize simulation with explicit number of processes
simulation = SpecfemSimulation(
    specfem_dir=specfem_dir,
    output_dir=data_dir,
    nproc=simulation_params["NPROC"]
)

# Run the full simulation with our complete parameter set.
# The station table is passed as a list of per-station dicts.
simulation_succeeded = simulation.run_full_simulation(
    simulation_params=simulation_params,
    source_params=source_params,
    stations_list=stations.to_dict('records')
)

# In the recorded failing run (meshing error) this call returned False.
# Stop here on an explicit False so that later cells do not try to plot
# seismograms that were never written.
if simulation_succeeded is False:
    raise RuntimeError(
        "SPECFEM3D simulation failed; see the log output above for the cause."
    )

# Keep the result as the cell's displayed value, as before.
simulation_succeeded

Cleaning previous simulation outputs...
Cleaned DATABASES_MPI directory
Cleaned OUTPUT_FILES directory
2025-03-31 20:23:21 - src.simulation.specfem_runner - INFO - Prepared SOURCE file at /home/masa/ml_interpolation/data/synthetic/raw/simulation1/SOURCE
2025-03-31 20:23:21 - src.simulation.specfem_runner - INFO - Prepared STATIONS file with 20 stations


Running xmeshfem3D...
Executing: mpirun -np 4 ./bin/xmeshfem3D
2025-03-31 20:23:22 - src.simulation.specfem_runner - ERROR - Meshing failed: STOP Error reading Par_file parameter NUMBER_OF_SIMULTANEOUS_RUNS
--------------------------------------------------------------------------
mpirun has exited due to process rank 0 with PID 0 on
node masa-ubuntu22 exiting improperly. There are three reasons this could occur:

1. this process did not call "init" before exiting, but others in
the job did. This can cause a job to hang indefinitely while it waits
for all processes to call "init". By rule, if one process calls "init",
then ALL processes must call "init" prior to termination.

2. this process called "init", but exited without calling "finalize".
By rule, all processes that call "init" MUST call "finalize" prior to
exiting or it will be considered an "abnormal termination"

3. this process called "MPI_Abort" or "orte_abort" and the mca parameter
orte_create_session_dirs is set to false. 

False

## Visualize Results

Plot the generated seismograms.

In [None]:
# Plot individual traces
# Seismograms are read from the OUTPUT_FILES folder under data_dir.
seismogram_dir = os.path.join(data_dir, 'OUTPUT_FILES')
# NOTE(review): the recorded run of this cell raised
#   TypeError: plot_seismic_trace() got an unexpected keyword argument 'station'
# so `station` is not a keyword accepted by plot_seismic_trace.
# TODO: check its signature in src/utils/plot_utils.py and correct this call.
plot_seismic_trace(seismogram_dir, station='ST001')

# Plot gather
# Not reached in the recorded run because the call above raised first.
plot_seismic_gather(seismogram_dir)

TypeError: plot_seismic_trace() got an unexpected keyword argument 'station'