Accurately working version

In [10]:
import os
import numpy as np
import matplotlib.pyplot as plt
from obspy import read, UTCDateTime
from pathlib import Path
import pandas as pd

# Waveforms are read from input_dir; rendered figures are written to output_dir.
input_dir = Path("miniSEED_files")
output_dir = Path("downsampled_graphs")
# exist_ok=True keeps a re-run from failing when the folder is already there.
output_dir.mkdir(exist_ok=True)

# Pick metadata: only the trace-name key and the two pick-time columns are read.
metadata_file = "dataset_earthquakes/metadata.csv"
pick_data = pd.read_csv(
    metadata_file,
    usecols=[
        "trace_name_original_Z",
        "trace_p_pick_time",
        "trace_s_pick_time",
    ],
)

# Function to sanitize file names
def sanitize_filename(name):
    """Return *name* with every '.', ':', '-' and '_' character removed."""
    # A single translate() pass deletes all four separator characters at once.
    return name.translate(str.maketrans("", "", ".:-_"))

# The sanitized metadata names do not depend on the file being processed, so
# compute them once instead of re-applying sanitize_filename for every file.
sanitized_trace_names = pick_data['trace_name_original_Z'].apply(sanitize_filename)

# Loop through each miniSEED file, look up its P/S picks and plot every
# 250 SPS trace against its decimated copy.
for file_path in input_dir.glob("*.MSEED"):
    file_name_sanitized = sanitize_filename(file_path.name.strip())

    # Find matching metadata row
    matched_row = pick_data[sanitized_trace_names == file_name_sanitized]
    if matched_row.empty:
        print(f"No metadata found for {file_path.name}, skipping...")
        continue

    p_pick_time = matched_row.iloc[0]['trace_p_pick_time']
    s_pick_time = matched_row.iloc[0]['trace_s_pick_time']

    # Both pick lines are drawn unconditionally below, so skip files that are
    # missing either value.
    if pd.isna(p_pick_time) or pd.isna(s_pick_time):
        print(f"Skipping {file_path.name} (Missing P or S pick time)")
        continue

    p_pick_time = UTCDateTime(p_pick_time)
    s_pick_time = UTCDateTime(s_pick_time)

    # Read the miniSEED file
    stream = read(file_path)

    for trace in stream:
        original_data = trace.data
        original_sampling_rate = trace.stats.sampling_rate
        start_time = trace.stats.starttime

        # Only 250 SPS traces are plotted; skip the rest before building any
        # time axes.
        if original_sampling_rate != 250:
            continue

        # Sample k is recorded at start + k / rate. (np.linspace with its
        # default endpoint=True would spread N samples over N / rate seconds
        # and stretch the spacing to N / ((N - 1) * rate).)
        time_original = start_time.timestamp + np.arange(len(original_data)) / original_sampling_rate
        time_original = [UTCDateTime(t).datetime for t in time_original]

        # Decimate a copy so the original trace stays untouched. With
        # no_filter=False ObsPy low-pass filters the data before it discards
        # samples.
        # NOTE(review): factor=5 turns 250 SPS into 50 SPS, not 100 SPS. The
        # legend below reports the rate actually produced; reaching 100 SPS
        # would need a rational (2/5) resampling instead of decimation.
        downsampled_trace = trace.copy()
        downsampled_trace.decimate(factor=5, no_filter=False)

        # Extract downsampled data
        downsampled_data = downsampled_trace.data
        downsampled_sampling_rate = downsampled_trace.stats.sampling_rate

        # Time axis for downsampled data in UTC (same start as the original)
        time_downsampled = start_time.timestamp + np.arange(len(downsampled_data)) / downsampled_sampling_rate
        time_downsampled = [UTCDateTime(t).datetime for t in time_downsampled]

        # Plot
        plt.figure(figsize=(12, 5))
        plt.plot(time_original, original_data, label="Original (250 SPS)", alpha=1, color='blue')
        plt.plot(
            time_downsampled,
            downsampled_data,
            label=f"Downsampled ({downsampled_sampling_rate:g} SPS)",
            alpha=0.7,
            linestyle="dashed",
            linewidth=2,
            color='orange',
        )

        # Mark P and S pick times
        plt.axvline(p_pick_time.datetime, color='r', linestyle='--', label="P-Pick Time")
        plt.axvline(s_pick_time.datetime, color='b', linestyle='--', label="S-Pick Time")

        plt.xlabel("Time (UTC)")
        plt.ylabel("Amplitude")
        plt.legend()
        plt.title(f"Downsampling {trace.id} with P & S Picks")
        plt.grid()

        # Save plot (file stem plus trace id keeps one image per trace)
        plot_filename = output_dir / f"{file_path.stem}_{trace.id}.png"
        plt.savefig(plot_filename)
        plt.close()

        print(f"Saved plot: {plot_filename}")

print("All plots saved successfully!")


No metadata found for 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DN1.MSEED, skipping...
No metadata found for 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DN2.MSEED, skipping...
Saved plot: downsampled_graphs\34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DNZ_WS.POZA.S5.DNZ.png
No metadata found for 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DN1.MSEED, skipping...
No metadata found for 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DN2.MSEED, skipping...
Saved plot: downsampled_graphs\34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DNZ_WS.POZA.S3.DNZ.png
No metadata found for 34161341_2023-02-21T00.07.00.490032Z_WS.POZA.S2.DN1.MSEED, skipping...
No metadata found for 34161341_2023-02-21T00.07.00.490032Z_WS.POZA.S2.DN2.MSEED, skipping...
Saved plot: downsampled_graphs\34161341_2023-02-21T00.07.00.490032Z_WS.POZA.S2.DNZ_WS.POZA.S2.DNZ.png
No metadata found for 34161341_2023-02-21T00.07.00.490708Z_WS.POZA.S4.DN1.MSEED, skipping...
No metadata found for 34161341_2023-02-21T0

The cells below this point all have some kind of error. The code above is correct and works fine, so use that one.

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from obspy import read, UTCDateTime
from pathlib import Path
import pandas as pd
from scipy.signal import resample_poly

# Input folder, output folder and pick-metadata CSV used by this cell.
mseed_dir = "miniSEED_files"
output_dir = "downsampled_graphs"
metadata_file = "dataset_earthquakes/metadata.csv"

# Create the output folder (and any missing parents); a no-op if it exists.
Path(output_dir).mkdir(parents=True, exist_ok=True)

# Read only the trace-name key and the two pick-time columns.
pick_data = pd.read_csv(
    metadata_file,
    usecols=[
        "trace_name_original_Z",
        "trace_p_pick_time",
        "trace_s_pick_time",
    ],
)

# Function to sanitize file names
def sanitize_filename(name):
    """Strip the separator characters '.', ':', '-' and '_' from *name*."""
    cleaned = name
    for separator in (".", ":", "-", "_"):
        cleaned = cleaned.replace(separator, "")
    return cleaned

# Downsampling function with anti-aliasing filter
def downsample_signal(signal, orig_sps=250, target_sps=100):
    """Resample *signal* from ``orig_sps`` to ``target_sps`` samples per second.

    The rate change is the ratio ``target_sps / orig_sps``, passed to
    ``scipy.signal.resample_poly`` as its ``up`` and ``down`` arguments.
    """
    resampled = resample_poly(signal, target_sps, orig_sps)
    return resampled

# The sanitized metadata names do not depend on the file being processed, so
# compute them once instead of re-applying sanitize_filename for every file.
sanitized_trace_names = pick_data['trace_name_original_Z'].apply(sanitize_filename)

# Process each miniSEED file: look up its picks, resample every 250 SPS trace
# to 100 SPS and plot the two signals together.
for file_path in Path(mseed_dir).glob("*.MSEED"):
    file_name_sanitized = sanitize_filename(file_path.name.strip())

    # Find matching metadata row
    matched_row = pick_data[sanitized_trace_names == file_name_sanitized]
    if matched_row.empty:
        print(f"No metadata found for {file_path.name}, skipping...")
        continue

    p_pick_time = matched_row.iloc[0]['trace_p_pick_time']
    s_pick_time = matched_row.iloc[0]['trace_s_pick_time']

    # Convert to UTCDateTime; a missing pick becomes None and is not drawn.
    p_pick_time = UTCDateTime(p_pick_time) if pd.notna(p_pick_time) else None
    s_pick_time = UTCDateTime(s_pick_time) if pd.notna(s_pick_time) else None

    # Read the miniSEED file
    stream = read(file_path)
    for trace in stream:
        print(f"Processing trace: {trace.id}")

        # Original data
        original_data = trace.data
        original_sampling_rate = trace.stats.sampling_rate
        total_samples = len(original_data)

        # Only 250 SPS traces are plotted; skip the rest before building any
        # time axes.
        if original_sampling_rate != 250:
            continue

        # Sample k is recorded at start + k / rate. (np.linspace with its
        # default endpoint=True would spread N samples over N / rate seconds
        # and stretch the spacing to N / ((N - 1) * rate).)
        start_time = trace.stats.starttime
        time_original = start_time.timestamp + np.arange(total_samples) / original_sampling_rate
        time_original = [UTCDateTime(t).datetime for t in time_original]

        # Resample 250 SPS -> 100 SPS
        downsampled_data = downsample_signal(original_data, orig_sps=250, target_sps=100)
        downsampled_sampling_rate = 100
        downsampled_samples = len(downsampled_data)

        # Time axis for downsampled data (same start as the original)
        time_downsampled = start_time.timestamp + np.arange(downsampled_samples) / downsampled_sampling_rate
        time_downsampled = [UTCDateTime(t).datetime for t in time_downsampled]

        # Plot both original and downsampled signals
        plt.figure(figsize=(12, 5))
        plt.plot(time_original, original_data, label="Original (250 SPS)", color='black', alpha=0.7)
        plt.plot(time_downsampled, downsampled_data, label="Downsampled (100 SPS)", color='blue', linestyle="dashed", linewidth=2, alpha=0.8)

        # Plot P-Pick Time if available
        if p_pick_time is not None:
            plt.axvline(p_pick_time.datetime, color='r', linestyle='--', label="P-Pick Time")

        # Plot S-Pick Time if available
        if s_pick_time is not None:
            plt.axvline(s_pick_time.datetime, color='b', linestyle='--', label="S-Pick Time")

        plt.xlabel("Time (GMT)")
        plt.ylabel("Amplitude")
        plt.legend()
        plt.title(f"Original vs Downsampled Seismic Signal for {file_path.name}")
        plt.grid()

        # Save the plot.
        # NOTE(review): the name depends only on the file, so a stream with
        # several 250 SPS traces overwrites the same image for each trace.
        output_file = os.path.join(output_dir, f"{file_name_sanitized}.png")
        plt.savefig(output_file)
        plt.close()

        print(f"Saved plot: {output_file}")

print("All plots saved successfully!")


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from obspy import read
from pathlib import Path

# Directories
input_dir = Path("miniSEED_files")
output_dir = Path("downsampled_graphs")
output_dir.mkdir(exist_ok=True)  # Create output folder if not exists

# Loop through each miniSEED file and plot every 250 SPS trace against its
# decimated copy on a relative time axis (seconds since the first sample).
for file_path in input_dir.glob("*.MSEED"):
    # Read the miniSEED file
    stream = read(file_path)

    for trace in stream:
        # Original data
        original_data = trace.data
        original_sampling_rate = trace.stats.sampling_rate

        # Only 250 SPS traces are plotted.
        if original_sampling_rate != 250:
            continue

        # Decimate a copy so the original trace stays untouched. With
        # no_filter=False ObsPy low-pass filters the data before it discards
        # samples.
        # NOTE(review): factor=5 turns 250 SPS into 50 SPS, not 100 SPS. The
        # legend below reports the rate actually produced; reaching 100 SPS
        # would need a rational (2/5) resampling instead of decimation.
        downsampled_trace = trace.copy()
        downsampled_trace.decimate(factor=5, no_filter=False)

        # Extract downsampled data
        downsampled_data = downsampled_trace.data
        downsampled_sampling_rate = downsampled_trace.stats.sampling_rate

        # Sample k sits at k / rate seconds. (np.linspace with its default
        # endpoint=True would stretch the spacing to N / ((N - 1) * rate).)
        time_original = np.arange(len(original_data)) / original_sampling_rate
        time_downsampled = np.arange(len(downsampled_data)) / downsampled_sampling_rate

        # Plot
        plt.figure(figsize=(10, 5))
        plt.plot(time_original, original_data, label="Original (250 SPS)", alpha=1)
        plt.plot(
            time_downsampled,
            downsampled_data,
            label=f"Downsampled ({downsampled_sampling_rate:g} SPS)",
            alpha=0.5,
            linestyle="dashed",
            linewidth=2,
        )
        plt.xlabel("Time (s)")
        plt.ylabel("Counts")
        plt.legend()
        plt.title(f"Downsampling {trace.id}")
        plt.grid()

        # Save plot (file stem plus trace id keeps one image per trace)
        plot_filename = output_dir / f"{file_path.stem}_{trace.id}.png"
        plt.savefig(plot_filename)
        plt.close()

        print(f"Saved plot: {plot_filename}")

print("All plots saved successfully!")


Modified code with GMT time labels, data downsampled to 100 Hz, and plotting

In [None]:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from obspy import read, UTCDateTime
from pathlib import Path

# Pick metadata: read only the trace-name key and the two pick-time columns.
metadata_file = "dataset_earthquakes/metadata.csv"
pick_data = pd.read_csv(
    metadata_file,
    usecols=[
        "trace_name_original_Z",
        "trace_p_pick_time",
        "trace_s_pick_time",
    ],
)

# Function to sanitize file names (from your previous code)
def sanitize_filename(name):
    """Return *name* without any '.', ':', '-' or '_' characters."""
    # Mapping a code point to None makes str.translate() drop that character.
    drop_table = {ord(ch): None for ch in ".:-_"}
    return name.translate(drop_table)

# Directories
input_dir = Path("miniSEED_files")
output_dir = Path("downsampled_graphs")
output_dir.mkdir(exist_ok=True)  # Create output folder if not exists

# The sanitized metadata names do not depend on the file being processed, so
# compute them once instead of re-applying sanitize_filename for every file.
sanitized_trace_names = pick_data['trace_name_original_Z'].apply(sanitize_filename)

# Loop through each miniSEED file, look up its picks and plot every 250 SPS
# trace against its decimated copy on a UTC time axis.
for file_path in input_dir.glob("*.MSEED"):
    sanitized_name = sanitize_filename(file_path.name.strip())

    # Match the pick times using sanitized filenames
    matched_row = pick_data[sanitized_trace_names == sanitized_name]

    if matched_row.empty:
        print(f"No metadata found for {file_path.name}, skipping...")
        continue

    # Extract the raw pick values from the first matching row
    p_pick_time_value = matched_row.iloc[0]['trace_p_pick_time']
    s_pick_time_value = matched_row.iloc[0]['trace_s_pick_time']

    # Convert only if the value is present; a missing pick becomes None.
    p_pick_time = UTCDateTime(p_pick_time_value) if pd.notna(p_pick_time_value) else None
    s_pick_time = UTCDateTime(s_pick_time_value) if pd.notna(s_pick_time_value) else None

    # Print to verify
    print("P-Pick Time:", p_pick_time)
    print("S-Pick Time:", s_pick_time)

    # Read the miniSEED file
    stream = read(file_path)

    for trace in stream:
        # Original data
        original_data = trace.data
        original_sampling_rate = trace.stats.sampling_rate
        start_time = trace.stats.starttime

        # Only 250 SPS traces are plotted.
        if original_sampling_rate != 250:
            continue

        # Decimate a copy so the original trace stays untouched. With
        # no_filter=False ObsPy low-pass filters the data before it discards
        # samples.
        # NOTE(review): factor=5 turns 250 SPS into 50 SPS, not 100 SPS. The
        # legend below reports the rate actually produced; reaching 100 SPS
        # would need a rational (2/5) resampling instead of decimation.
        downsampled_trace = trace.copy()
        downsampled_trace.decimate(factor=5, no_filter=False)

        # Extract downsampled data
        downsampled_data = downsampled_trace.data
        downsampled_sampling_rate = downsampled_trace.stats.sampling_rate
        downsampled_start_time = downsampled_trace.stats.starttime

        # Time axis in GMT (UTC): sample i is recorded at start + i / rate.
        time_original_gmt = [
            (start_time + i / original_sampling_rate).datetime
            for i in range(len(original_data))
        ]
        time_downsampled_gmt = [
            (downsampled_start_time + i / downsampled_sampling_rate).datetime
            for i in range(len(downsampled_data))
        ]

        # Plot
        plt.figure(figsize=(10, 5))
        plt.plot(time_original_gmt, original_data, label="Original (250 SPS)", alpha=1)
        plt.plot(
            time_downsampled_gmt,
            downsampled_data,
            label=f"Downsampled ({downsampled_sampling_rate:g} SPS)",
            alpha=1,
            linestyle="dashed",
            linewidth=2,
        )

        # Plot P-Pick and S-Pick. A pick is None when the metadata value is
        # missing, so guard each line instead of dereferencing None.
        if p_pick_time is not None:
            plt.axvline(p_pick_time.datetime, color='r', linestyle='--', label="P-Pick Time")
        if s_pick_time is not None:
            plt.axvline(s_pick_time.datetime, color='b', linestyle='--', label="S-Pick Time")

        plt.xlabel("Time (GMT)")
        plt.ylabel("Amplitude")
        plt.legend()
        plt.title(f"Downsampling {trace.id} with Picks")
        plt.xticks(rotation=20)
        plt.grid()

        # Save plot (sanitized file name plus trace id keeps one image per trace)
        plot_filename = output_dir / f"{sanitized_name}_{trace.id}.png"
        plt.savefig(plot_filename)
        plt.close()

        print(f"Saved plot: {plot_filename}")

print("All plots saved successfully!")


In [5]:
import os
from obspy import read, UTCDateTime
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import pandas as pd
from scipy.signal import resample_poly

# Input folder, output folder and pick-metadata CSV used by this cell.
mseed_dir = "miniSEED_files"
output_dir = "downsampled_graphs"
metadata_file = "dataset_earthquakes/metadata.csv"

# Create the output folder (and any missing parents); a no-op if it exists.
Path(output_dir).mkdir(parents=True, exist_ok=True)

# Read only the trace-name key and the two pick-time columns.
pick_data = pd.read_csv(
    metadata_file,
    usecols=[
        "trace_name_original_Z",
        "trace_p_pick_time",
        "trace_s_pick_time",
    ],
)

# Function to sanitize file names
def sanitize_filename(name):
    """Remove every '.', ':', '-' and '_' from *name* and return the result."""
    result = name
    for unwanted in ".:-_":
        result = result.replace(unwanted, "")
    return result

# Downsample function using an anti-aliasing filter
def downsample_signal(signal, orig_sps=250, target_sps=100):
    """Resample *signal* from ``orig_sps`` to ``target_sps`` samples per second.

    Args:
        signal: Array-like sequence of samples taken at ``orig_sps``.
        orig_sps: Sampling rate of ``signal``; must be integer-valued.
        target_sps: Desired output sampling rate; must be integer-valued.

    Returns:
        The resampled samples as returned by ``scipy.signal.resample_poly``
        called with ``up=target_sps`` and ``down=orig_sps``.
    """
    return resample_poly(signal, up=target_sps, down=orig_sps)

# The sanitized metadata names do not depend on the file being processed, so
# compute them once instead of re-applying sanitize_filename for every file.
sanitized_trace_names = pick_data['trace_name_original_Z'].apply(sanitize_filename)

# Process each miniSEED file: look up its picks, resample every trace to
# 100 SPS and plot the result on a GMT time axis.
for file_path in Path(mseed_dir).glob("*.MSEED"):
    file_name_sanitized = sanitize_filename(file_path.name.strip())

    # Find matching metadata row
    matched_row = pick_data[sanitized_trace_names == file_name_sanitized]
    if matched_row.empty:
        print(f"No metadata found for {file_path.name}, skipping...")
        continue

    p_pick_time = matched_row.iloc[0]['trace_p_pick_time']
    s_pick_time = matched_row.iloc[0]['trace_s_pick_time']

    # Convert to UTCDateTime; a missing pick becomes None and is not drawn.
    p_pick_time = UTCDateTime(p_pick_time) if pd.notna(p_pick_time) else None
    s_pick_time = UTCDateTime(s_pick_time) if pd.notna(s_pick_time) else None

    # Read the miniSEED file
    stream = read(file_path)
    for trace in stream:
        print(f"Processing trace: {trace.id}")

        # Resample from the trace's own rate. Assuming 250 SPS for every trace
        # would leave non-250 SPS data at a rate other than 100 SPS and put it
        # on a wrong time axis below.
        original_sampling_rate = trace.stats.sampling_rate
        if original_sampling_rate != int(original_sampling_rate):
            # downsample_signal needs integer up/down factors.
            print(f"Skipping {trace.id} (non-integer sampling rate {original_sampling_rate})")
            continue
        downsampled_data = downsample_signal(
            trace.data, orig_sps=int(original_sampling_rate), target_sps=100
        )

        # Generate time axis in GMT: sample k is at start + k / 100 s.
        # (np.linspace with its default endpoint=True would stretch the
        # spacing to N / ((N - 1) * 100).)
        start_time = trace.stats.starttime
        downsampled_times = start_time.timestamp + np.arange(len(downsampled_data)) / 100
        downsampled_times = [UTCDateTime(t).datetime for t in downsampled_times]

        # Plot the waveform
        plt.figure(figsize=(12, 5))
        plt.plot(downsampled_times, downsampled_data, label="Seismic Signal (100 SPS)", color='black')

        # Plot P-Pick Time if available
        if p_pick_time is not None:
            plt.axvline(p_pick_time.datetime, color='r', linestyle='--', label="P-Pick Time")

        # Plot S-Pick Time if available
        if s_pick_time is not None:
            plt.axvline(s_pick_time.datetime, color='b', linestyle='--', label="S-Pick Time")

        plt.xlabel("Time (GMT)")
        plt.ylabel("Amplitude")
        plt.legend()
        plt.title(f"Downsampled Seismic Signal for {file_path.name}")
        plt.grid()

        # Save the plot.
        # NOTE(review): the name depends only on the file, so a stream with
        # several traces overwrites the same image for each trace.
        output_file = os.path.join(output_dir, f"{file_name_sanitized}.png")
        plt.savefig(output_file)
        plt.close()

        print(f"Saved plot: {output_file}")

print("All plots saved successfully!")


No metadata found for 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DN1.MSEED, skipping...
No metadata found for 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DN2.MSEED, skipping...
Processing trace: WS.POZA.S5.DNZ
Saved plot: downsampled_graphs\3416134120230221T000700489723ZWSPOZAS5DNZMSEED.png
No metadata found for 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DN1.MSEED, skipping...
No metadata found for 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DN2.MSEED, skipping...
Processing trace: WS.POZA.S3.DNZ
Saved plot: downsampled_graphs\3416134120230221T000700490024ZWSPOZAS3DNZMSEED.png
No metadata found for 34161341_2023-02-21T00.07.00.490032Z_WS.POZA.S2.DN1.MSEED, skipping...
No metadata found for 34161341_2023-02-21T00.07.00.490032Z_WS.POZA.S2.DN2.MSEED, skipping...
Processing trace: WS.POZA.S2.DNZ
Saved plot: downsampled_graphs\3416134120230221T000700490032ZWSPOZAS2DNZMSEED.png
No metadata found for 34161341_2023-02-21T00.07.00.490708Z_WS.POZA.S4.DN1.MSEED, skipping...
No m