In [None]:
import os
import pandas as pd
import pyarrow.parquet as pq

# Specify the base directory for your Parquet files.
# NOTE(review): this value ends in a glob wildcard ('*'). It only matches
# anything if the code that consumes it expands wildcards -- a plain
# os.walk() treats its argument as a literal directory name and does not.
base_directory = '/notebooks/output/ZZ/FWU1/HHD.E/*'  # All output files will be in this path

# Function to recursively find all Parquet files
def find_parquet_files(directory):
    """Recursively collect the paths of all ``.parquet`` files under ``directory``.

    Parameters
    ----------
    directory : str
        Either an existing directory, or a glob pattern (for example
        ``'/data/output/*'``). ``os.walk`` does not expand wildcards and
        silently yields nothing for a path that does not exist, so a pattern
        is expanded with ``glob.glob`` first and every match is searched.

    Returns
    -------
    list of str
        Paths of the files whose name ends with ``.parquet``, in sorted order
        so that repeated runs see the files in the same sequence. The list is
        empty when nothing matches.
    """
    import glob  # local import keeps this function self-contained

    if os.path.isdir(directory):
        # A real directory is used as-is, even if its name contains
        # characters that glob would treat as special (e.g. '[').
        search_roots = [directory]
    else:
        search_roots = glob.glob(directory)

    parquet_files = []
    for search_root in search_roots:
        if os.path.isfile(search_root):
            # The pattern matched a file directly rather than a directory.
            if search_root.endswith('.parquet'):
                parquet_files.append(search_root)
            continue
        for root, _dirs, files in os.walk(search_root):
            for file in files:
                if file.endswith('.parquet'):
                    parquet_files.append(os.path.join(root, file))
    return sorted(parquet_files)

# Find all Parquet files in the directory structure
parquet_files = find_parquet_files(base_directory)

if not parquet_files:
    print(f"No Parquet files found in {base_directory}")
else:
    print(f"Found {len(parquet_files)} Parquet files in {base_directory}")

    # Read each Parquet file; a file that fails to load is reported and skipped
    # so that one bad file does not abort the whole run.
    dfs = []
    for file in parquet_files:
        try:
            df = pd.read_parquet(file)
            dfs.append(df)
        except Exception as e:
            print(f"Error reading {file}: {str(e)}")

    if not dfs:
        # pd.concat raises ValueError on an empty list, so stop here when
        # every single read failed.
        print("None of the Parquet files could be read; nothing to combine.")
    else:
        # Combine all DataFrames into one, renumbering the rows
        combined_df = pd.concat(dfs, ignore_index=True)

        # Display information about the combined DataFrame
        print("\nCombined DataFrame:")
        # info() writes its report itself and returns None, so it is called
        # directly rather than wrapped in print().
        combined_df.info()
        print(combined_df.head())
        print(combined_df.tail())

        # Print some statistics
        print("\nData statistics:")
        if 'data' in combined_df.columns:
            print(combined_df['data'].describe())
        else:
            print("Column 'data' not found; skipping statistics.")


No Parquet files found in /notebooks/output/


In [None]:
import os
import obspy
import numpy as np
import pandas as pd

# Specify the base directory for your miniSEED files.
# The value below is a placeholder and must be replaced with a real path.
# Note that this rebinds `base_directory`, which the Parquet cell also assigns.
base_directory = '/path/to/your/miniseed/files'

def find_miniseed_files(directory):
    """Walk ``directory`` recursively and return the paths of all miniSEED files.

    A file counts as miniSEED when its name, compared case-insensitively,
    ends with ``.mseed`` or ``.msd``. Paths are returned in the order
    ``os.walk`` visits them.
    """
    wanted_suffixes = ('.mseed', '.msd')
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in names
        if name.lower().endswith(wanted_suffixes)
    ]

# Find all miniSEED files in the directory structure
miniseed_files = find_miniseed_files(base_directory)

if not miniseed_files:
    print(f"No miniSEED files found in {base_directory}")
else:
    print(f"Found {len(miniseed_files)} miniSEED files in {base_directory}")

    # Read all miniSEED files into a list of Stream objects; a file that fails
    # to load is reported and skipped.
    streams = []
    for file in miniseed_files:
        try:
            stream = obspy.read(file)
            streams.append(stream)
            print(f"Successfully read: {file}")
        except Exception as e:
            print(f"Error reading {file}: {str(e)}")

    # Build one record per trace from its header fields and its samples
    data_list = []
    for stream in streams:
        for trace in stream:
            data_dict = {
                'network': trace.stats.network,
                'station': trace.stats.station,
                'location': trace.stats.location,
                'channel': trace.stats.channel,
                'starttime': trace.stats.starttime.datetime,
                'endtime': trace.stats.endtime.datetime,
                'sampling_rate': trace.stats.sampling_rate,
                'npts': trace.stats.npts,
                'data': trace.data.tolist()
            }
            data_list.append(data_dict)

    # Convert the list of dictionaries to a DataFrame
    df = pd.DataFrame(data_list)

    if df.empty:
        # With no records the frame has no columns, and describe() raises
        # ValueError on a column-less DataFrame.
        print("No traces could be read from the miniSEED files; nothing to summarise.")
    else:
        # Display information about the combined DataFrame
        print("\nCombined DataFrame:")
        # info() writes its report itself and returns None, so it is called
        # directly rather than wrapped in print().
        df.info()
        print(df.head())
        print(df.tail())

        # Print some statistics
        print("\nData statistics:")
        print(df.describe())