# In [None]:
import pandas as pd
import os

# Load the seismic event log. It is later passed to process_seismic_clusters,
# which reads its 'filename' and 'time_rel' columns.
# NOTE(review): 'path_to_event_log.csv' looks like a placeholder path — confirm
# and point it at the real event log before running.
events_df = pd.read_csv('path_to_event_log.csv')

def is_time_in_cluster(cluster_start, cluster_end, event_time):
    """Report whether an event time lies inside a cluster's time window.

    Both ends of the window are inclusive, so an event that coincides exactly
    with ``cluster_start`` or ``cluster_end`` counts as inside.
    """
    # Lower bound first; the upper bound is only compared when the lower holds.
    return cluster_start <= event_time and event_time <= cluster_end

def _label_quake_clusters(seismic_df, event_times):
    """Return a 0/1 label per row of ``seismic_df``, aligned on its index.

    A row is labelled 1 when at least one value in ``event_times`` lies inside
    the inclusive ``[min, max]`` range of ``time_rel`` for that row's
    ``cluster_id``; every other row is labelled 0.
    """
    labels = pd.Series(0, index=seismic_df.index, dtype='int64')
    if seismic_df.empty or event_times.empty:
        return labels

    # One pass over the data yields every cluster's time window.
    bounds = seismic_df.groupby('cluster_id')['time_rel'].agg(['min', 'max'])
    starts = bounds['min'].to_numpy()[:, None]   # shape (clusters, 1)
    ends = bounds['max'].to_numpy()[:, None]     # shape (clusters, 1)
    times = event_times.to_numpy()[None, :]      # shape (1, events)

    # Broadcast to a (clusters, events) grid; a cluster is a quake as soon as
    # any single event falls inside its window (both ends inclusive).
    has_event = ((times >= starts) & (times <= ends)).any(axis=1)
    quake_ids = bounds.index[has_event]

    labels[seismic_df['cluster_id'].isin(quake_ids)] = 1
    return labels


def process_seismic_clusters(input_folder, output_folder, event_data):
    """Label the clusters in every CSV of ``input_folder`` as quake / non-quake.

    For each ``*.csv`` file in ``input_folder``:

    1. Rows whose ``cluster_id`` is 0 (no cluster) are dropped.
    2. Rows of ``event_data`` whose ``filename`` equals the file's name
       (exact match, extension included) are selected.
    3. A ``quake_cluster`` column is added: 1 for every row of a cluster whose
       ``time_rel`` range (min..max, inclusive) contains at least one selected
       event's ``time_rel``, otherwise 0.
    4. The result is written to ``output_folder`` under the same file name,
       without the index.

    The ``quake_cluster`` column is always present in the output and holds
    integers, even when a file has no clusters or no matching events.

    Args:
        input_folder: Directory containing the per-file CSVs; each must have
            ``cluster_id`` and ``time_rel`` columns.
        output_folder: Directory for the labelled CSVs; created if missing.
        event_data: DataFrame with ``filename`` and ``time_rel`` columns.
    """
    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(input_folder):
        if not filename.endswith('.csv'):
            continue

        seismic_df = pd.read_csv(os.path.join(input_folder, filename))

        # .copy() gives an independent frame, so adding a column below does not
        # write into a slice of the frame returned by read_csv.
        seismic_df = seismic_df[seismic_df['cluster_id'] != 0].copy()

        # Only events logged against this exact file name are considered.
        event_times = event_data.loc[event_data['filename'] == filename, 'time_rel']

        seismic_df['quake_cluster'] = _label_quake_clusters(seismic_df, event_times)

        seismic_df.to_csv(os.path.join(output_folder, filename), index=False)

# Folders holding the Octave-processed CSVs (input) and the labelled copies (output).
# NOTE(review): both values look like placeholders — set them to real paths.
input_folder = 'path_to_octave_output_folder'
output_folder = 'path_to_save_processed_files'

# Create the output folder (and any missing parents). exist_ok=True avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(output_folder, exist_ok=True)

# Label every cluster in every input CSV against the event log and save the results.
process_seismic_clusters(input_folder, output_folder, events_df)

print("Seismic clusters processed and saved to the output folder.")
