In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from datetime import datetime

# Path to one example seismogram CSV (a single file, not a folder)
data_path = '../NASAResources/space_apps_2024_seismic_detection/data/lunar/training/data/S12_GradeA/xa.s12.00.mhz.1970-04-25HR00_evid00006.csv'
# Path to the catalog CSV (starting points of quakes)
catalog_path = '../NASAResources/space_apps_2024_seismic_detection/data/lunar/training/catalogs/apollo12_catalog_GradeA_final.csv'

# Read both tables; the catalog is read first, then the seismogram.
catalog, data = (pd.read_csv(csv_path) for csv_path in (catalog_path, data_path))

In [2]:
# Function to truncate an absolute timestamp to the start of its minute (seconds and fractional seconds are both set to zero)
def reformat_abs_time(time_str):
    """Truncate an absolute timestamp string to the start of its minute.

    Parameters
    ----------
    time_str : str
        Timestamp formatted as '%Y-%m-%dT%H:%M:%S.%f'.

    Returns
    -------
    str
        The timestamp with both the seconds and the fractional seconds
        replaced by zeros, e.g. '1970-04-25T01:14:01.101660' becomes
        '1970-04-25T01:14:00.000000'.

    Raises
    ------
    ValueError
        If ``time_str`` does not match the expected format.
    """
    parsed = datetime.strptime(time_str, '%Y-%m-%dT%H:%M:%S.%f')
    # The format string hard-codes ':00' for the seconds field, so everything
    # below minute resolution is discarded here.
    minute_stamp = parsed.strftime('%Y-%m-%dT%H:%M:00')
    return ''.join([minute_stamp, '.000000'])

# Derive short-named working columns on the seismogram frame:
#   time_abs -> absolute time passed through reformat_abs_time
#   time_rel -> relative time cast to int (the fractional part is dropped)
#   velocity -> plain copy of the raw velocity column
data['time_abs'] = data['time_abs(%Y-%m-%dT%H:%M:%S.%f)'].map(reformat_abs_time)
data['time_rel'] = data['time_rel(sec)'].astype(int)
data['velocity'] = data['velocity(m/s)']

# Same short names on the catalog; its absolute time is copied unchanged.
catalog['time_rel'] = catalog['time_rel(sec)'].astype(int)
catalog['time_abs'] = catalog['time_abs(%Y-%m-%dT%H:%M:%S.%f)']

# Frame holding only the three derived seismogram columns.
features = data.loc[:, ['time_rel', 'velocity', 'time_abs']]



In [3]:
# Spot-check: integer relative time stored at row position 486944
print(data['time_rel'].iat[486944])

73500


In [4]:
# NumPy arrays of the time columns: catalog entries (quake starts) and
# every seismogram sample (possible quake starts).
quake_start_times_rel, quake_start_times_abs = (
    catalog[column].to_numpy() for column in ('time_rel', 'time_abs')
)
possible_quake_start_times_rel, possible_quake_start_times_abs = (
    data[column].to_numpy() for column in ('time_rel', 'time_abs')
)

def label_data(data, catalog):
    """Flag every row of ``data`` whose time matches a catalog entry.

    A row is labelled 1 when its ``(time_rel, time_abs)`` pair is equal to the
    ``(time_rel, time_abs)`` pair of at least one row of ``catalog``; every
    other row is labelled 0.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with ``time_rel`` and ``time_abs`` columns.
    catalog : pandas.DataFrame
        Frame with ``time_rel`` and ``time_abs`` columns.

    Returns
    -------
    numpy.ndarray
        Integer array of length ``len(data)``. Element ``i`` describes the
        row at *position* ``i`` of ``data``, whatever the frame's index is.

    Raises
    ------
    KeyError
        If either frame lacks a ``time_rel`` or ``time_abs`` column.
    """
    data_rel, data_abs = data['time_rel'], data['time_abs']
    catalog_rel, catalog_abs = catalog['time_rel'], catalog['time_abs']

    labels = np.zeros(len(data), dtype=int)

    # Vectorised pre-filter on time_rel. Row *positions* are tracked rather
    # than index labels, so a frame with a non-default index (e.g. one that
    # was filtered earlier) is still labelled in the right places.
    candidate_pos = np.flatnonzero(data_rel.isin(catalog_rel).to_numpy())
    if candidate_pos.size == 0:
        # Nothing shares a time_rel with the catalog: all labels stay 0.
        return labels

    # Set membership on (time_rel, time_abs) pairs replaces re-filtering the
    # catalog once per candidate row.
    catalog_pairs = set(zip(catalog_rel, catalog_abs))
    candidate_pairs = zip(data_rel.iloc[candidate_pos], data_abs.iloc[candidate_pos])
    pair_hit = np.fromiter(
        (pair in catalog_pairs for pair in candidate_pairs),
        dtype=bool,
        count=candidate_pos.size,
    )

    labels[candidate_pos[pair_hit]] = 1
    return labels

# Build the per-row label array for the loaded seismogram
labels = label_data(data, catalog)

# Keep only the rows labelled 1 and print them
matching_rows = data.loc[labels == 1]
print(matching_rows)



      time_abs(%Y-%m-%dT%H:%M:%S.%f)  time_rel(sec)  velocity(m/s)  \
29415     1970-04-25T01:14:00.196000    4440.000000   2.193395e-10   
29416     1970-04-25T01:14:00.346943    4440.150943   1.753184e-10   
29417     1970-04-25T01:14:00.497887    4440.301887   5.067076e-11   
29418     1970-04-25T01:14:00.648830    4440.452830  -8.193815e-11   
29419     1970-04-25T01:14:00.799774    4440.603774  -1.587732e-10   
29420     1970-04-25T01:14:00.950717    4440.754717  -1.636253e-10   
29421     1970-04-25T01:14:01.101660    4440.905660  -1.298771e-10   

                         time_abs  time_rel      velocity  
29415  1970-04-25T01:14:00.000000      4440  2.193395e-10  
29416  1970-04-25T01:14:00.000000      4440  1.753184e-10  
29417  1970-04-25T01:14:00.000000      4440  5.067076e-11  
29418  1970-04-25T01:14:00.000000      4440 -8.193815e-11  
29419  1970-04-25T01:14:00.000000      4440 -1.587732e-10  
29420  1970-04-25T01:14:00.000000      4440 -1.636253e-10  
29421  1970-04-25T0

In [5]:
# Number of rows labelled 1 (labels holds only 0s and 1s, so the sum is the count)
quake_start_count = labels.sum()
print(quake_start_count)

7
