In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Conv1D, MaxPooling1D, Flatten, Dropout
from keras.callbacks import EarlyStopping


In [2]:
import os

# Set the directory for your training data
train_data_dir = 'C:/Users/nm_ma/Desktop/space_apps_2024_seismic_detection/data/lunar/training/data/S12_GradeA'

# List to hold DataFrames
dataframes = []

# Loop through all CSV files in the training folder
for filename in os.listdir(train_data_dir):
    if filename.endswith('.csv'):
        file_path = os.path.join(train_data_dir, filename)
        df = pd.read_csv(file_path)
        dataframes.append(df)

# Concatenate all DataFrames into a single DataFrame
training_data = pd.concat(dataframes, ignore_index=True)

# Display the first few rows of the merged DataFrame
print(training_data.head())


  time_abs(%Y-%m-%dT%H:%M:%S.%f)  time_rel(sec)  velocity(m/s)
0     1970-01-19T00:00:00.665000       0.000000  -6.153279e-14
1     1970-01-19T00:00:00.815943       0.150943  -7.701288e-14
2     1970-01-19T00:00:00.966887       0.301887  -8.396187e-14
3     1970-01-19T00:00:01.117830       0.452830  -8.096155e-14
4     1970-01-19T00:00:01.268774       0.603774  -7.097599e-14


In [3]:
training_data.tail()

Unnamed: 0,time_abs(%Y-%m-%dT%H:%M:%S.%f),time_rel(sec),velocity(m/s)
43175424,1975-06-27T00:00:00.843887,86400.301887,-3.982647e-16
43175425,1975-06-27T00:00:00.994830,86400.45283,-5.580877e-16
43175426,1975-06-27T00:00:01.145774,86400.603774,-6.563002e-16
43175427,1975-06-27T00:00:01.296717,86400.754717,-3.281501e-16
43175428,1975-06-27T00:00:01.447660,86400.90566,0.0


In [4]:

def label_matching_rows(training_data_df, catalog_file):
    # Read the catalog file
    catalog_df = pd.read_csv(catalog_file)

    # Ensure the timestamp is in the correct format
    catalog_df['time_abs'] = pd.to_datetime(catalog_df['time_abs(%Y-%m-%dT%H:%M:%S.%f)'], format='%Y-%m-%dT%H:%M:%S.%f')

    # Convert time_abs in the training DataFrame to datetime format
    training_data_df['time_abs'] = pd.to_datetime(training_data_df['time_abs(%Y-%m-%dT%H:%M:%S.%f)'], format='%Y-%m-%dT%H:%M:%S.%f')

    # Merge the DataFrames on the time_abs column
    merged_df = training_data_df.merge(catalog_df[['time_abs']], on='time_abs', how='left', indicator=True)

    # Set target to 1 where there is a match in the catalog
    training_data_df['target'] = (merged_df['_merge'] == 'both').astype(int)

    # Drop unnecessary columns, including duplicates and the merge indicator
    # You can specify which columns you want to keep
    columns_to_keep = ['time_abs', 'time_rel(sec)', 'velocity(m/s)', 'target']
    training_data_df = training_data_df[columns_to_keep]

    # Drop duplicate entries if necessary
    training_data_df.drop_duplicates(inplace=True)

    return training_data_df


# Labeling using the catalog file
catalog_file = 'C:/Users/nm_ma/Desktop/space_apps_2024_seismic_detection/data/lunar/training/catalogs/apollo12_catalog_GradeA_final.csv'
labeled_training_data = label_matching_rows(training_data, catalog_file)

# Display the first few rows of the labeled DataFrame
print("Labeled Training Data:")
print(labeled_training_data.head())


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  training_data_df.drop_duplicates(inplace=True)


Labeled Training Data:
                    time_abs  time_rel(sec)  velocity(m/s)  target
0 1970-01-19 00:00:00.665000       0.000000  -6.153279e-14       0
1 1970-01-19 00:00:00.815943       0.150943  -7.701288e-14       0
2 1970-01-19 00:00:00.966887       0.301887  -8.396187e-14       0
3 1970-01-19 00:00:01.117830       0.452830  -8.096155e-14       0
4 1970-01-19 00:00:01.268774       0.603774  -7.097599e-14       0


In [5]:
labeled_training_data.tail()

Unnamed: 0,time_abs,time_rel(sec),velocity(m/s),target
43175424,1975-06-27 00:00:00.843887,86400.301887,-3.982647e-16,0
43175425,1975-06-27 00:00:00.994830,86400.45283,-5.580877e-16,0
43175426,1975-06-27 00:00:01.145774,86400.603774,-6.563002e-16,0
43175427,1975-06-27 00:00:01.296717,86400.754717,-3.281501e-16,0
43175428,1975-06-27 00:00:01.447660,86400.90566,0.0,0
