In [8]:
import numpy as np
import pandas as pd
import os
from obspy import read
from scipy.fftpack import fft
from scipy.stats import skew, kurtosis
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [15]:
# Function to load a CSV file
def load_csv_file(file_path):
    # Read CSV, automatically parse date columns, if any
    df = pd.read_csv(file_path)
    
    # Keep only numeric columns for the model (ignore timestamps or strings)
    df_numeric = df.select_dtypes(include=[np.number])
    
    # Return the numeric DataFrame
    return df_numeric

In [16]:
# Process all CSV files in a directory
def process_csv_directory(directory):
    csv_list = []
    for filename in os.listdir(directory):
        if filename.endswith('.csv'):
            file_path = os.path.join(directory, filename)
            df_numeric = load_csv_file(file_path)
            csv_list.append(df_numeric)
    return csv_list

In [17]:
# Convert continuous labels into binary classification (0 or 1)
def convert_labels_to_binary(y, threshold):
    return np.where(y > threshold, 1, 0)

In [18]:
# Train a machine learning model
def train_model(X_train, y_train):
    model = RandomForestClassifier(n_estimators=100)
    model.fit(X_train, y_train)
    return model

In [19]:
# Evaluate the model
def evaluate_model(model, X_val, y_val):
    y_pred = model.predict(X_val)
    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred)
    recall = recall_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred)
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1 Score: {f1}")

In [20]:
# Main function to load, process, extract features, train, and evaluate the model
def main():
    # Updated CSV directory path for moon seismic data
    csv_directory = "space-apps/space_apps_2024_seismic_detection/data/lunar/training/data/S12_GradeA"
    
    # Load and process CSV data
    csv_data = process_csv_directory(csv_directory)
    
    # Convert CSV data to numpy arrays and combine
    csv_features = [df.to_numpy() for df in csv_data]
    X_csv = np.concatenate(csv_features, axis=0)  # Concatenate CSV features

    # Assuming the last column of numeric CSVs contains continuous labels
    X = X_csv[:, :-1]  # Features (all but last column)
    y_continuous = X_csv[:, -1]   # Continuous labels (last column)

    # Convert continuous labels to binary (or multi-class) labels
    threshold = 0.5  # Example threshold (you can adjust based on your data)
    y = convert_labels_to_binary(y_continuous, threshold)

    # Split data into train and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train model
    model = train_model(X_train, y_train)

    # Evaluate model
    evaluate_model(model, X_val, y_val)

In [22]:
if __name__=='__main__':
    main()

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 1.0
Precision: 0.0
Recall: 0.0
F1 Score: 0.0


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
