In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from obspy import read
from scipy import signal
from sklearn.model_selection import train_test_split
from PIL import Image
from torchvision import transforms
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
from datetime import timedelta
import json
from sklearn.metrics import accuracy_score, mean_absolute_error, ConfusionMatrixDisplay

# Set device to CPU
# NOTE(review): `device` is not referenced again in the cells visible here
# (no `.to(device)` calls) — confirm whether it is still needed.
device = torch.device("cpu")

# Directory paths. All of them are relative, so they resolve against the
# working directory the notebook kernel was started in.
# Raw inputs: lunar event catalog (CSV) and the lunar / Martian data folders.
lunar_catalog_path = '../../data/lunar_data/training/catalogs/apollo12_catalog_GradeA_final.csv'
lunar_data_directory = '../../data/lunar_data/training/data/S12_GradeA/'
martian_data_directory = '../../data/marsquake_data/training/data/'
# Folders holding the pre-rendered PNG images. The training main() below
# assigns these same two values again as local variables.
lunar_data_images_dir = '../../model/model_output/lunar_preprocessed_images/'
martian_data_images_dir = '../../model/model_output/martian_preprocessed_images/'

In [2]:
# Utility Functions
def convert_rel_to_abs_time(start_time, time_rel):
    """Turn an offset in seconds from ``start_time`` into a timestamp string.

    Args:
        start_time: Value that supports ``+ timedelta`` and ``strftime``
            (e.g. a ``datetime.datetime``).
        time_rel: Offset in seconds; anything ``float()`` accepts.

    Returns:
        str: ``start_time + time_rel`` formatted as ``%Y-%m-%dT%H:%M:%S.%f``.
    """
    offset = timedelta(seconds=float(time_rel))
    absolute_time = start_time + offset
    return absolute_time.strftime('%Y-%m-%dT%H:%M:%S.%f')

def apply_bandpass_filter(trace, sampling_rate, freqmin=0.5, freqmax=3.0):
    """Band-pass filter a trace with a Butterworth design in SOS form.

    Args:
        trace: Array-like samples to filter.
        sampling_rate: Sampling frequency, passed to ``signal.butter`` as ``fs``.
        freqmin: Lower corner frequency, in the same units as ``sampling_rate``.
        freqmax: Upper corner frequency, in the same units as ``sampling_rate``.

    Returns:
        The filtered samples as returned by ``signal.sosfilt``.
    """
    passband = [freqmin, freqmax]
    # Second-order sections are requested (output='sos') and applied with sosfilt.
    sections = signal.butter(
        4,
        passband,
        btype='bandpass',
        fs=sampling_rate,
        output='sos',
    )
    filtered = signal.sosfilt(sections, trace)
    return filtered

def load_existing_images(image_dir):
    """Recursively collect the paths of all ``.png`` files under ``image_dir``.

    Args:
        image_dir (str): Root directory to walk.

    Returns:
        list[str]: Paths (``image_dir`` joined with the relative location) in
        sorted order. Empty when nothing matches; ``os.walk`` also yields
        nothing for a directory that does not exist.
    """
    image_files = [
        os.path.join(root, name)
        for root, _, names in os.walk(image_dir)
        for name in names
        if name.endswith('.png')
    ]
    # os.walk order depends on the filesystem; sorting makes runs reproducible.
    image_files.sort()
    return image_files

In [3]:
# Preprocessing Functions
def preprocess_lunar_data(lunar_data_directory):
    """Collect the PNG images found under a directory and report how many.

    Args:
        lunar_data_directory (str): Directory handed to ``load_existing_images``.

    Returns:
        list[str]: Whatever ``load_existing_images`` returned for that directory.
    """
    found_images = load_existing_images(lunar_data_directory)
    image_count = len(found_images)
    print(f"{image_count} lunar images loaded.")
    return found_images

def preprocess_martian_data(data_dir, save_dir, combine_images=True):
    """Render one image per Martian ``.mseed`` file that has a matching CSV.

    For every ``<name>.mseed`` in ``data_dir`` the sibling ``<name>.csv`` is
    read, the first value of its ``rel_time(sec)`` column is taken as the
    arrival time, and ``plot_and_save_trace_spectrogram`` is asked to produce
    the image. Files without a CSV are reported and skipped; any exception
    raised while handling a file is printed and that file is skipped.

    NOTE(review): ``plot_and_save_trace_spectrogram`` is not defined in the
    cells visible here. If it is missing at runtime, the resulting
    ``NameError`` is caught by the per-file handler below and only printed.

    Args:
        data_dir (str): Directory containing the ``.mseed`` / ``.csv`` pairs.
        save_dir (str): Output directory; created if it does not exist.
        combine_images (bool): Forwarded to ``plot_and_save_trace_spectrogram``.

    Returns:
        tuple[list, list]: ``(image_paths, arrival_times)``, index-aligned,
        covering only the files that were processed successfully.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_dir, exist_ok=True)

    martian_images, martian_arrival_times = [], []
    # os.listdir order is arbitrary; sort so repeated runs process files in the same order.
    mseed_files = sorted(f for f in os.listdir(data_dir) if f.endswith('.mseed'))

    if not mseed_files:
        print("No .mseed files found in the directory.")
        return martian_images, martian_arrival_times

    for filename in mseed_files:
        file_path = os.path.join(data_dir, filename)
        # Swap only the file extension. A plain str.replace would also rewrite
        # '.mseed' occurring in a directory name and point at the wrong CSV.
        csv_file_path = os.path.splitext(file_path)[0] + '.csv'

        if not os.path.exists(csv_file_path):
            print(f"CSV file not found for {filename}: {csv_file_path}")
            continue

        try:
            csv_data = pd.read_csv(csv_file_path)
            arrival_time_rel = csv_data['rel_time(sec)'].iloc[0]
            image_path = plot_and_save_trace_spectrogram(file_path, arrival_time_rel, save_dir, filename, combine_images)
            martian_images.append(image_path)
            martian_arrival_times.append(arrival_time_rel)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole batch.
            print(f"Error processing {file_path}: {e}")

    return martian_images, martian_arrival_times

In [4]:
# CNN Model Definition
class SpectrogramCNN(nn.Module):
    """Two-convolution CNN with a classification head and a regression head.

    Layout: conv(1->32) -> ReLU -> conv(32->64) -> ReLU -> 2x2 max-pool ->
    flatten -> fc1(->128) -> ReLU -> {fc_event(->num_classes), fc_time(->1)}.

    Args:
        input_size (tuple[int, int]): ``(height, width)`` of the single-channel
            input images. Defaults to ``(224, 224)``, which gives the original
            ``64 * 112 * 112`` input width for ``fc1``.
        num_classes (int): Number of outputs of the event head. Defaults to 3.
    """

    def __init__(self, input_size=(224, 224), num_classes=3):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # Both convolutions use padding=1 with a 3x3 kernel and so keep H and W;
        # the single 2x2 pool halves each of them (rounding down).
        height, width = input_size
        flat_features = 64 * (height // 2) * (width // 2)
        # fc1 is sized here, not lazily inside forward(): a layer created during
        # the first forward pass would be missing from any optimizer that was
        # built from model.parameters() beforehand and would never be trained.
        self.fc1 = nn.Linear(flat_features, 128)
        self.fc_event = nn.Linear(128, num_classes)
        self.fc_time = nn.Linear(128, 1)

    def forward(self, x):
        """Return ``(event_logits, time_prediction)`` for a batch of images.

        Args:
            x: Tensor of shape ``(batch, 1, height, width)`` matching ``input_size``.

        Returns:
            tuple: ``(batch, num_classes)`` logits and ``(batch, 1)`` time output.
        """
        x = torch.relu(self.conv1(x))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.flatten(1)  # keep the batch axis, merge channel/height/width
        x = torch.relu(self.fc1(x))
        return self.fc_event(x), self.fc_time(x)

In [5]:
# Data Preparation
def prepare_data_for_training(image_files, labels, time_labels, batch_size=32):
    """Build a shuffled DataLoader of (image, event label, time label) triples.

    Each usable image is converted to one grayscale channel, resized to
    224x224, turned into a tensor and normalised with mean 0.5 / std 0.5.
    Paths that do not exist or do not end in ``.png`` are dropped together
    with their labels, so the three tensors always stay index-aligned.

    Args:
        image_files: Sequence of image paths.
        labels: Event class per image (converted to ``torch.long``).
        time_labels: Arrival time per image (converted to ``torch.float32``).
        batch_size (int): Batch size of the returned loader.

    Returns:
        DataLoader | None: ``None`` when there is nothing usable to load.

    Raises:
        ValueError: If the three inputs do not have the same length.
    """
    if not image_files:
        return None  # Early exit if no images are provided
    if not (len(image_files) == len(labels) == len(time_labels)):
        raise ValueError(
            "image_files, labels and time_labels must have the same length "
            f"(got {len(image_files)}, {len(labels)}, {len(time_labels)})"
        )

    # Filter paths and labels in lockstep. Filtering only the images would
    # shift every later label onto the wrong image, or make TensorDataset fail.
    kept = [
        (path, label, time_label)
        for path, label, time_label in zip(image_files, labels, time_labels)
        if os.path.exists(path) and path.endswith('.png')
    ]
    if not kept:
        return None

    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])
    image_tensors = []
    for path, _, _ in kept:
        # Context manager releases the file handle as soon as the tensor exists.
        with Image.open(path) as img:
            image_tensors.append(transform(img))

    X_tensor = torch.stack(image_tensors)
    y_event_tensor = torch.tensor([label for _, label, _ in kept], dtype=torch.long)
    y_time_tensor = torch.tensor([time_label for _, _, time_label in kept], dtype=torch.float32)
    dataset = TensorDataset(X_tensor, y_event_tensor, y_time_tensor)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Training Functions
def train_model(model, train_loader, criterion_event, criterion_time, optimizer, num_epochs=10):
    """Train a two-headed model on labelled batches.

    Every batch contributes ``criterion_event(event_output, event_labels) +
    criterion_time(time_output, time_labels)``; the mean of that sum over the
    epoch's batches is printed after each epoch.

    Args:
        model: Module whose forward pass returns ``(event_output, time_output)``.
        train_loader: Iterable with ``len()`` that yields
            ``(inputs, event_labels, time_labels)`` batches.
        criterion_event: Loss for the event head.
        criterion_time: Loss for the time head.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epochs (int): Number of passes over ``train_loader``.

    Returns:
        None
    """
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, event_labels, time_labels in train_loader:
            optimizer.zero_grad()
            event_output, time_output = model(inputs)
            loss_event = criterion_event(event_output, event_labels)
            # Drop only the trailing size-1 axis. A bare squeeze() would also
            # remove the batch axis of a one-sample batch, leaving a 0-d
            # prediction against a (1,)-shaped target.
            loss_time = criterion_time(time_output.squeeze(-1), time_labels)
            loss = loss_event + loss_time
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader)}")

def self_train_on_martian_data(model, martian_data_loader, optimizer, criterion_event, criterion_time, num_epochs=10):
    """Fine-tune the model on unlabelled batches using its own predictions.

    For each batch the arg-max of the event output is used as the target of
    ``criterion_event`` (pseudo-labelling), and the time output is compared
    against an all-zero target with ``criterion_time``. The mean combined loss
    per batch is printed after every epoch.

    NOTE(review): the zero time target is a placeholder in the original code;
    optimising against it pulls the time head towards predicting 0 — confirm
    that this is intended.

    Args:
        model: Module whose forward pass returns ``(event_output, time_output)``.
        martian_data_loader: Iterable with ``len()``; the first element of each
            batch is taken as the image tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        criterion_event: Loss applied to the event output and its pseudo-labels.
        criterion_time: Loss applied to the time output and the zero target.
        num_epochs (int): Number of passes over the loader.

    Returns:
        None
    """
    model.train()

    for epoch_number in range(1, num_epochs + 1):
        epoch_loss = 0.0
        for unlabeled_batch in martian_data_loader:
            batch_images = unlabeled_batch[0]  # batches arrive as tuples; the images come first
            optimizer.zero_grad()

            class_scores, time_estimate = model(batch_images)

            # Pseudo-labels: the class the model currently scores highest.
            pseudo_labels = class_scores.max(dim=1).indices
            classification_term = criterion_event(class_scores, pseudo_labels)

            # No ground-truth times exist here, so an all-zero placeholder target is used.
            squeezed_time = time_estimate.squeeze()
            regression_term = criterion_time(squeezed_time, torch.zeros_like(squeezed_time))

            combined_loss = classification_term + regression_term
            combined_loss.backward()
            optimizer.step()

            epoch_loss += combined_loss.item()

        print(f"Self-training Epoch {epoch_number}, Loss: {epoch_loss/len(martian_data_loader)}")

# Model Evaluation
def evaluate_model(model, data_loader):
    """Report event accuracy and arrival-time MAE over a labelled loader.

    The model is switched to eval mode and run without gradient tracking.
    Predictions and targets from all batches are collected, accuracy and mean
    absolute error are printed, and a confusion matrix is displayed.

    Args:
        model: Module whose forward pass returns ``(event_output, time_output)``.
        data_loader: Iterable of ``(images, event_labels, time_labels)`` batches.

    Returns:
        None
    """
    model.eval()
    predicted_classes, predicted_times = [], []
    actual_classes, actual_times = [], []

    with torch.no_grad():
        for batch_images, batch_classes, batch_times in data_loader:
            class_scores, time_estimates = model(batch_images)
            # Highest-scoring class per sample.
            best_class = class_scores.max(dim=1).indices
            predicted_classes.extend(best_class.cpu().numpy())
            predicted_times.extend(time_estimates.cpu().numpy())
            actual_classes.extend(batch_classes.cpu().numpy())
            actual_times.extend(batch_times.cpu().numpy())

    event_accuracy = accuracy_score(actual_classes, predicted_classes)
    time_mae = mean_absolute_error(actual_times, predicted_times)
    print(f"Validation Event Accuracy: {event_accuracy:.4f}")
    print(f"Validation Time MAE: {time_mae:.4f}")

    ConfusionMatrixDisplay.from_predictions(actual_classes, predicted_classes)
    plt.show()

In [6]:
# Model Save Function
def save_model_artifacts(model, model_name='seismic_cnn_model'):
    """Write an architecture summary, the weights and the pickled model to disk.

    Three files are produced, all prefixed with ``model_name``:
    ``_architecture.json`` (channel/kernel sizes of ``conv1``/``conv2`` and
    feature sizes of ``fc_event``/``fc_time``), ``_weights.pth`` (the
    ``state_dict``) and ``_full.pth`` (the whole module object).

    Args:
        model: Module exposing ``conv1``, ``conv2``, ``fc_event`` and ``fc_time``.
        model_name (str): Path prefix for the output files.

    Returns:
        None
    """
    conv_summary = [
        {
            'in_channels': conv.in_channels,
            'out_channels': conv.out_channels,
            'kernel_size': conv.kernel_size,
        }
        for conv in (model.conv1, model.conv2)
    ]
    head_summary = [
        {'in_features': head.in_features, 'out_features': head.out_features}
        for head in (model.fc_event, model.fc_time)
    ]

    architecture_path = f'{model_name}_architecture.json'
    weights_path = f'{model_name}_weights.pth'
    full_model_path = f'{model_name}_full.pth'

    with open(architecture_path, 'w') as handle:
        json.dump({'conv_layers': conv_summary, 'fc_layers': head_summary}, handle, indent=4)

    torch.save(model.state_dict(), weights_path)
    torch.save(model, full_model_path)
    print(f"Model saved to {full_model_path}")

In [7]:
def flatten_image_list(image_list):
    """
    Ensure image list is flat in case there are nested lists of image paths.

    Only one level of nesting is removed. Elements that are lists or tuples are
    expanded in place; every other element (e.g. a path string) is kept whole,
    so a list mixing bare paths and sub-lists is handled correctly.

    Args:
        image_list: A list/tuple of paths and/or lists/tuples of paths. Any
            other value is returned unchanged.

    Returns:
        The original object when it contains no nested list/tuple (or is not a
        list/tuple itself); otherwise a new flat ``list``.
    """
    if not isinstance(image_list, (list, tuple)):
        return image_list
    if not any(isinstance(item, (list, tuple)) for item in image_list):
        return image_list

    flat = []
    for item in image_list:
        if isinstance(item, (list, tuple)):
            flat.extend(item)
        else:
            # Iterating a bare string here would split it into characters.
            flat.append(item)
    return flat

In [8]:
def prepare_unlabeled_data_loader(image_files, batch_size=32):
    """Build a shuffled DataLoader over images only (no labels).

    Each usable image is converted to one grayscale channel, resized to
    224x224, turned into a tensor and normalised with mean 0.5 / std 0.5.
    Paths that do not exist or do not end in ``.png`` are ignored.

    Args:
        image_files: Sequence of image paths; may be empty or ``None``.
        batch_size (int): Batch size of the returned loader.

    Returns:
        DataLoader | None: Loader whose batches are 1-tuples ``(images,)``, or
        ``None`` when there is nothing usable to load.
    """
    if not image_files:
        return None  # same early exit as prepare_data_for_training

    usable_paths = [
        path for path in image_files
        if os.path.exists(path) and path.endswith('.png')
    ]
    if not usable_paths:
        return None

    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])
    image_tensors = []
    for path in usable_paths:
        # Context manager releases the file handle as soon as the tensor exists.
        with Image.open(path) as img:
            image_tensors.append(transform(img))

    X_tensor = torch.stack(image_tensors)
    dataset = TensorDataset(X_tensor)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [9]:
def main():
    """Train on lunar images, self-train on Martian images, then save the model.

    Stages:
        1. Load the pre-rendered lunar PNGs and wrap them in a DataLoader, with
           every event label and arrival time set to the placeholder 0.
        2. Train a fresh ``SpectrogramCNN`` (cross-entropy + MSE, Adam, lr 0.001).
        3. Load the pre-rendered Martian PNGs into an unlabelled DataLoader.
        4. Self-train on the Martian data, reusing the same optimizer and losses.
        5. Save the result under ``../../model/fine_tuned_martian_seismic_cnn``.

    The function prints a message and returns early whenever a stage finds no
    images or cannot build its DataLoader.

    Returns:
        None
    """
    # Folders holding the pre-generated images
    lunar_png_dir = '../../model/model_output/lunar_preprocessed_images/'
    martian_png_dir = '../../model/model_output/martian_preprocessed_images/'

    # --- 1. Lunar images -------------------------------------------------
    print(f"Loading pre-generated lunar data from: {lunar_png_dir}")
    lunar_images = preprocess_lunar_data(lunar_png_dir)
    if not lunar_images:
        print("Error: No lunar images found. Exiting.")
        return

    # No real annotations are attached here: every label/time is a placeholder 0.
    n_lunar = len(lunar_images)
    placeholder_labels = [0] * n_lunar
    placeholder_times = [0] * n_lunar

    print("Preparing DataLoader for lunar data...")
    train_loader = prepare_data_for_training(lunar_images, placeholder_labels, placeholder_times)
    if train_loader is None:
        print("Error: No valid data for training. DataLoader creation failed.")
        return

    # --- 2. Supervised training on lunar data ------------------------------
    print("Initializing SpectrogramCNN model...")
    model = SpectrogramCNN()
    criterion_event = nn.CrossEntropyLoss()
    criterion_time = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    print("Training model on lunar data...")
    train_model(model, train_loader, criterion_event, criterion_time, optimizer)

    # --- 3. Martian images -------------------------------------------------
    print(f"Loading pre-generated Martian data from: {martian_png_dir}")
    martian_images = load_existing_images(martian_png_dir)
    if not martian_images:
        print("Error: No Martian images found. Exiting.")
        return

    print("Preparing DataLoader for Martian data...")
    martian_data_loader = prepare_unlabeled_data_loader(martian_images)
    if martian_data_loader is None:
        print("Error: No valid data for self-training. DataLoader creation failed.")
        return

    # --- 4. Self-training on Martian data ----------------------------------
    print("Self-training model on Martian data...")
    self_train_on_martian_data(model, martian_data_loader, optimizer, criterion_event, criterion_time)

    # --- 5. Persist the fine-tuned model -----------------------------------
    print("Saving the fine-tuned model...")
    save_model_artifacts(model, model_name='../../model/fine_tuned_martian_seismic_cnn')


In [10]:
# Entry point: run main() only when this code executes as the top-level
# program (module name "__main__"), not when it is imported.
if __name__ == "__main__":
    main()

Loading pre-generated lunar data from: ../../model/model_output/lunar_preprocessed_images/
76 lunar images loaded.
Preparing DataLoader for lunar data...
Initializing SpectrogramCNN model...
Training model on lunar data...
Epoch 1, Loss: 1600.0661470890045
Epoch 2, Loss: 449.5803413391113
Epoch 3, Loss: 195.09067153930664
Epoch 4, Loss: 3.13076926022768
Epoch 5, Loss: 0.5083693067232767
Epoch 6, Loss: 0.670348584651947
Epoch 7, Loss: 0.8254997730255127
Epoch 8, Loss: 0.9365084966023763
Epoch 9, Loss: 0.9263431032498678
Epoch 10, Loss: 0.9064249992370605
Loading pre-generated Martian data from: ../../model/model_output/martian_preprocessed_images/
Preparing DataLoader for Martian data...
Self-training model on Martian data...
Self-training Epoch 1, Loss: 0.8976970314979553
Self-training Epoch 2, Loss: 0.889222264289856
Self-training Epoch 3, Loss: 0.8802964687347412
Self-training Epoch 4, Loss: 0.8709366917610168
Self-training Epoch 5, Loss: 0.8611681461334229
Self-training Epoch 6, Los

In [None]:
def load_trained_model(model_path):
    """
    Load the trained model from the specified path.

    The file is expected to hold a whole pickled module (as written by
    ``torch.save(model, path)``), not just a ``state_dict``. The class of the
    saved model must be defined or importable when this is called.

    SECURITY: loading a full model unpickles arbitrary Python objects and can
    execute code. Only load files that come from a trusted source.

    Args:
        model_path (str): Path to the saved model file.

    Returns:
        model (nn.Module): Loaded PyTorch model, mapped to CPU and in eval mode.
    """
    cpu = torch.device('cpu')
    try:
        # Recent PyTorch releases default to weights_only=True, which refuses
        # to unpickle a complete nn.Module; opt out explicitly.
        model = torch.load(model_path, map_location=cpu, weights_only=False)
    except TypeError:
        # Older PyTorch releases do not accept the weights_only keyword.
        model = torch.load(model_path, map_location=cpu)
    model.eval()  # Set model to evaluation mode
    return model


In [None]:
def predict_on_test_data(model, test_folders):
    """
    Iterate over test folders, make predictions, and collect results.

    Every ``.png`` found under a folder is converted to one grayscale channel,
    resized to 224x224, normalised and passed through the model one image at a
    time. If the folder contains a ``labels.csv`` (columns ``filename``,
    ``event_class_true``, ``rel_time_true``), the matching ground truth is
    attached to the result.

    Args:
        model (nn.Module): Trained model.
        test_folders (list): List of paths to test data directories.

    Returns:
        results (list): List of dictionaries containing predictions and file
        information. Keys: ``folder``, ``filename``, ``event_class_pred``,
        ``rel_time_pred`` and, when ground truth was found,
        ``event_class_true`` and ``rel_time_true``.
    """
    results = []
    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])

    for folder in test_folders:
        print(f"Processing folder: {folder}")
        image_files = load_existing_images(folder)
        if not image_files:
            print(f"No images found in folder: {folder}")
            continue

        # labels.csv belongs to the folder, so read it once here and not once
        # per image inside the loop below.
        label_csv_path = os.path.join(folder, 'labels.csv')
        labels_df = pd.read_csv(label_csv_path) if os.path.exists(label_csv_path) else None

        for img_path in image_files:
            # Context manager releases the file handle once the tensor exists.
            with Image.open(img_path) as img:
                img_tensor = transform(img).unsqueeze(0)  # Add batch dimension
            with torch.no_grad():
                event_output, time_output = model(img_tensor)
                _, event_pred_class = torch.max(event_output, 1)
                rel_time_pred = time_output.item()

            filename = os.path.basename(img_path)
            result = {
                'folder': folder,
                'filename': filename,
                'event_class_pred': event_pred_class.item(),
                'rel_time_pred': rel_time_pred
            }
            if labels_df is not None:
                # Labels are matched on the bare file name, not the full path.
                label_row = labels_df[labels_df['filename'] == filename]
                if not label_row.empty:
                    result['event_class_true'] = int(label_row['event_class_true'].values[0])
                    result['rel_time_true'] = float(label_row['rel_time_true'].values[0])
            results.append(result)
    return results


In [None]:
def evaluate_predictions(results):
    """
    Evaluate model predictions against ground truth and generate plots.

    Only rows that actually carry ground truth are scored. Results without
    ``event_class_true`` / ``rel_time_true`` become NaN in the DataFrame and
    are excluded, so partially labelled result sets can be evaluated.

    Args:
        results (list): List of dictionaries containing predictions and true labels.

    Returns:
        None
    """
    df = pd.DataFrame(results)

    # Event Classification Evaluation
    if 'event_class_true' in df.columns:
        event_rows = df.dropna(subset=['event_class_true'])
    else:
        event_rows = df.iloc[0:0]
    if not event_rows.empty:
        # The column turns float once NaNs were present; restore integer classes.
        event_true = event_rows['event_class_true'].astype(int)
        event_pred = event_rows['event_class_pred'].astype(int)
        event_accuracy = accuracy_score(event_true, event_pred)
        print(f"Event Classification Accuracy: {event_accuracy:.4f}")
        # Generate Confusion Matrix
        ConfusionMatrixDisplay.from_predictions(event_true, event_pred)
        plt.title('Event Classification Confusion Matrix')
        plt.show()
    else:
        print("Ground truth event classes not available for evaluation.")

    # Arrival Time Evaluation
    if 'rel_time_true' in df.columns:
        time_rows = df.dropna(subset=['rel_time_true'])
    else:
        time_rows = df.iloc[0:0]
    if not time_rows.empty:
        time_true = time_rows['rel_time_true']
        time_pred = time_rows['rel_time_pred']
        time_mae = mean_absolute_error(time_true, time_pred)
        print(f"Arrival Time Mean Absolute Error (MAE): {time_mae:.4f} seconds")
        # Plot True vs. Predicted Arrival Times
        plt.figure(figsize=(8,6))
        plt.scatter(time_true, time_pred, alpha=0.5)
        # Dashed diagonal marks a perfect prediction (predicted == true).
        lowest, highest = time_true.min(), time_true.max()
        plt.plot([lowest, highest], [lowest, highest], 'r--')
        plt.xlabel('True Relative Arrival Time (s)')
        plt.ylabel('Predicted Relative Arrival Time (s)')
        plt.title('True vs. Predicted Relative Arrival Times')
        plt.show()
    else:
        print("Ground truth arrival times not available for evaluation.")


In [None]:
def save_results_to_csv(results, output_csv_path):
    """
    Write the prediction records to a CSV file and report where it went.

    The records are turned into a DataFrame and written without the index
    column; a confirmation line is printed afterwards.

    Args:
        results (list): One dictionary per prediction.
        output_csv_path (str): Destination file path.

    Returns:
        None
    """
    results_table = pd.DataFrame(results)
    results_table.to_csv(output_csv_path, index=False)
    confirmation = f"Results saved to {output_csv_path}"
    print(confirmation)


In [1]:
def main():
    """Load the fine-tuned model, predict on the test folders, evaluate, save.

    NOTE(review): this definition reuses the name ``main``. Once this cell has
    run, it replaces the training ``main()`` defined earlier in the notebook.

    The model is read from ``../../model/fine_tuned_martian_seismic_cnn_full.pth``,
    the location the training ``main()`` writes to through
    ``save_model_artifacts(model, model_name='../../model/fine_tuned_martian_seismic_cnn')``.

    Returns:
        None
    """
    # ... [Training and saving the model code] ...

    # Load the fine-tuned model
    print("Loading the fine-tuned model...")
    # Must match the prefix used when saving plus the '_full.pth' suffix; a bare
    # file name would look in the working directory, where nothing was written.
    model_path = '../../model/fine_tuned_martian_seismic_cnn_full.pth'
    model = load_trained_model(model_path)

    # Define test folders
    # NOTE(review): predict_on_test_data only picks up .png files under these
    # folders — confirm that pre-rendered images exist there.
    test_folders = [
        '../../data/marsquake_data/test/data',
        '../../data/lunar_data/test/data/S12_GradeB',
        '../../data/lunar_data/test/data/S15_GradeA',
        '../../data/lunar_data/test/data/S15_GradeB',
        '../../data/lunar_data/test/data/S16_GradeA',
        '../../data/lunar_data/test/data/S16_GradeB'
    ]

    # Predict on test data
    print("Predicting on test data...")
    results = predict_on_test_data(model, test_folders)

    # Evaluate predictions
    print("Evaluating predictions...")
    evaluate_predictions(results)

    # Save results to CSV
    output_csv_path = 'prediction_results.csv'
    print(f"Saving results to {output_csv_path}")
    save_results_to_csv(results, output_csv_path)


In [None]:
# Entry point: run main() only when this code executes as the top-level
# program. `main` resolves to whichever definition was executed most recently.
if __name__ == "__main__":
    main()