In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import torch
import torch.optim as optim

from preprocessing import Preprocessing
from cnn_model import SpectrogramArrivalCNN
from training import ModelTrainer
from dataloader import DataLoaderHandler
from testing import Testing

# Paths to your data
lunar_catalog_path = 'data/lunar/training/catalogs/apollo12_catalog_GradeA_final.csv'
lunar_data_directory = 'data/lunar/training/data/S12_GradeA/'
lunar_data_images_dir = 'model/model_output/lunar_preprocessed_images/'
save_dir_lunar = lunar_data_images_dir

martian_data_directory = 'data/mars/training/data/'
martian_data_images_dir = 'model/model_output/martian_preprocessed_images/'
save_dir_mars = martian_data_images_dir

# Load lunar catalog
lunar_catalog = pd.read_csv(lunar_catalog_path)

In [2]:
# Preprocess lunar data
preprocessor = Preprocessing(save_dir_lunar)
lunar_data, lunar_labels, lunar_arrival_times = preprocessor.preprocess_lunar_data(
    catalog=lunar_catalog, data_dir=lunar_data_directory, combine_images=True
)

Detected Event Arrival Time (relative): 150 s, (absolute): 1970-01-19T00:02:30.665000
Detected Event Arrival Time (relative): 132 s, (absolute): 1970-03-25T00:02:12.440000
Detected Event Arrival Time (relative): 143 s, (absolute): 1970-03-26T00:02:23.565000
Detected Event Arrival Time (relative): 144 s, (absolute): 1970-04-25T00:02:24.196000
Detected Event Arrival Time (relative): 131 s, (absolute): 1970-04-26T00:02:11.660000
Detected Event Arrival Time (relative): 131 s, (absolute): 1970-06-15T00:02:11.510000
Detected Event Arrival Time (relative): 131 s, (absolute): 1970-06-26T00:02:11.116000
Detected Event Arrival Time (relative): 131 s, (absolute): 1970-07-20T00:02:11.487000
Detected Event Arrival Time (relative): 131 s, (absolute): 1970-07-20T00:02:11.487000
Detected Event Arrival Time (relative): 169 s, (absolute): 1970-09-26T00:02:49.149000
Detected Event Arrival Time (relative): 131 s, (absolute): 1970-10-24T00:02:11.504000
Detected Event Arrival Time (relative): 153 s, (absolu

In [3]:
# Convert arrival times to relative time in seconds
reference_time = pd.Timestamp('1970-01-01')
lunar_arrival_times_in_seconds = preprocessor.convert_abs_to_rel_time(lunar_arrival_times, reference_time)

In [4]:
# Train-test split on lunar data
lunar_data_train, lunar_data_test, lunar_times_train, lunar_times_test = train_test_split(
    lunar_data, lunar_arrival_times_in_seconds, test_size=0.2, random_state=42
)

In [5]:
# Normalize the arrival times
scaler = MinMaxScaler()
lunar_times_train_normalized = scaler.fit_transform(np.array(lunar_times_train).reshape(-1, 1)).flatten()
lunar_times_test_normalized = scaler.transform(np.array(lunar_times_test).reshape(-1, 1)).flatten()

In [6]:
# Prepare DataLoader for training and testing
dataloader_handler = DataLoaderHandler(batch_size=32)
lunar_train_loader = dataloader_handler.prepare_data_for_training(lunar_data_train, lunar_times_train_normalized)
lunar_test_loader = dataloader_handler.prepare_data_for_training(lunar_data_test, lunar_times_test_normalized)

In [7]:
# Initialize the model and move it to the appropriate device
cnn_model = SpectrogramArrivalCNN()

In [8]:
# Set up loss function and optimizer
criterion_time = torch.nn.MSELoss()
optimizer = optim.Adam(cnn_model.parameters(), lr=0.00001)

In [9]:
# Train the model on lunar data
trainer = ModelTrainer(cnn_model, criterion_time, optimizer)
trainer.train(lunar_train_loader, num_epochs=20)

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/20], Loss: 0.5874735116958618
Epoch [2/20], Loss: 0.5730822086334229
Epoch [3/20], Loss: 0.5917485654354095
Epoch [4/20], Loss: 0.5393049120903015
Epoch [5/20], Loss: 0.5263840556144714
Epoch [6/20], Loss: 0.5001329183578491
Epoch [7/20], Loss: 0.48011399805545807
Epoch [8/20], Loss: 0.48999081552028656
Epoch [9/20], Loss: 0.3775226026773453
Epoch [10/20], Loss: 0.41195233166217804
Epoch [11/20], Loss: 0.4342464357614517
Epoch [12/20], Loss: 0.3738560527563095
Epoch [13/20], Loss: 0.4048239290714264
Epoch [14/20], Loss: 0.45321959257125854
Epoch [15/20], Loss: 0.363756999373436
Epoch [16/20], Loss: 0.3911724239587784
Epoch [17/20], Loss: 0.3418674021959305
Epoch [18/20], Loss: 0.37683163583278656
Epoch [19/20], Loss: 0.3547985255718231
Epoch [20/20], Loss: 0.30029937624931335


In [10]:
# Evaluate the model on the test set
trainer.evaluate(lunar_test_loader)

Test Loss: 0.29598575830459595


  return F.mse_loss(input, target, reduction=self.reduction)


In [11]:
# Save the trained model
trainer.save_cnn_model('lunar_seismic_cnn_model_full.pth')
trainer.save_cnn_model_state_dict('lunar_seismic_cnn_model_state_dict.pth')

In [12]:
# Load the full pretrained lunar model
cnn_model = torch.load('lunar_seismic_cnn_model_full.pth')
cnn_model.train()  # Set to training mode

  cnn_model = torch.load('lunar_seismic_cnn_model_full.pth')


SpectrogramArrivalCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc_time): Linear(in_features=128, out_features=1, bias=True)
  (fc1): Linear(in_features=200704, out_features=128, bias=True)
)

In [13]:
# Preprocess and self-train on Martian data
preprocessor = Preprocessing(save_dir_mars)
martian_images, _ = preprocessor.preprocess_martian_data(data_dir=martian_data_directory)
martian_data_loader = dataloader_handler.prepare_unlabeled_data_loader(martian_images)

Detected Event Arrival Time (relative): 399 s, (absolute): 2022-01-02T04:06:39.025000
Detected Event Arrival Time (relative): 399 s, (absolute): 2022-02-03T08:06:39.009000


In [14]:
# Self-training on Martian data
trainer.self_train_on_martian_data(martian_data_loader, criterion_time=criterion_time, num_epochs=10)

Epoch [1/10], Loss Time: 0.0
Epoch [2/10], Loss Time: 0.0
Epoch [3/10], Loss Time: 0.0
Epoch [4/10], Loss Time: 0.0
Epoch [5/10], Loss Time: 0.0
Epoch [6/10], Loss Time: 0.0
Epoch [7/10], Loss Time: 0.0
Epoch [8/10], Loss Time: 0.0
Epoch [9/10], Loss Time: 0.0
Epoch [10/10], Loss Time: 0.0


In [15]:
# Save the fine-tuned model after self-training on Martian data
trainer.save_cnn_model('martian_seismic_cnn_model_full.pth')
trainer.save_cnn_model_state_dict('martian_seismic_cnn_model_state_dict.pth')

In [16]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Evaluate the model on the test set and compute metrics
def evaluate_and_get_metrics(trainer, test_loader):
    """
    Evaluate the model and compute metrics like MSE, MAE, and R-squared on the test set.
    """
    trainer.cnn_model.eval()  # Set to evaluation mode
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for batch in test_loader:
            # Unpack batch depending on the structure returned by the dataloader
            if isinstance(batch, (list, tuple)):
                inputs, time_labels = batch[0], batch[1]
            else:
                inputs = batch
                time_labels = None  # Adjust as per the actual data structure

            # Forward pass
            time_output = trainer.cnn_model(inputs)

            # Collect predictions and actual labels
            all_preds.extend(time_output.cpu().numpy())
            all_labels.extend(time_labels.cpu().numpy())

    # Convert to numpy arrays
    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    # Compute regression metrics
    mse = mean_squared_error(all_labels, all_preds)
    mae = mean_absolute_error(all_labels, all_preds)
    r2 = r2_score(all_labels, all_preds)

    # Print metrics
    print(f"Mean Squared Error (MSE): {mse}")
    print(f"Mean Absolute Error (MAE): {mae}")
    print(f"R-squared (R²): {r2}")

    return mse, mae, r2

In [17]:

# Evaluate the model and compute metrics on the test set
mse, mae, r2 = evaluate_and_get_metrics(trainer, lunar_test_loader)

Mean Squared Error (MSE): 0.26671186089515686
Mean Absolute Error (MAE): 0.43810832500457764
R-squared (R²): -2.3124611377716064
