In [1]:
# Experiment log. The leading number is presumably the loss of that run (TODO confirm).
# 0.31482218 'lr': 0.001
# 0.10198582 'lr': 0.0001 | almost all predictions are 0.5
# 0.26362439 'lr': 0.001 augmentations | all predictions are 0.5
# 0.24052888 'lr': 0.001 augmentations x 10 | not bad, RMSE: 7063.950251422128 (18.00782954549693 %)
# 0.33602387 'lr': 0.001 batch_size 32 batchnorm augmentations x 10 | poor result, RMSE: 24987.223981126954 (64.59238449281463 %)

In [2]:
import os

from typing import Tuple
from datetime import datetime

import obspy
import torch
import random
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np

from scipy import signal
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from plotly import graph_objects as go

In [3]:
def make_deterministic(seed: int = 0):
    """
    Seed every random number generator used in this notebook.

    Python's ``random``, NumPy and PyTorch (CPU and current CUDA device) are
    seeded with ``seed``; cuDNN is put in deterministic mode with
    benchmarking switched off.
    Passing ``seed == -1`` leaves all of that untouched.
    Deterministic execution might be slower.
    """
    if seed != -1:
        seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
        for apply_seed in seeders:
            apply_seed(seed)

        cudnn = torch.backends.cudnn
        cudnn.deterministic = True
        cudnn.benchmark = False

def create_spectrogram(st: 'obspy.core.stream.Stream', minfreq: float = None, maxfreq: float = None, shape=(129, 2555)) -> np.ndarray:
    """
    Build a min-max normalised spectrogram of the first trace in ``st``.

    :param st: Stream whose first trace is analysed. It is copied first, so
        the caller's data is never filtered in place.
    :param minfreq: Lower corner of the optional band-pass filter.
    :param maxfreq: Upper corner of the optional band-pass filter. The filter
        is applied only when both ``minfreq`` and ``maxfreq`` are given.
    :param shape: ``(rows, cols)`` of the returned array. The spectrogram is
        cropped to this size and zero-padded where it is smaller.
    :return: ``float64`` array of exactly ``shape`` with values in ``[0, 1]``.
    """
    st_filt = st.copy()

    if minfreq is not None and maxfreq is not None:
        st_filt.filter('bandpass', freqmin=minfreq, freqmax=maxfreq)

    trace = st_filt.traces[0]
    _, _, spectrogram = signal.spectrogram(trace.data, trace.stats.sampling_rate)

    # Min-max normalise. A flat spectrogram (e.g. a constant signal) has zero
    # range; dividing by it would fill the output with NaN, so map it to zeros.
    lowest = spectrogram.min()
    value_range = spectrogram.max() - lowest
    if value_range == 0:
        spectrogram = np.zeros(spectrogram.shape, dtype=np.float64)
    else:
        spectrogram = (spectrogram - lowest) / value_range

    # Copy the overlapping top-left region into a zero canvas of the target size.
    padded_spectrogram = np.zeros(shape, dtype=np.float64)
    rows = min(spectrogram.shape[0], shape[0])
    cols = min(spectrogram.shape[1], shape[1])
    padded_spectrogram[:rows, :cols] = spectrogram[:rows, :cols]

    return padded_spectrogram

def create_label(st: obspy.core.stream.Stream, row: pd.Series) -> float:
    """
    Express the catalogued arrival as a fraction of the trace duration.

    The relative arrival is read from ``row['time_rel(sec)']`` and divided by
    the number of seconds between the start and end timestamps of the first
    trace in ``st``.
    """
    arrival_seconds = row['time_rel(sec)']
    stats = st.traces[0].stats
    duration_seconds = (stats.endtime.datetime - stats.starttime.datetime).total_seconds()

    return arrival_seconds / duration_seconds

def create_sample(row, minfreq: float = None, maxfreq: float = None) -> Tuple[np.ndarray, float]:
    """
    Turn one catalog row into a ``(spectrogram, label)`` pair.

    Reads ``<data_directory><row.filename>.mseed`` (``data_directory`` is a
    notebook-level variable), then builds the spectrogram with the optional
    band-pass limits and the label from the same stream.
    """
    stream = obspy.read(f'{data_directory}{row.filename}.mseed')
    features = create_spectrogram(stream, minfreq, maxfreq)
    target = create_label(stream, row)

    return features, target

In [4]:
data_directory = './data/lunar/training/data/S12_GradeA/'
spectrogram_directory = './data/lunar/training/spectrograms/'
categories_df = pd.read_csv('./data/lunar/training/catalogs/apollo12_catalog_GradeA_final.csv')

# Create the *output* folder once, before the loop. The previous version
# called makedirs on the input data folder, so np.savez failed whenever the
# spectrogram folder did not already exist.
os.makedirs(spectrogram_directory, exist_ok=True)

labels = []
samples = []  # kept in memory for interactive inspection; not used below
for _, row in categories_df.iterrows():
    # Band-pass 0.001-1.0 before computing the spectrogram.
    spectrogram, label = create_sample(row, 0.001, 1.0)
    # np.savez appends ".npz" and stores the array under the key "arr_0".
    spectrogram_path = os.path.join(spectrogram_directory, row["filename"])
    np.savez(spectrogram_path, spectrogram)

    labels.append(label)
    samples.append(spectrogram)

# Index of cached spectrograms: file name, normalised label and catalog metadata.
df = pd.DataFrame({'filename': categories_df.filename, 'label': labels})
df = pd.concat([df, categories_df[["mq_type", "evid", "time_rel(sec)"]]], axis=1)

df.to_csv('./data/lunar/training/spectrograms.csv', index=False)

In [5]:
class ComposeWithLabels:
    """
    Chains sample-level transforms into one callable.

    Each transform receives the value returned by the previous one; the
    output of the last transform is returned. With no transforms the input
    is returned unchanged.
    """
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, sample):
        result = sample
        for step in self.transforms:
            result = step(result)
        return result

class RandomApplyWithLabels:
    """
    Applies ``transform`` to a sample with probability ``p``.

    One uniform number is drawn from torch's RNG on every call; the
    transform runs only when that number is below ``p``.
    """
    def __init__(self, transform, p=0.5):
        self.transform = transform
        self.p = p

    def __call__(self, sample):
        draw = torch.rand(1).item()
        if not draw < self.p:
            return sample
        return self.transform(sample)

class RandomTimeShift:
    """
    Shifts the spectrogram in the time dimension by a random amount.
    Also adjusts the time label accordingly.

    The shift is drawn uniformly from ``[-shift_range, shift_range]`` (both
    ends included). Positive values move the content to the right (later),
    negative values to the left (earlier); vacated columns are zero-filled.
    The label moves by ``shift / total_time_steps`` in the same direction
    and is clamped to ``[0, 1]``.
    """
    def __init__(self, shift_range):
        self.shift_range = shift_range

    def __call__(self, sample):
        spectrogram = sample['spectrogram']
        label = sample['label']

        total_time_steps = spectrogram.shape[2]

        # randint's upper bound is exclusive: +1 makes the range symmetric and
        # keeps shift_range == 0 valid (it then always yields 0).
        shift = int(np.random.randint(-self.shift_range, self.shift_range + 1))
        # A shift larger than the tensor would change its width.
        shift = max(-total_time_steps, min(total_time_steps, shift))
        magnitude = abs(shift)

        if magnitude:
            # new_zeros keeps the dtype and device of the input tensor.
            padding = spectrogram.new_zeros(spectrogram.shape[0], spectrogram.shape[1], magnitude)
            if shift > 0:
                # Shift to the right
                spectrogram = torch.cat((padding, spectrogram[:, :, :-magnitude]), dim=2)
            else:
                # Shift to the left
                spectrogram = torch.cat((spectrogram[:, :, magnitude:], padding), dim=2)

            # Adjust the label with the *signed* shift. The previous code
            # negated `shift` for left shifts and then added it, which moved
            # the label the wrong way.
            # NOTE(review): if the tensor is zero-padded past the real signal,
            # one column is not exactly 1/total_time_steps of the label range
            # -- confirm this approximation is acceptable.
            label = label + (shift / total_time_steps)

        label = torch.clamp(label, 0.0, 1.0)

        sample['spectrogram'] = spectrogram
        sample['label'] = label
        return sample

class RandomTimeMask:
    """
    Zeroes one randomly placed band of consecutive time steps.

    The band width is drawn from ``[0, max_mask_size)`` and its start is
    drawn so that the band fits inside the time axis. The input tensor is
    modified in place.
    """
    def __init__(self, max_mask_size):
        self.max_mask_size = max_mask_size

    def __call__(self, sample):
        target = sample['spectrogram']
        _, _, num_steps = target.shape

        width = np.random.randint(0, self.max_mask_size)
        start = np.random.randint(0, num_steps - width)
        # Last axis is the time axis of the 3-D tensor.
        target[..., start:start + width] = 0

        sample['spectrogram'] = target
        return sample

class RandomFrequencyMask:
    """
    Zeroes one randomly placed band of consecutive frequency rows.

    The band height is drawn from ``[0, max_mask_size)`` and its first row is
    drawn so that the band fits inside the frequency axis. The input tensor
    is modified in place.
    """
    def __init__(self, max_mask_size):
        self.max_mask_size = max_mask_size

    def __call__(self, sample):
        target = sample['spectrogram']
        _, num_rows, _ = target.shape

        height = np.random.randint(0, self.max_mask_size)
        first_row = np.random.randint(0, num_rows - height)
        # Second axis is the frequency axis of the 3-D tensor.
        target[:, first_row:first_row + height] = 0

        sample['spectrogram'] = target
        return sample

class AddNoise:
    """
    Adds zero-mean Gaussian noise scaled by ``noise_level`` to the spectrogram.

    The noise is drawn from torch's RNG with the shape and dtype of the
    input; the result is a new tensor, the input is left untouched.
    """
    def __init__(self, noise_level=0.005):
        self.noise_level = noise_level

    def __call__(self, sample):
        clean = sample['spectrogram']
        jitter = torch.randn_like(clean) * self.noise_level
        sample['spectrogram'] = clean + jitter
        return sample

class AmplitudeScaling:
    """
    Multiplies the whole spectrogram by one random factor.

    The factor is drawn uniformly from ``scale_range`` using NumPy's RNG;
    the result is a new tensor.
    """
    def __init__(self, scale_range=(0.8, 1.2)):
        self.scale_range = scale_range

    def __call__(self, sample):
        factor = np.random.uniform(*self.scale_range)
        sample['spectrogram'] = sample['spectrogram'] * factor
        return sample
    
from scipy.ndimage import gaussian_filter

class RandomSpikeAugmentation:
    def __init__(self, base_spike_value=1.0, spike_duration=1, max_num_spikes=3, fade_factor=0.8, noise_level=0.25, size=4, sigma=1):
        """
        Adds random spikes to the spectrogram with discrete steps, noise, and frequency fade,
        and applies a Gaussian filter to the result.

        :param base_spike_value: The base value of the spike.
        :param spike_duration: Duration of each spike in time steps.
        :param max_num_spikes: Maximum number of spikes to add (inclusive); at least one is always added.
        :param fade_factor: Factor by which the spike fades at higher frequencies.
        :param noise_level: The amount of random noise to add to the spike.
        :param size: Defines which portion of frequencies will be affected (the first 1/size rows).
        :param sigma: Standard deviation for Gaussian filter.
        """
        self.base_spike_value = base_spike_value
        self.spike_duration = spike_duration
        self.max_num_spikes = max_num_spikes
        self.fade_factor = fade_factor
        self.noise_level = noise_level
        self.size = size
        self.sigma = sigma

    def __call__(self, sample):
        """
        Add the spikes to ``sample['spectrogram']`` and smooth the result.

        The spikes are written into the input tensor in place; the smoothed
        result is stored back in ``sample`` as a new tensor.
        """
        spectrogram = sample['spectrogram']
        _, f, t = spectrogram.shape

        # randint's upper bound is exclusive, so +1 is needed for the maximum
        # to be reachable (previously max_num_spikes=2 always produced 1 spike).
        # No random number is consumed when only one spike is possible.
        if self.max_num_spikes > 1:
            num_spikes = np.random.randint(1, self.max_num_spikes + 1)
        else:
            num_spikes = 1

        # Only the first 1/size of the frequency rows receive spikes.
        affected_rows = int(f * (1 / self.size))

        for _ in range(num_spikes):
            # Randomly select the start time for the spike
            spike_start = np.random.randint(0, t - self.spike_duration)
            spike_stop = spike_start + self.spike_duration

            for i in range(affected_rows):
                # The spike gets weaker with every frequency row.
                fade = self.fade_factor ** i

                # Faded base value plus per-row noise, limited to [0, 1].
                spike_value = self.base_spike_value * fade + (np.random.randn() * self.noise_level)
                spike = torch.ones(self.spike_duration) * spike_value
                spike = torch.clamp(spike, 0.0, 1.0)

                # Apply the spike to the spectrogram at the current frequency
                spectrogram[:, i, spike_start:spike_stop] += spike

        # The Gaussian filter runs on the whole array, not only on the spike columns.
        smoothed = gaussian_filter(spectrogram.numpy(), sigma=self.sigma)

        # gaussian_filter returns a fresh array, so it can be wrapped without copying.
        sample['spectrogram'] = torch.from_numpy(smoothed)

        return sample

In [6]:
class SpectrogramDataset(Dataset):
    """
    Dataset of cached spectrograms (``.npz`` files) and their normalised labels.

    Each item is a ``(spectrogram, label)`` pair of ``float64`` tensors.
    """
    def __init__(self, dataframe: pd.DataFrame, sample: float = 1.0, transform=None, augmentations=False,
                 spectrogram_dir: str = './data/lunar/training/spectrograms/'):
        """
        :param dataframe: Index table with at least the columns ``filename``, ``label`` and ``evid``.
        :param sample: Fraction of rows to draw. Values above 1 oversample the table.
        :param transform: Callable applied to the loaded array. Defaults to torchvision's ``ToTensor``.
        :param augmentations: When true, the random augmentation pipeline is applied to every item.
        :param spectrogram_dir: Folder that holds ``<filename>.npz`` for every row.
        """
        # Replacement is only required when more rows are requested than exist
        # (frac > 1). The previous condition also enabled it for frac < 1,
        # which produced duplicate rows in sub-samples.
        self.samples_df = dataframe.sample(frac=sample, replace=sample > 1.0)
        # Order rows by the number at the end of the event id.
        self.samples_df = self.samples_df.sort_values(
            by='evid',
            key=lambda x: x.str.extract(r'(\d+)$').iloc[:, 0].astype(int)
        )
        self.augmentations = augmentations
        self.spectrogram_dir = spectrogram_dir

        if transform is None:
            self.transform = transforms.Compose([transforms.ToTensor()])
        else:
            self.transform = transform

        if self.augmentations:
            self.augmentation_transforms = ComposeWithLabels([
                RandomApplyWithLabels(RandomTimeShift(shift_range=20), p=1.0),
                RandomApplyWithLabels(RandomTimeMask(max_mask_size=50), p=1.0),
                RandomApplyWithLabels(RandomFrequencyMask(max_mask_size=2), p=0.5),
                RandomApplyWithLabels(AddNoise(noise_level=0.0075), p=1.0),
                RandomApplyWithLabels(AmplitudeScaling(scale_range=(0.8, 1.2)), p=1.0),
                RandomApplyWithLabels(RandomSpikeAugmentation(size=4, max_num_spikes=2), p=1.0),
                RandomApplyWithLabels(RandomSpikeAugmentation(size=2, max_num_spikes=1), p=1.0),
            ])
        else:
            self.augmentation_transforms = None

    def __len__(self):
        return len(self.samples_df)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        row = self.samples_df.iloc[idx]
        spectrogram_path = os.path.join(self.spectrogram_dir, f'{row["filename"]}.npz')
        # np.load returns a lazy archive holding an open file handle; the
        # context manager closes it once the array has been read.
        with np.load(spectrogram_path) as archive:
            spectrogram = archive['arr_0']
        label = torch.tensor(row['label'], dtype=torch.float64)

        if self.transform is not None:
            spectrogram = self.transform(spectrogram)

        sample = {'spectrogram': spectrogram, 'label': label}

        if self.augmentation_transforms:
            sample = self.augmentation_transforms(sample)

        return sample['spectrogram'].double(), sample['label'].double()

In [7]:
class SeismicCNN(nn.Module):
    """
    Small CNN that maps a one-channel spectrogram to a single value in ``[0, 1]``.

    Three 3x3 convolutions (16, 32, 64 channels), each followed by ReLU and
    2x2 max pooling, feed two fully connected layers and a sigmoid. All
    parameters are ``float64``.
    """
    def __init__(self, input_shape=(129, 2555)):
        """
        :param input_shape: ``(height, width)`` of the input spectrograms. The
            default gives the original ``64 * 16 * 319`` flattened size, so
            previously saved weights still load.
        """
        super(SeismicCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=(3, 3), padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=(3, 3), padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=(3, 3), padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.flat_features = self._flattened_size(input_shape)
        self.fc1 = nn.Linear(self.flat_features, 128)
        self.fc2 = nn.Linear(128, 1)
        self.sigmoid = nn.Sigmoid()
        self.double()

    @staticmethod
    def _flattened_size(input_shape):
        """Number of features left after the three conv + pool stages."""
        height, width = input_shape
        # The padded 3x3 convolutions keep the size; each of the three 2x2
        # poolings halves it with floor division, i.e. size // 8 overall.
        flat = 64 * (height // 8) * (width // 8)
        if flat <= 0:
            raise ValueError(f'input_shape {input_shape} is too small for three 2x2 poolings')
        return flat

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))
        # Flatten to (batch, features); an unbatched input becomes a batch of one.
        x = x.view(-1, self.flat_features)
        x = torch.relu(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        return x

    def save(self, path):
        """Write the model's state dict to ``path``."""
        torch.save(self.state_dict(), path)

    def load(self, path):
        """Load a state dict written by ``save`` and switch to evaluation mode."""
        # A state dict holds only tensors, so the restricted unpickler is
        # enough and avoids running arbitrary pickled code. map_location lets
        # weights saved on a GPU load on a CPU-only machine.
        self.load_state_dict(torch.load(path, map_location='cpu', weights_only=True))
        self.eval()

In [8]:
class Trainer:
    """
    Runs k-fold cross-validation and final training for a model.

    Models, datasets and optimizers are created through the factory callables
    given to the constructor, so every fold starts from a fresh model.
    """
    def __init__(
        self,
        model_fn,
        dataset_fn,
        dataframe,
        criterion,
        optimizer_fn,
        shuffle=True,
    ):
        """
        :param model_fn: Zero-argument callable returning a new model.
        :param dataset_fn: Callable ``(dataframe, sample, augmentations)`` returning a dataset.
        :param dataframe: Table of all available samples.
        :param criterion: Loss called as ``criterion(outputs, labels)``.
        :param optimizer_fn: Callable ``(params, **optimizer_params)`` returning an optimizer.
        :param shuffle: Whether ``KFold`` shuffles before splitting.
        """
        self.model = None
        self.model_fn = model_fn
        self.dataset_fn = dataset_fn
        self.dataframe = dataframe
        self.criterion = criterion
        self.optimizer_fn = optimizer_fn

        self.shuffle = shuffle
        self.num_epochs = None
        self.batch_size = None
        self.k_folds = None
        self.optimizer_params = None

        self.best_val_loss = float('inf')
        self.best_model_state = None
        self.best_hyperparams = None

        make_deterministic(42)

    def train_cross_validation(self, optimizer_params, num_epochs=1, batch_size=16, k_folds=5):
        """
        Train one fresh model per fold and report the mean validation loss.

        If the mean beats the best value seen so far, the hyper-parameters and
        a snapshot of the last fold's weights are remembered.
        """
        self.optimizer_params = optimizer_params
        self.num_epochs = num_epochs
        self.batch_size = batch_size
        self.k_folds = k_folds

        # K-fold cross-validation
        indices = list(range(len(self.dataframe)))
        kf = KFold(n_splits=self.k_folds, shuffle=self.shuffle)
        mean_val_loss = 0.0

        for fold, (train_idx, val_idx) in enumerate(kf.split(indices)):
            print(f'Fold {fold+1}/{self.k_folds}')
            # Training rows are oversampled 10x with augmentation; validation
            # rows are used once, unaugmented.
            train_subset = self.dataset_fn(dataframe=self.dataframe.iloc[train_idx], sample=10.0, augmentations=True)
            val_subset = self.dataset_fn(dataframe=self.dataframe.iloc[val_idx], sample=1, augmentations=False)

            train_loader = DataLoader(
                train_subset, batch_size=self.batch_size, shuffle=True
            )
            val_loader = DataLoader(
                val_subset, batch_size=self.batch_size, shuffle=False
            )

            # Initialize model and optimizer for this fold
            self.model = self.model_fn()
            self.model.train()
            optimizer = self.optimizer_fn(
                self.model.parameters(), **self.optimizer_params
            )

            self._train_model(train_loader, optimizer)
            val_loss = self._evaluate_model(val_loader)
            mean_val_loss += val_loss
            print(f'Validation Loss for fold {fold+1}: {val_loss:.8f}')

        mean_val_loss /= self.k_folds
        print(f'Mean Validation Loss: {mean_val_loss:.8f}')

        # Check for best model
        if mean_val_loss < self.best_val_loss:
            import copy  # local import: only needed for this snapshot

            self.best_val_loss = mean_val_loss
            # state_dict() returns references to the live parameters; deep-copy
            # them so the snapshot cannot change if the model is updated later.
            # Note that this is the model of the *last* fold.
            self.best_model_state = copy.deepcopy(self.model.state_dict())
            self.best_hyperparams = {
                'learning_rate': self.optimizer_params.get('lr', None),
                'batch_size': self.batch_size,
                'num_epochs': self.num_epochs,
            }

        print('Cross validation training complete')
        print(self.best_hyperparams)

    def train(self, epochs, batch_size, optimizer_params, augmentations=True):
        """Train a fresh model on the whole dataframe and print its loss on that data."""
        self.num_epochs = epochs
        self.batch_size = batch_size
        self.optimizer_params = optimizer_params
        dataset = self.dataset_fn(dataframe=self.dataframe, sample=10.0, augmentations=augmentations)
        train_loader = DataLoader(
            dataset, batch_size=self.batch_size, shuffle=True
        )
        self.model = self.model_fn()
        self.model.train()
        optimizer = self.optimizer_fn(
            self.model.parameters(), **self.optimizer_params
        )
        self._train_model(train_loader, optimizer)

        # Loss over one pass of the (un-oversampled) training data.
        eval_dataset = self.dataset_fn(dataframe=self.dataframe, sample=1, augmentations=augmentations)
        eval_loader = DataLoader(
            eval_dataset, batch_size=self.batch_size, shuffle=False
        )
        train_loss = self._evaluate_model(eval_loader)
        print(f'Training Loss: {train_loss:.8f}')

    def _train_model(self, train_loader, optimizer):
        """Run ``self.num_epochs`` epochs over ``train_loader``, printing the mean batch loss per epoch."""
        self.model.train()
        for epoch in range(self.num_epochs):
            running_loss = 0.0
            with tqdm(
                train_loader,
                desc=f'Epoch [{epoch+1}/{self.num_epochs}]',
                unit='batch',
            ) as batch_bar:
                for spectrograms, labels in batch_bar:
                    # Add a trailing dimension so labels match the (batch, 1) model output.
                    labels = labels.unsqueeze(1)
                    optimizer.zero_grad()
                    outputs = self.model(spectrograms)
                    loss = self.criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()
                    running_loss += loss.item()
                    batch_bar.set_postfix(loss=loss.item())
            avg_loss = running_loss / len(train_loader)
            print(f"Epoch [{epoch+1}/{self.num_epochs}], Loss: {avg_loss:.8f}")

    def _evaluate_model(self, val_loader):
        """Return the mean of the per-batch losses over ``val_loader``; leaves the model in eval mode."""
        self.model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for spectrograms, labels in val_loader:
                labels = labels.unsqueeze(1)
                outputs = self.model(spectrograms)
                loss = self.criterion(outputs, labels)
                val_loss += loss.item()
        avg_val_loss = val_loss / len(val_loader)
        return avg_val_loss

    def save_model(self, path):
        """
        Write the current model's state dict to ``path``.

        :raises RuntimeError: If no model has been trained or loaded yet.
        """
        if self.model is None:
            raise RuntimeError('No model to save: train or load a model first.')
        torch.save(self.model.state_dict(), path)

    def load_model(self, path):
        """Load a state dict from ``path``, creating the model via ``model_fn`` when none exists yet."""
        if self.model is None:
            self.model = self.model_fn()
        # A state dict holds only tensors, so the restricted unpickler suffices.
        self.model.load_state_dict(torch.load(path, weights_only=True))

In [9]:
def model_fn():
    """Factory for a fresh, untrained ``SeismicCNN``."""
    network = SeismicCNN()
    return network

def dataset_fn(dataframe, sample, augmentations):
    """Factory for a ``SpectrogramDataset`` over ``dataframe`` with the given sampling fraction and augmentation switch."""
    options = {
        'dataframe': dataframe,
        'sample': sample,
        'augmentations': augmentations,
    }
    return SpectrogramDataset(**options)

def optimizer_fn(params, lr):
    """Factory for an Adam optimizer over ``params`` with learning rate ``lr``."""
    adam = optim.Adam(params, lr=lr)
    return adam

In [10]:
def inference(model, dataset, save_folder, save_images=True):
    """
    Predict an arrival time for every item of ``dataset`` and write a catalog.

    For each item the matching ``.mseed`` file is read from the notebook-level
    ``data_directory`` to obtain the trace start time and duration; the model
    output (a fraction of the duration) is converted to relative seconds and
    an absolute timestamp.

    :param model: Model called on one spectrogram; its output must be a single value.
    :param dataset: Dataset exposing ``samples_df`` with ``filename``, ``evid`` and ``time_rel(sec)``.
    :param save_folder: Output folder; ``catalog.csv`` and ``plots/`` are created inside it.
    :param save_images: When true, a PNG per sample with predicted and true arrival is written.
    """
    plots_folder = os.path.join(save_folder, 'plots')
    os.makedirs(plots_folder, exist_ok=True)  # also creates save_folder

    fnames = []
    evids = []
    detection_times = []
    relative_times = []

    model.eval()

    # Gradients are never needed here; no_grad avoids building an autograd
    # graph for every forward pass.
    with torch.no_grad():
        for index in range(len(dataset)):
            spectrogram, _ = dataset[index]
            row = dataset.samples_df.iloc[index]
            test_filename = row['filename']
            tr = obspy.read(f'{data_directory}{test_filename}.mseed')[0]
            starttime = tr.stats.starttime.datetime
            endtime = tr.stats.endtime.datetime
            total_seconds = (endtime - starttime).total_seconds()

            # The model predicts the arrival as a fraction of the trace duration.
            prediction = model(spectrogram).item()
            relative_time = prediction * total_seconds

            on_time = starttime + pd.Timedelta(seconds=relative_time)
            on_time_str = datetime.strftime(on_time, '%Y-%m-%dT%H:%M:%S.%f')
            fnames.append(test_filename)
            evids.append(row['evid'])
            detection_times.append(on_time_str)
            relative_times.append(relative_time)

            if save_images:
                tr_data = tr.data
                tr_times = tr.times()
                sampling_rate = tr.stats.sampling_rate
                # Convert seconds to sample indices and keep them inside the
                # trace so a catalogued time past the end cannot raise IndexError.
                last_index = len(tr_times) - 1
                sampled_time = min(max(int(relative_time * sampling_rate), 0), last_index)
                true = min(max(int(row['time_rel(sec)'] * sampling_rate), 0), last_index)

                fig = go.Figure()
                fig.add_trace(go.Scatter(
                    x=tr_times, y=tr_data, mode='lines', name='Seismogram'
                ))
                fig.add_vline(x=tr_times[sampled_time], line=dict(color='red'), annotation_text="Trig. On", annotation_position="top left")
                fig.add_vline(x=tr_times[true], line=dict(color='blue'), annotation_text="True", annotation_position="top left")

                # Customize the layout
                fig.update_layout(
                    title="Seismogram with STA/LTA Triggers",
                    xaxis_title="Time (s)",
                    yaxis_title="Amplitude",
                    xaxis_range=[min(tr_times), max(tr_times)],
                    height=400,
                    width=900
                )
                fig.write_image(os.path.join(plots_folder, f'{test_filename}.png'))

    # All columns are plain lists collected in iteration order, so rows line
    # up positionally regardless of the dataframe's index.
    detect_df = pd.DataFrame(data = {
        'filename':fnames,
        'time_abs(%Y-%m-%dT%H:%M:%S.%f)':detection_times,
        'time_rel(sec)': relative_times,
        "evid": evids
    })

    # Order the catalog by the number at the end of the event id.
    detect_df = detect_df.sort_values(
        by='evid',
        key=lambda x: x.str.extract(r'(\d+)$').iloc[:, 0].astype(int)
    )
    detect_df.to_csv(f'{save_folder}/catalog.csv', index=False)

In [11]:
# Restore the saved weights (load() also puts the network in evaluation mode).
model = SeismicCNN()
model.load('./data/lunar/models/seismic_activity_cnn_best.pth')

# The index of cached spectrograms is the one written for the training catalog,
# so the catalog produced below scores the model on its training data.
dataframe = pd.read_csv('./data/lunar/training/spectrograms.csv')
test_dataset = SpectrogramDataset(dataframe, augmentations=False)

inference(
    model,
    test_dataset,
    save_folder='./data/lunar/cnn',
    save_images=False,
)

In [12]:
df = pd.read_csv('./data/lunar/cnn/catalog.csv')

# Pair every prediction with its ground truth by event id. catalog.csv is
# sorted by evid while categories_df keeps the original catalog order, so a
# positional comparison is only correct if those two orders happen to match.
comparison = df.merge(
    categories_df[['evid', 'time_rel(sec)']],
    on='evid',
    suffixes=('_pred', '_true'),
)
assert len(comparison) == len(df), 'every prediction must match exactly one catalog row'

mse = mean_squared_error(comparison['time_rel(sec)_true'], comparison['time_rel(sec)_pred'])
rmse = np.sqrt(mse)
print(f'RMSE: {rmse}')
# RMSE relative to the mean true arrival time, in percent.
print(f"{rmse / comparison['time_rel(sec)_true'].mean() * 100} %")

RMSE: 6575.2926625297505
16.99723955456973 %


In [27]:
# Training

# criterion = nn.MSELoss()
# 
# k_folds = 5 
# batch_size = len(dataframe) // k_folds
# trainer = Trainer(
#     model_fn=model_fn,
#     dataset_fn=dataset_fn,
#     dataframe=dataframe,
#     criterion=criterion,
#     optimizer_fn=optimizer_fn,
#     shuffle=True,
# )
# trainer.train_cross_validation(  
#     optimizer_params={'lr': 0.001},
#     num_epochs=5,
#     batch_size=batch_size,
#     k_folds=k_folds
# )
# trainer.train(epochs=10, batch_size=batch_size, optimizer_params={'lr': 0.001})
# trainer.save_model('./data/lunar/models/seismic_activity_cnn_best.pth')

In [None]:
# # Pretty plot of errors
# 
# sorted_df = pd.DataFrame({'time_rel_true': categories_df['time_rel(sec)'], 'time_rel_pred': df['time_rel(sec)']})
# sorted_df = sorted_df.sort_values(by='time_rel_true')
# 
# # Creating the figure
# fig = go.Figure()
# 
# # Add a filled line plot
# fig.add_trace(go.Scatter(
#     x=sorted_df['time_rel_true'],
#     y=sorted_df['time_rel_pred'],
#     mode='lines',
#     fill='tozeroy',  # Fills to the x-axis
#     line=dict(color='rgba(138, 43, 226, 0.6)', width=2),  # Purple line with some transparency
# ))
# 
# # Update the layout to match the provided aesthetic
# fig.update_layout(
#     title="True vs Predicted Relative Times",
#     xaxis_title="True Relative Time (s)",
#     yaxis_title="Predicted Relative Time (s)",
#     height=1200,
#     width=3000,
#     paper_bgcolor='rgba(0,0,0,0)',  # Set the background to be transparent (dark theme)
#     plot_bgcolor='rgba(10,10,30,0.8)',  # Dark blue background color for the plot area
#     xaxis=dict(
#         showgrid=False,  # Hide gridlines
#         color='white'  # X-axis label color
#     ),
#     yaxis=dict(
#         showgrid=True,
#         gridcolor='rgba(255,255,255,0.1)',  # Light gridlines to match the aesthetic
#         color='white'  # Y-axis label color
#     ),
#     font=dict(
#         color="white"  # General font color for the title, axes, etc.
#     )
# )
# # font size
# fig.update_layout(
#     font=dict(
#         size=32
#     )
# )
# 
# fig.show()
# fig.write_image('./data/cnn/true_vs_predicted.png')

In [None]:
# dataset = SpectrogramDataset(dataframe, augmentations=True)
# 
# def show_sample(dataset, index):
#     dataset_sample = dataset[index][0][0]
# 
#     fig = go.Figure()
#     fig.add_trace(go.Heatmap(
#         z=dataset_sample,
#         colorscale='Viridis'
#     ))
#     fig.update_layout(
#         title="Spectrogram Sample",
#         xaxis_title="Time (s)",
#         yaxis_title="Frequency (Hz)",
#         height=400,
#         width=900
#     )
#     fig.show()
# 
# show_sample(dataset, 15)

In [None]:
# # There are samples where more than 1 quake has happened during the observation, we will convert them to separate samples
# from obspy import UTCDateTime
# import matplotlib.pyplot as plt
# 
# #multiple_labels_indexes = [(31, 32), (67, 68), (107, 108), (150, 151)]
# multiple_labels_indexes = [(10, 11)]
# data_directory = './data/lunar/training/data/S12_GradeA/'
# categories_df = pd.read_csv('./data/lunar/training/catalogs/apollo12_catalog_GradeA_final.csv')
# 
# for pair in multiple_labels_indexes:
#     first_row_index = categories_df[categories_df.evid == f"evid{pair[0]:05d}"].index[0]
#     second_row_index = categories_df[categories_df.evid == f"evid{pair[1]:05d}"].index[0]
# 
#     first_row = categories_df.loc[first_row_index]
#     second_row = categories_df.loc[second_row_index]
# 
#     buffer = 1000
#     cut_time = UTCDateTime(second_row["time_abs(%Y-%m-%dT%H:%M:%S.%f)"]) - buffer
# 
#     first_st = obspy.read(f'{data_directory}{first_row.filename}.mseed')
#     second_st = obspy.read(f'{data_directory}{second_row.filename}.mseed')
# 
#     first_st = first_st.trim(endtime=cut_time)
#     second_st = second_st.trim(starttime=cut_time)
# 
#     first_st.write(f'{data_directory}{first_row.filename}.mseed', format="MSEED")
#     second_st.write(f'{data_directory}{second_row.filename}.mseed', format="MSEED")
# 
#     categories_df.loc[first_row_index, "time_abs(%Y-%m-%dT%H:%M:%S.%f)"] = first_st[0].stats.starttime.datetime.strftime('%Y-%m-%dT%H:%M:%S.%f')
#     categories_df.loc[second_row_index, "time_abs(%Y-%m-%dT%H:%M:%S.%f)"] = second_st[0].stats.starttime.datetime.strftime('%Y-%m-%dT%H:%M:%S.%f')
#     categories_df.loc[second_row_index, "time_rel(sec)"] = buffer
# 
#     for data_cat, index in zip([first_st, second_st], [first_row_index, second_row_index]):
#         arrival_time_rel = categories_df.iloc[index]['time_rel(sec)']
# 
#         print(arrival_time_rel)
# 
#         times = data_cat[0].times()
#         data = data_cat[0].data
#         # Plot the trace!
#         fig,ax = plt.subplots(1,1,figsize=(10,3))
#         ax.plot(times,data)
#         # Make the plot pretty
#         ax.set_xlim([min(times),max(times)])
#         ax.set_ylabel('Velocity (m/s)')
#         ax.set_xlabel('Time (s)')
#         # Plot where the arrival time is
#         arrival_line = ax.axvline(x=arrival_time_rel, c='red', label='Rel. Arrival')
#         ax.legend(handles=[arrival_line])
# 
#         plt.show()
# 
# categories_df.to_csv('./data/lunar/training/catalogs/apollo12_catalog_GradeA_final.csv', index=False)