In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
import torch

# Assume you uploaded files and they are named as follows
# x_tensor_path = '/X_tensor.pt'
x_tensor_filtered_path = '/content/X_tensor_aftershocks_filtered.pt'
# y_tensor_path = '/y_tensor.pt'
y_tensor_filtered_path = '/content/y_tensor_aftershocks_filtered.pt'


# x_tensor_filtered_path = '/content/X_tensor_pacific_belt_label1.pt'

# y_tensor_filtered_path = '/content/y_tensor_pacific_belt_label1.pt'

# Loading the tensors
# X_tensor = torch.load(x_tensor_path)
X_tensor_filtered = torch.load(x_tensor_filtered_path)
# y_tensor = torch.load(y_tensor_path)
y_tensor_filtered = torch.load(y_tensor_filtered_path)


# Display the shape of the tensors
# print("Shape of X_tensor:", X_tensor.shape)
print("Shape of X_tensor_filtered:", X_tensor_filtered.shape)
# print("Shape of y_tensor:", y_tensor.shape)
print("Shape of y_tensor_filtered:", y_tensor_filtered.shape)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

mean = X_tensor_filtered.mean(dim=(0, 1))  # Mean for each feature across all samples and time steps
std = X_tensor_filtered.std(dim=(0, 1))    # Std dev for each feature across all samples and time steps

# Normalize
X_tensor_normalized = (X_tensor_filtered - mean[None, None, :]) / std[None, None, :]

#X_tensor_normalized = X_tensor_normalized.to(device)
#y_tensor_filtered = y_tensor_filtered.to(device)

  X_tensor_filtered = torch.load(x_tensor_filtered_path)
  y_tensor_filtered = torch.load(y_tensor_filtered_path)


Shape of X_tensor_filtered: torch.Size([16234, 29, 6])
Shape of y_tensor_filtered: torch.Size([16234])


In [None]:
from torch.utils.data import TensorDataset, DataLoader, random_split

# Assuming X_tensor_filtered and Y_tensor_filtered are already loaded
X = X_tensor_normalized
Y = y_tensor_filtered

# Create dataset from tensors
dataset = TensorDataset(X, Y)

# Splitting the dataset into training and validation sets
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Create DataLoaders
batch_size = 64  # You can adjust the batch size
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


In [None]:
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # Take the last time step
        return out

# Initialize the LSTM model
input_dim = 6  # number of features
hidden_dim = 64
num_layers = 2
output_dim = 1  # Output dimension (regression)

model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim)


In [None]:
import torch.optim as optim

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train_model(model, train_loader, val_loader, num_epochs):
    for epoch in range(num_epochs):
        model.train()
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs.float())
            loss = criterion(outputs.squeeze(), labels.float())
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            val_loss = 0
            for inputs, labels in val_loader:
                outputs = model(inputs.float())
                val_loss += criterion(outputs.squeeze(), labels.float()).item()

        print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}, Val Loss: {val_loss/len(val_loader):.4f}')
model = model.to(device)
# Train the model
num_epochs = 10  # Number of epochs
train_model(model, train_loader, val_loader, num_epochs)


Epoch 1, Loss: 0.0001, Val Loss: 0.0019
Epoch 2, Loss: 0.0170, Val Loss: 0.0018
Epoch 3, Loss: 0.0000, Val Loss: 0.0018
Epoch 4, Loss: 0.0000, Val Loss: 0.0018
Epoch 5, Loss: 0.0000, Val Loss: 0.0018
Epoch 6, Loss: 0.0000, Val Loss: 0.0018
Epoch 7, Loss: 0.0000, Val Loss: 0.0019
Epoch 8, Loss: 0.0000, Val Loss: 0.0018
Epoch 9, Loss: 0.0000, Val Loss: 0.0019
Epoch 10, Loss: 0.0168, Val Loss: 0.0018


In [None]:
import torch
import torch.nn as nn

class BinaryLSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers):
        super(BinaryLSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)  # Output dimension is 1 for binary classification
        self.sigmoid = nn.Sigmoid()  # Sigmoid activation to output probabilities

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)

        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))

        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        out = self.sigmoid(out)
        return out

# Initialize the LSTM model for binary classification
input_dim = 6  # Number of input features
hidden_dim = 64  # Hidden layers dimension
num_layers = 2  # Number of LSTM layers

model = BinaryLSTMModel(input_dim, hidden_dim, num_layers)

In [None]:
import torch.optim as optim

# Loss and optimizer
criterion = nn.BCELoss()  # Binary Cross-Entropy Loss for binary classification
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer

def train_model(model, train_loader, val_loader, num_epochs):
    for epoch in range(num_epochs):
        model.train()
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs.float())
            loss = criterion(outputs.squeeze(), labels.float())
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            val_loss = 0
            for inputs, labels in val_loader:
                outputs = model(inputs.float())
                val_loss += criterion(outputs.squeeze(), labels.float()).item()

        print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}, Val Loss: {val_loss/len(val_loader):.4f}')

# Training the model
model = model.to(device)
num_epochs = 10  # You can adjust the number of epochs
train_model(model, train_loader, val_loader, num_epochs)


Epoch 1, Loss: 0.0020, Val Loss: 0.0134
Epoch 2, Loss: 0.0010, Val Loss: 0.0137
Epoch 3, Loss: 0.0021, Val Loss: 0.0134
Epoch 4, Loss: 0.0016, Val Loss: 0.0134
Epoch 5, Loss: 0.1092, Val Loss: 0.0134
Epoch 6, Loss: 0.0014, Val Loss: 0.0135
Epoch 7, Loss: 0.0014, Val Loss: 0.0135
Epoch 8, Loss: 0.0013, Val Loss: 0.0135
Epoch 9, Loss: 0.0017, Val Loss: 0.0134
Epoch 10, Loss: 0.0014, Val Loss: 0.0135


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from sklearn.metrics import roc_auc_score
import numpy as np

# Define your LSTM model
class BinaryLSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers):
        super(BinaryLSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)  # Output dimension is 1 for binary classification
        self.sigmoid = nn.Sigmoid()  # Sigmoid activation to output probabilities

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)

        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))

        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        out = self.sigmoid(out)
        return out

# Initialize the model
input_dim = 6  # Number of input features per earthquake
hidden_dim = 64  # Hidden layers dimension
num_layers = 2  # Number of LSTM layers

model = BinaryLSTMModel(input_dim, hidden_dim, num_layers)
model = model.to('cuda')  # Move model to GPU

# Define loss and optimizer
criterion = nn.BCELoss()  # Binary Cross-Entropy Loss for binary classification
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer

# Prepare data
X = X_tensor_filtered  # Shape: (a, b, c) -> a sequences, b earthquakes, c features per earthquake
y = y_tensor_filtered  # Shape: (a,) -> Labels (0 or 1) for each sequence

# Perform oversampling on the sequences
ros = RandomOverSampler(random_state=42)
X_reshaped = X.reshape(X.shape[0], -1)  # Flatten the sequences temporarily to use with RandomOverSampler
X_resampled, y_resampled = ros.fit_resample(X_reshaped, y)  # Oversample sequences, not individual earthquakes
X_resampled = X_resampled.reshape(-1, X.shape[1], X.shape[2])  # Reshape back to (a, b, c)

# Check dimensions to ensure correct reshaping
print(f"Original X shape: {X.shape}")
print(f"Resampled X shape: {X_resampled.shape}")
print(f"Resampled y shape: {y_resampled.shape}")

# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32))
val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.float32))
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

# Train the model
def train_model(model, train_loader, val_loader, num_epochs):
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to('cuda'), labels.to('cuda')
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        model.eval()
        val_loss = 0
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to('cuda'), labels.to('cuda')
                outputs = model(inputs)
                val_loss += criterion(outputs.squeeze(), labels).item()
                all_preds.append(outputs.cpu().numpy())
                all_labels.append(labels.cpu().numpy())

        val_loss /= len(val_loader)
        all_preds = np.concatenate(all_preds)
        all_labels = np.concatenate(all_labels)

        # Convert probabilities to binary predictions (0 or 1) using a threshold of 0.5
        all_preds_binary = (all_preds > 0.5).astype(int)

        # Calculate and print the confusion matrix
        cm = confusion_matrix(all_labels, all_preds_binary)
        print(f'Confusion Matrix for Epoch {epoch+1}:')
        print(cm)

        # Compute ROC AUC score
        auc = roc_auc_score(all_labels, all_preds)

        print(f'Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}, ROC AUC: {auc:.4f}')

# Train the model
num_epochs = 10  # You can adjust the number of epochs
train_model(model, train_loader, val_loader, num_epochs)


Original X shape: torch.Size([16234, 29, 6])
Resampled X shape: (32412, 29, 6)
Resampled y shape: (32412,)
Confusion Matrix for Epoch 1:
[[3224   53]
 [   0 3206]]
Epoch 1, Loss: 0.2266, Val Loss: 0.0362, ROC AUC: 0.9980
Confusion Matrix for Epoch 2:
[[3277    0]
 [2152 1054]]
Epoch 2, Loss: 0.0191, Val Loss: 1.0120, ROC AUC: 0.9987
Confusion Matrix for Epoch 3:
[[3269    8]
 [   0 3206]]
Epoch 3, Loss: 0.0193, Val Loss: 0.0180, ROC AUC: 0.9997
Confusion Matrix for Epoch 4:
[[3266   11]
 [   0 3206]]
Epoch 4, Loss: 0.0084, Val Loss: 0.0051, ROC AUC: 1.0000
Confusion Matrix for Epoch 5:
[[3272    5]
 [   0 3206]]
Epoch 5, Loss: 0.0100, Val Loss: 0.0042, ROC AUC: 0.9998
Confusion Matrix for Epoch 6:
[[3268    9]
 [   0 3206]]
Epoch 6, Loss: 0.0052, Val Loss: 0.0062, ROC AUC: 0.9999
Confusion Matrix for Epoch 7:
[[3272    5]
 [   0 3206]]
Epoch 7, Loss: 0.0044, Val Loss: 0.0028, ROC AUC: 1.0000
Confusion Matrix for Epoch 8:
[[3272    5]
 [   0 3206]]
Epoch 8, Loss: 0.0020, Val Loss: 0.003

In [None]:
# Define a GRU model for binary classification
class BinaryGRUModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers):
        super(BinaryGRUModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)  # Output dimension is 1 for binary classification
        self.sigmoid = nn.Sigmoid()  # Sigmoid activation to output probabilities

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)

        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        out = self.sigmoid(out)
        return out

# Initialize the GRU model
gru_model = BinaryGRUModel(input_dim, hidden_dim, num_layers)
gru_model = gru_model.to('cuda')  # Move model to GPU

# Define loss and optimizer for GRU model
gru_optimizer = optim.Adam(gru_model.parameters(), lr=0.001)  # Adam optimizer for GRU model

# Reuse the `train_model` function
def train_gru_model(model, train_loader, val_loader, num_epochs):
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to('cuda'), labels.to('cuda')
            gru_optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), labels)
            loss.backward()
            gru_optimizer.step()
            total_loss += loss.item()

        model.eval()
        val_loss = 0
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to('cuda'), labels.to('cuda')
                outputs = model(inputs)
                val_loss += criterion(outputs.squeeze(), labels).item()
                all_preds.append(outputs.cpu().numpy())
                all_labels.append(labels.cpu().numpy())

        val_loss /= len(val_loader)
        all_preds = np.concatenate(all_preds)
        all_labels = np.concatenate(all_labels)

        # Convert probabilities to binary predictions (0 or 1) using a threshold of 0.5
        all_preds_binary = (all_preds > 0.5).astype(int)

        # Calculate and print the confusion matrix
        cm = confusion_matrix(all_labels, all_preds_binary)
        print(f'Confusion Matrix for Epoch {epoch+1}:')
        print(cm)

        # Compute ROC AUC score
        auc = roc_auc_score(all_labels, all_preds)

        print(f'Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}, ROC AUC: {auc:.4f}')

# Train the GRU model
num_epochs = 10  # Adjust as needed
train_gru_model(gru_model, train_loader, val_loader, num_epochs)


Confusion Matrix for Epoch 1:
[[3260   17]
 [   0 3206]]
Epoch 1, Loss: 0.1746, Val Loss: 0.0123, ROC AUC: 0.9995
Confusion Matrix for Epoch 2:
[[3271    6]
 [   0 3206]]
Epoch 2, Loss: 0.0129, Val Loss: 0.0055, ROC AUC: 0.9999
Confusion Matrix for Epoch 3:
[[3256   21]
 [   0 3206]]
Epoch 3, Loss: 0.0068, Val Loss: 0.0125, ROC AUC: 0.9999
Confusion Matrix for Epoch 4:
[[3268    9]
 [   0 3206]]
Epoch 4, Loss: 0.0088, Val Loss: 0.0041, ROC AUC: 1.0000
Confusion Matrix for Epoch 5:
[[3275    2]
 [   0 3206]]
Epoch 5, Loss: 0.0038, Val Loss: 0.0017, ROC AUC: 1.0000
Confusion Matrix for Epoch 6:
[[3272    5]
 [   0 3206]]
Epoch 6, Loss: 0.0063, Val Loss: 0.0025, ROC AUC: 1.0000
Confusion Matrix for Epoch 7:
[[3261   16]
 [   0 3206]]
Epoch 7, Loss: 0.0069, Val Loss: 0.0080, ROC AUC: 0.9998
Confusion Matrix for Epoch 8:
[[3272    5]
 [   0 3206]]
Epoch 8, Loss: 0.0023, Val Loss: 0.0029, ROC AUC: 1.0000
Confusion Matrix for Epoch 9:
[[3273    4]
 [   0 3206]]
Epoch 9, Loss: 0.0021, Val Loss

In [None]:
# Define a CNN model for binary classification
import torch.nn.functional as F
class BinaryCNNModel(nn.Module):
    def __init__(self, input_dim, num_channels, kernel_size):
        super(BinaryCNNModel, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=input_dim, out_channels=num_channels, kernel_size=kernel_size)
        self.conv2 = nn.Conv1d(in_channels=num_channels, out_channels=num_channels*2, kernel_size=kernel_size)
        self.pool = nn.MaxPool1d(kernel_size=2)
        self.fc1 = nn.Linear(320, 64)  # Adjust based on actual flattened length
        self.fc2 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = x.permute(0, 2, 1)  # Change shape to [batch_size, num_features, num_earthquakes]
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # Flatten the output
        x = F.relu(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        return x


# Initialize the CNN model
input_dim = 6  # Number of features per earthquake
num_channels = 32  # Number of output channels for the first convolution layer
kernel_size = 3  # Kernel size for the convolution layers

cnn_model = BinaryCNNModel(input_dim, num_channels, kernel_size)
cnn_model = cnn_model.to('cuda')  # Move model to GPU

# Define loss and optimizer for CNN model
cnn_optimizer = optim.Adam(cnn_model.parameters(), lr=0.0001)  # Adam optimizer for CNN model

# Reuse the `train_model` function
def train_cnn_model(model, train_loader, val_loader, num_epochs):
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to('cuda'), labels.to('cuda')
            cnn_optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), labels)
            loss.backward()
            cnn_optimizer.step()
            total_loss += loss.item()

        model.eval()
        val_loss = 0
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to('cuda'), labels.to('cuda')
                outputs = model(inputs)
                val_loss += criterion(outputs.squeeze(), labels).item()
                all_preds.append(outputs.cpu().numpy())
                all_labels.append(labels.cpu().numpy())

        val_loss /= len(val_loader)
        all_preds = np.concatenate(all_preds)
        all_labels = np.concatenate(all_labels)

        # Convert probabilities to binary predictions (0 or 1) using a threshold of 0.5
        all_preds_binary = (all_preds > 0.5).astype(int)

        # Calculate and print the confusion matrix
        cm = confusion_matrix(all_labels, all_preds_binary)


        # Compute ROC AUC score
        auc = roc_auc_score(all_labels, all_preds)

        if (epoch + 1) % 10 == 0:
                print(f'Epoch {epoch + 1}, Loss: {total_loss / len(train_loader):.4f}, Val Loss: {val_loss:.4f}, ROC AUC: {auc:.4f}')
                print(f'Confusion Matrix for Epoch {epoch+1}:')
                print(cm)
# Train the CNN model
num_epochs = 100  # Adjust as needed
train_cnn_model(cnn_model, train_loader, val_loader, num_epochs)


Epoch 10, Loss: 1.9829, Val Loss: 1.6605, ROC AUC: 0.9729
Confusion Matrix for Epoch 10:
[[3276    1]
 [ 108 3098]]
Epoch 20, Loss: 3.4141, Val Loss: 3.2046, ROC AUC: 0.9389
Confusion Matrix for Epoch 20:
[[3272    5]
 [ 335 2871]]
Epoch 30, Loss: 3.3198, Val Loss: 3.2679, ROC AUC: 0.9525
Confusion Matrix for Epoch 30:
[[3277    0]
 [ 240 2966]]
Epoch 40, Loss: 3.5130, Val Loss: 3.4198, ROC AUC: 0.9530
Confusion Matrix for Epoch 40:
[[3277    0]
 [ 240 2966]]
Epoch 50, Loss: 3.8066, Val Loss: 3.7129, ROC AUC: 0.9435
Confusion Matrix for Epoch 50:
[[3277    0]
 [ 240 2966]]
Epoch 60, Loss: 3.7831, Val Loss: 3.7139, ROC AUC: 0.9440
Confusion Matrix for Epoch 60:
[[3276    1]
 [ 240 2966]]
Epoch 70, Loss: 3.7831, Val Loss: 3.7141, ROC AUC: 0.9444
Confusion Matrix for Epoch 70:
[[3276    1]
 [ 240 2966]]
Epoch 80, Loss: 1.9093, Val Loss: 1.6709, ROC AUC: 0.9744
Confusion Matrix for Epoch 80:
[[3275    2]
 [ 108 3098]]
Epoch 90, Loss: 1.8858, Val Loss: 1.6851, ROC AUC: 0.9742
Confusion Matr

In [None]:
import torch
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
import numpy as np

# Move your data to CPU
X = X_tensor_filtered.cpu().numpy().reshape(X_tensor_filtered.size(0), -1)  # Flatten sequences
y = y_tensor_filtered.cpu().numpy()

# Perform oversampling on the sequences
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X, y)

# Split the resampled data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Initialize and train the Random Forest model
rf_model = RandomForestClassifier(
    n_estimators=100,            # Number of trees in the forest
    max_depth=5,                # Maximum depth of each tree
    min_samples_split=5,         # Minimum number of samples required to split an internal node
    min_samples_leaf=3,          # Minimum number of samples required to be at a leaf node
    max_features='sqrt',         # Number of features to consider when looking for the best split
    random_state=42              # For reproducibility
)
rf_model.fit(X_train, y_train)

# Make predictions
y_pred_proba = rf_model.predict_proba(X_val)[:, 1]  # Get probabilities for the positive class
y_pred = rf_model.predict(X_val)

# Compute ROC AUC and Accuracy
roc_auc = roc_auc_score(y_val, y_pred_proba)
accuracy = accuracy_score(y_val, y_pred)

print(f'Random Forest ROC AUC: {roc_auc:.4f}')
print(f'Random Forest Accuracy: {accuracy:.4f}')

# Compute Confusion Matrix
cm = confusion_matrix(y_val, y_pred)
print(cm)



Random Forest ROC AUC: 1.0000
Random Forest Accuracy: 0.9980
[[3264   13]
 [   0 3206]]


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Define a CNN model with dilation for binary classification
class BinaryCNNModel(nn.Module):
    def __init__(self, input_dim, num_channels, kernel_size, dilation_rate=1):
        super(BinaryCNNModel, self).__init__()
        # Adding dilation to convolutional layers
        self.conv1 = nn.Conv1d(in_channels=input_dim, out_channels=num_channels,
                               kernel_size=kernel_size, dilation=dilation_rate)
        self.conv2 = nn.Conv1d(in_channels=num_channels, out_channels=num_channels * 2,
                               kernel_size=kernel_size, dilation=dilation_rate)
        self.pool = nn.MaxPool1d(kernel_size=2)
        self.fc1 = nn.Linear(64 * 4, 64)  # Adjusted based on the flattened size
        self.fc2 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = x.permute(0, 2, 1)  # Change shape to [batch_size, num_features, num_earthquakes]
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))

        x = x.view(x.size(0), -1)  # Flatten the output
        x = F.relu(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        return x




# Initialize the CNN model with dilation
input_dim = 6  # Number of features per earthquake
num_channels = 32  # Number of output channels for the first convolution layer
kernel_size = 3  # Kernel size for the convolution layers
dilation_rate = 2  # Dilation rate for the convolution layers

cnn_model = BinaryCNNModel(input_dim, num_channels, kernel_size, dilation_rate)
# cnn_model = cnn_model.to('cuda')  # Move model to GPU

# Define loss and optimizer for CNN model
criterion = nn.BCELoss()
cnn_optimizer = optim.Adam(cnn_model.parameters(), lr=0.0001)  # Adam optimizer for CNN model

# Reuse the `train_model` function
def train_cnn_model(model, train_loader, val_loader, num_epochs):
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for inputs, labels in train_loader:
            # inputs, labels = inputs.to('cuda'), labels.to('cuda')
            cnn_optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), labels)
            loss.backward()
            cnn_optimizer.step()
            total_loss += loss.item()

        model.eval()
        val_loss = 0
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for inputs, labels in val_loader:
                # inputs, labels = inputs.to('cuda'), labels.to('cuda')
                outputs = model(inputs)
                val_loss += criterion(outputs.squeeze(), labels).item()
                all_preds.append(outputs.cpu().numpy())
                all_labels.append(labels.cpu().numpy())

        val_loss /= len(val_loader)
        all_preds = np.concatenate(all_preds)
        all_labels = np.concatenate(all_labels)

        # Convert probabilities to binary predictions (0 or 1) using a threshold of 0.5
        all_preds_binary = (all_preds > 0.5).astype(int)

        # Calculate and print the confusion matrix
        cm = confusion_matrix(all_labels, all_preds_binary)

        # Compute ROC AUC score
        auc = roc_auc_score(all_labels, all_preds)

        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch + 1}, Loss: {total_loss / len(train_loader):.4f}, Val Loss: {val_loss:.4f}, ROC AUC: {auc:.4f}')
            print(f'Confusion Matrix for Epoch {epoch+1}:')
            print(cm)

# Train the CNN model
num_epochs = 100  # Adjust as needed
train_cnn_model(cnn_model, train_loader, val_loader, num_epochs)


Epoch 10, Loss: 0.0141, Val Loss: 0.0126, ROC AUC: 0.4988
Confusion Matrix for Epoch 10:
[[3242    0]
 [   5    0]]
Epoch 20, Loss: 0.0130, Val Loss: 0.0120, ROC AUC: 0.5195
Confusion Matrix for Epoch 20:
[[3242    0]
 [   5    0]]
Epoch 30, Loss: 0.0112, Val Loss: 0.0120, ROC AUC: 0.5308
Confusion Matrix for Epoch 30:
[[3242    0]
 [   5    0]]
Epoch 40, Loss: 0.0080, Val Loss: 0.0124, ROC AUC: 0.5381
Confusion Matrix for Epoch 40:
[[3242    0]
 [   5    0]]
Epoch 50, Loss: 0.0048, Val Loss: 0.0135, ROC AUC: 0.5611
Confusion Matrix for Epoch 50:
[[3242    0]
 [   5    0]]
Epoch 60, Loss: 0.0025, Val Loss: 0.0150, ROC AUC: 0.5698
Confusion Matrix for Epoch 60:
[[3242    0]
 [   5    0]]
Epoch 70, Loss: 0.0009, Val Loss: 0.0176, ROC AUC: 0.5522
Confusion Matrix for Epoch 70:
[[3241    1]
 [   5    0]]
Epoch 80, Loss: 0.0002, Val Loss: 0.0223, ROC AUC: 0.5524
Confusion Matrix for Epoch 80:
[[3241    1]
 [   5    0]]
Epoch 90, Loss: 0.0001, Val Loss: 0.0256, ROC AUC: 0.5571
Confusion Matr

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from sklearn.metrics import roc_auc_score, confusion_matrix
import numpy as np

# Define a CNN model with dilation for binary classification
class BinaryCNNModel(nn.Module):
    def __init__(self, input_dim, num_channels, kernel_size, dilation_rate=1):
        super(BinaryCNNModel, self).__init__()
        # Adding dilation to convolutional layers
        self.conv1 = nn.Conv1d(in_channels=input_dim, out_channels=num_channels,
                               kernel_size=kernel_size, dilation=dilation_rate)
        self.conv2 = nn.Conv1d(in_channels=num_channels, out_channels=num_channels * 2,
                               kernel_size=kernel_size, dilation=dilation_rate)
        self.pool = nn.MaxPool1d(kernel_size=2)
        self.fc1 = nn.Linear(64 * 4, 64)  # Adjusted based on the flattened size
        self.fc2 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = x.permute(0, 2, 1)  # Change shape to [batch_size, num_features, num_earthquakes]
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))

        x = x.view(x.size(0), -1)  # Flatten the output
        x = F.relu(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        return x

# Initialize the CNN model with dilation
input_dim = 6  # Number of features per earthquake
num_channels = 32  # Number of output channels for the first convolution layer
kernel_size = 3  # Kernel size for the convolution layers
dilation_rate = 2  # Dilation rate for the convolution layers

cnn_model = BinaryCNNModel(input_dim, num_channels, kernel_size, dilation_rate)

# Define loss and optimizer for CNN model
criterion = nn.BCELoss()
cnn_optimizer = optim.Adam(cnn_model.parameters(), lr=0.0001)  # Adam optimizer for CNN model

# Prepare data
X = X_tensor_filtered  # Shape: (a, b, c) -> a sequences, b earthquakes, c features per earthquake
y = y_tensor_filtered  # Shape: (a,) -> Labels (0 or 1) for each sequence

# Perform oversampling on the sequences
ros = RandomOverSampler(random_state=42)
X_reshaped = X.reshape(X.shape[0], -1)  # Flatten the sequences temporarily to use with RandomOverSampler
X_resampled, y_resampled = ros.fit_resample(X_reshaped, y)  # Oversample sequences, not individual earthquakes
X_resampled = X_resampled.reshape(-1, X.shape[1], X.shape[2])  # Reshape back to (a, b, c)

# Check dimensions to ensure correct reshaping
print(f"Original X shape: {X.shape}")
print(f"Resampled X shape: {X_resampled.shape}")
print(f"Resampled y shape: {y_resampled.shape}")

# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32))
val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.float32))
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

# Train the CNN model
def train_cnn_model(model, train_loader, val_loader, num_epochs):
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for inputs, labels in train_loader:
            # inputs, labels = inputs.to('cuda'), labels.to('cuda')  # Uncomment if using GPU
            cnn_optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), labels)
            loss.backward()
            cnn_optimizer.step()
            total_loss += loss.item()

        model.eval()
        val_loss = 0
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for inputs, labels in val_loader:
                # inputs, labels = inputs.to('cuda'), labels.to('cuda')  # Uncomment if using GPU
                outputs = model(inputs)
                val_loss += criterion(outputs.squeeze(), labels).item()
                all_preds.append(outputs.cpu().numpy())
                all_labels.append(labels.cpu().numpy())

        val_loss /= len(val_loader)
        all_preds = np.concatenate(all_preds)
        all_labels = np.concatenate(all_labels)

        # Convert probabilities to binary predictions (0 or 1) using a threshold of 0.5
        all_preds_binary = (all_preds > 0.5).astype(int)

        # Calculate and print the confusion matrix

        # print(f'Confusion Matrix for Epoch {epoch+1}:')


        # Compute ROC AUC score
        auc = roc_auc_score(all_labels, all_preds)
        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch + 1}, Loss: {total_loss / len(train_loader):.4f}, Val Loss: {val_loss:.4f}, ROC AUC: {auc:.4f}')
            cm = confusion_matrix(all_labels, all_preds_binary)
            print(cm)
# Train the CNN model
num_epochs = 100  # Adjust as needed
train_cnn_model(cnn_model, train_loader, val_loader, num_epochs)


Original X shape: torch.Size([16234, 29, 6])
Resampled X shape: (32412, 29, 6)
Resampled y shape: (32412,)
Epoch 10, Loss: 0.0688, Val Loss: 0.0087, ROC AUC: 0.9999
[[3273    4]
 [   0 3206]]
Epoch 20, Loss: 0.0007, Val Loss: 0.0024, ROC AUC: 0.9999
[[3273    4]
 [   0 3206]]
Epoch 30, Loss: 0.0032, Val Loss: 0.0035, ROC AUC: 1.0000
[[3272    5]
 [   0 3206]]
Epoch 40, Loss: 0.0001, Val Loss: 0.0043, ROC AUC: 0.9999
[[3272    5]
 [   0 3206]]
Epoch 50, Loss: 0.0002, Val Loss: 0.0029, ROC AUC: 0.9999
[[3274    3]
 [   0 3206]]
Epoch 60, Loss: 0.0000, Val Loss: 0.0033, ROC AUC: 0.9999
[[3275    2]
 [   0 3206]]
Epoch 70, Loss: 0.0000, Val Loss: 0.0164, ROC AUC: 0.9998
[[3275    2]
 [   0 3206]]
Epoch 80, Loss: 0.0039, Val Loss: 0.0035, ROC AUC: 0.9999
[[3273    4]
 [   0 3206]]
Epoch 90, Loss: 0.0039, Val Loss: 0.0031, ROC AUC: 1.0000
[[3273    4]
 [   0 3206]]
Epoch 100, Loss: 0.0039, Val Loss: 0.0024, ROC AUC: 1.0000
[[3274    3]
 [   0 3206]]
