In [51]:
import kagglehub

# Fetch the latest version of the dataset through kagglehub; the call returns
# the local directory that holds the downloaded files.
DATASET_HANDLE = "ahmeduzaki/global-earthquake-tsunami-risk-assessment-dataset"
path = kagglehub.dataset_download(DATASET_HANDLE)

# Report where the dataset files are located.
print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/global-earthquake-tsunami-risk-assessment-dataset


In [52]:
import pandas as pd
# train_test_split is used below but was never imported in the notebook, so a
# fresh kernel (Restart & Run All) failed with NameError.
from sklearn.model_selection import train_test_split

# Load the earthquake / tsunami table from the Kaggle input directory.
df = pd.read_csv('/kaggle/input/global-earthquake-tsunami-risk-assessment-dataset/earthquake_data_tsunami.csv')
print("Complete")

# Feature matrix: the 12 numeric predictor columns.
X = df[['magnitude', 'cdi', 'mmi', 'sig', 'nst', 'dmin', 'gap', 'depth',
        'latitude', 'longitude', 'Year', 'Month']]
# Target vector. Previously this was the bare string 'tsunami', which is a
# column *name*, not the label data that pairs with X.
y = df['tsunami']

# Initial 75/25 hold-out split of the full frame (fixed seed for repeatability).
train_df, test_df = train_test_split(
    df, test_size=0.25, random_state=42
)


Complete


In [53]:
# Imported here because this cell is the first one that needs them; without
# these the class definition fails with NameError on a fresh kernel.
import numpy as np
import torch
from torch.utils.data import Dataset


class CustomDatasetFromDataFrame(Dataset):
    """Expose selected DataFrame columns as (features, target) tensor pairs.

    Features are stored as a float32 tensor and targets as an int64 (long)
    tensor. When both ``mean`` and ``std`` are given, the features are
    standardised as ``(x - mean) / (std + 1e-8)``.

    Args:
        dataframe: Source pandas DataFrame.
        features_cols: Column labels used as model inputs.
        target_col: Column label holding the class index.
        mean: Optional per-feature means (pandas Series, NumPy array, list or
            anything ``numpy.asarray`` accepts). Matched to ``features_cols``
            by position, not by label.
        std: Optional per-feature standard deviations, same conventions as
            ``mean``.

    Note:
        Normalisation is applied only when *both* ``mean`` and ``std`` are
        provided; if either is ``None`` the raw features are kept.
    """

    def __init__(self, dataframe, features_cols, target_col, mean=None, std=None):
        # Explicit float32 / long so the tensors match what nn.Linear and
        # nn.CrossEntropyLoss expect.
        self.features = torch.tensor(dataframe[features_cols].values, dtype=torch.float32)
        self.targets = torch.tensor(dataframe[target_col].values, dtype=torch.long)

        if mean is not None and std is not None:
            # np.asarray accepts Series, ndarrays and plain sequences alike,
            # so callers are no longer forced to pass pandas objects.
            mean = torch.tensor(np.asarray(mean), dtype=torch.float32)
            std = torch.tensor(np.asarray(std), dtype=torch.float32)
            # The small epsilon keeps the division finite for zero-variance columns.
            self.features = (self.features - mean) / (std + 1e-8)

    def __len__(self):
        """Return the number of samples (rows)."""
        return len(self.targets)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        return self.features[idx], self.targets[idx]

In [54]:
import pandas as pd
import numpy as np



# Predictor columns fed to the model.
features_cols = [
    'magnitude', 'cdi', 'mmi', 'sig', 'nst', 'dmin', 'gap', 'depth',
    'latitude', 'longitude', 'Year', 'Month',
]

# Per-feature mean and standard deviation, computed on the training split only.
train_mean = train_df.loc[:, features_cols].mean()
# Any zero standard deviation is replaced by 1 so later scaling cannot divide by zero.
train_std = train_df.loc[:, features_cols].std().mask(lambda s: s == 0, 1)


In [55]:
# Predictor columns and the label column used for the final split.
features_cols = ['magnitude', 'cdi', 'mmi', 'sig', 'nst', 'dmin', 'gap', 'depth',
                 'latitude', 'longitude', 'Year', 'Month']
target_col = 'tsunami'

# Stratified 80/20 split so both partitions keep the same class balance.
# This replaces any train_df / test_df produced by an earlier cell.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df[target_col])

# Normalisation statistics come from the training partition only and are then
# reused for the test partition to avoid leaking test information.
train_mean = train_df[features_cols].mean()
train_std = train_df[features_cols].std()
# Same zero-variance guard as the earlier statistics cell: without it a
# constant column would be divided by ~1e-8 inside the dataset class.
train_std[train_std == 0] = 1

train_dataset = CustomDatasetFromDataFrame(
    train_df,
    features_cols,
    target_col,
    mean=train_mean,
    std=train_std
)
test_dataset = CustomDatasetFromDataFrame(
    test_df,
    features_cols,
    target_col,
    mean=train_mean,
    std=train_std
)

print(f"Training samples: {len(train_dataset)}")
print(f"Test samples: {len(test_dataset)}")
       

Training samples: 625
Test samples: 157


In [56]:
# Sanity check: show the first five (features, target) pairs as the dataset
# returns them.
print("First 5 samples from PyTorch training dataset (after fix):")
for i in range(5):
    features, targets = train_dataset[i]
    # One print call per sample; sep="\n" yields the same four output lines.
    print(
        f"Sample {i}:",
        f"  Features: {features}",
        f"  Target: {targets}",
        "-" * 20,
        sep="\n",
    )


First 5 samples from PyTorch training dataset (after fix):
Sample 0:
  Features: tensor([ 1.6476,  0.2107,  0.6865,  0.1282,  0.2398, -0.5858,  0.1482, -0.4134,
        -0.1418,  0.6836, -0.5454, -1.5931])
  Target: 0
--------------------
Sample 1:
  Features: tensor([ 1.8660,  1.4617,  2.0595,  2.0106, -0.9163, -0.3673, -0.1909, -0.4276,
        -1.6856,  1.0256,  0.5990,  1.2085])
  Target: 1
--------------------
Sample 2:
  Features: tensor([ 1.8660,  1.1489,  2.0595,  5.8387, -0.9163,  0.2578, -0.5379, -0.4791,
         0.9030,  0.2736,  0.4355, -0.7526])
  Target: 0
--------------------
Sample 3:
  Features: tensor([-0.5367, -0.1021, -0.6865, -0.4813, -0.9163,  1.1136, -0.1524, -0.4508,
         0.0839, -1.1136,  0.1085,  0.3680])
  Target: 1
--------------------
Sample 4:
  Features: tensor([-0.3182, -1.3531,  0.0000, -0.4933,  0.2558, -0.5858, -1.0004, -0.3543,
         0.4928, -1.3066, -1.6897, -0.7526])
  Target: 0
--------------------


In [57]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np


class TsunamiNN(nn.Module):
    """Fully connected classifier built from Linear -> ReLU -> Dropout stages.

    Args:
        input_size: Number of input features per sample.
        hidden_sizes: Iterable of hidden-layer widths; one
            Linear/ReLU/Dropout stage is created per entry, in order. An
            empty iterable yields a single Linear layer.
        num_classes: Number of output logits.
        dropout: Dropout probability applied after every hidden stage.
            Defaults to 0.3, the value that used to be hard-coded.
    """

    def __init__(self, input_size, hidden_sizes, num_classes, dropout=0.3):
        super().__init__()

        layers = []
        prev_size = input_size

        for hidden_size in hidden_sizes:
            layers.append(nn.Linear(prev_size, hidden_size))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            prev_size = hidden_size

        # Final projection to raw class logits (no softmax here).
        layers.append(nn.Linear(prev_size, num_classes))

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits produced by the layer stack for input ``x``."""
        return self.network(x)


def train_model(model, train_loader, criterion, optimizer, device):
    """Run one training epoch and report its loss and accuracy.

    Args:
        model: Network to optimise; switched to training mode.
        train_loader: Iterable yielding ``(features, labels)`` batches. It does
            not need to define ``__len__``.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepping ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the mean of the per-batch loss
        values and the accuracy in percent. ``(0.0, 0.0)`` is returned when
        the loader yields no batches.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    # Counted during iteration instead of len(train_loader) so that loaders
    # without __len__ work and an empty loader cannot divide by zero.
    num_batches = 0

    for features, labels in train_loader:
        features, labels = features.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Predicted class = index of the largest logit along the class axis.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        num_batches += 1

    if num_batches == 0:
        return 0.0, 0.0

    epoch_loss = running_loss / num_batches
    epoch_acc = 100 * correct / total if total else 0.0
    return epoch_loss, epoch_acc

def evaluate_model(model, test_loader, criterion, device):
    """Evaluate ``model`` on ``test_loader`` without updating any weights.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        test_loader: Iterable yielding ``(features, labels)`` batches. It does
            not need to define ``__len__``.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the mean of the per-batch loss
        values and the accuracy in percent. ``(0.0, 0.0)`` is returned when
        the loader yields no batches.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    # Counted during iteration instead of len(test_loader) so that loaders
    # without __len__ work and an empty loader cannot divide by zero.
    num_batches = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for features, labels in test_loader:
            features, labels = features.to(device), labels.to(device)

            outputs = model(features)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            # Predicted class = index of the largest logit along the class axis.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

    if num_batches == 0:
        return 0.0, 0.0

    epoch_loss = running_loss / num_batches
    epoch_acc = 100 * correct / total if total else 0.0
    return epoch_loss, epoch_acc

# Seed PyTorch's global RNG so weight initialisation, dropout masks and the
# shuffling order of the training loader are repeatable across runs.
SEED = 42
torch.manual_seed(SEED)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Derive the input width from the dataset tensor instead of hard-coding 12,
# so the model stays in sync if the feature list changes.
input_size = train_dataset.features.shape[1]
hidden_sizes = [64, 32, 16]  # Hidden layer sizes
num_classes = 2  # Binary classification (tsunami or not)

model = TsunamiNN(input_size, hidden_sizes, num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 50
best_test_acc = 0.0

print("Starting training...")
for epoch in range(num_epochs):
    train_loss, train_acc = train_model(model, train_loader, criterion, optimizer, device)
    test_loss, test_acc = evaluate_model(model, test_loader, criterion, device)

    # Keep the weights of the epoch with the highest test accuracy so far.
    # NOTE(review): the checkpoint is chosen on the same test set that is
    # reported below, so "Best Test Accuracy" is an optimistic estimate;
    # consider selecting on a separate validation split.
    if test_acc > best_test_acc:
        best_test_acc = test_acc
        torch.save(model.state_dict(), 'best_tsunami_model.pth')

    # Log progress every fifth epoch.
    if (epoch + 1) % 5 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}]')
        print(f'  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'  Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')

print(f'\n   Best Test Accuracy: {best_test_acc:.2f}%')

Using device: cpu
Starting training...
Epoch [5/50]
  Train Loss: 0.4263, Train Acc: 80.32%
  Test Loss: 0.3865, Test Acc: 84.08%
Epoch [10/50]
  Train Loss: 0.3628, Train Acc: 83.52%
  Test Loss: 0.3382, Test Acc: 84.08%
Epoch [15/50]
  Train Loss: 0.3427, Train Acc: 85.28%
  Test Loss: 0.3184, Test Acc: 85.99%
Epoch [20/50]
  Train Loss: 0.3220, Train Acc: 84.32%
  Test Loss: 0.3034, Test Acc: 85.99%
Epoch [25/50]
  Train Loss: 0.3024, Train Acc: 85.76%
  Test Loss: 0.2837, Test Acc: 87.26%
Epoch [30/50]
  Train Loss: 0.3106, Train Acc: 85.76%
  Test Loss: 0.2726, Test Acc: 89.17%
Epoch [35/50]
  Train Loss: 0.2894, Train Acc: 85.44%
  Test Loss: 0.2644, Test Acc: 87.90%
Epoch [40/50]
  Train Loss: 0.2778, Train Acc: 87.20%
  Test Loss: 0.2681, Test Acc: 87.90%
Epoch [45/50]
  Train Loss: 0.2584, Train Acc: 87.84%
  Test Loss: 0.2575, Test Acc: 89.81%
Epoch [50/50]
  Train Loss: 0.2460, Train Acc: 88.80%
  Test Loss: 0.2568, Test Acc: 89.81%

Best Test Accuracy: 90.45%
