In [19]:
from sklearn.model_selection import train_test_split
import pandas as pd
from pathlib import Path
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score, roc_auc_score, fbeta_score
from sklearn.model_selection import StratifiedKFold
from datetime import datetime
import torch
from tabpfn import TabPFNClassifier, TabPFNRegressor

from dotenv import load_dotenv
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Token read from the HF_TOKEN environment variable; None when it is unset.
# NOTE(review): presumably a Hugging Face access token — confirm where it is consumed.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Select the compute device: CPU unless CUDA is available.
device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")

In [20]:
# Read the processed CSV into a DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
data_path = Path("../data/processed/earthquake_data_tsunami_scaled.csv")
data_df = pd.read_csv(filepath_or_buffer=data_path)

Previously, in the feature-engineering part, we identified that longitude, latitude, dmin, nst, Year, Month, and month_number do not seem to be useful. We will check this insight with our models. The baseline will not include any of these features.

In [21]:
# Columns removed from the feature matrix. Only the target is listed here.
# NOTE(review): the markdown above this cell says the baseline omits longitude,
# latitude, dmin, nst, Year, Month and month_number, but none of them are in
# this list — confirm which behaviour is intended.
features_to_exclude = ['tsunami']

# Drop only the excluded columns that actually exist in the frame.
present_exclusions = [col for col in features_to_exclude if col in data_df.columns]
X = data_df.drop(columns=present_exclusions)
y = data_df['tsunami']

# 50/50 hold-out split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=42,
)

Creating Models

## Analyze the result of TabPFNClassifier

In [None]:
import numpy as np
import pandas as pd
from tabpfn import TabPFNClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, fbeta_score, roc_auc_score, confusion_matrix

# Fit TabPFN on the training half and evaluate it at a fixed decision threshold.
#
# Decision threshold applied to the predicted probability in column 1.
# NOTE(review): 0.1 is far below the usual 0.5, presumably to favour recall
# (F2 is printed as the primary metric below) — confirm how it was chosen.
threshold = 0.1

# Use the device chosen in the setup cell (CUDA when available, otherwise CPU)
# instead of hard-coding "cuda", so this cell also runs on CPU-only machines.
model = TabPFNClassifier(device=device)

model.fit(X_train, y_train)

# Column 1 of predict_proba is used as the positive-class score.
# NOTE(review): assumes `tsunami` is encoded as 0/1 — confirm against the data.
y_train_proba = model.predict_proba(X_train)[:, 1]
y_test_proba = model.predict_proba(X_test)[:, 1]

# Convert scores to hard 0/1 labels at the fixed threshold.
y_train_pred = (y_train_proba >= threshold).astype(int)
y_test_pred = (y_test_proba >= threshold).astype(int)

# Threshold-dependent metrics use the hard labels; ROC-AUC uses the raw scores.
test_accuracy = accuracy_score(y_test, y_test_pred)
test_precision = precision_score(y_test, y_test_pred, zero_division=0)
test_recall = recall_score(y_test, y_test_pred, zero_division=0)
test_f1 = f1_score(y_test, y_test_pred, zero_division=0)
test_f2 = fbeta_score(y_test, y_test_pred, beta=2, zero_division=0)
test_roc_auc = roc_auc_score(y_test, y_test_proba)

train_accuracy = accuracy_score(y_train, y_train_pred)
train_roc_auc = roc_auc_score(y_train, y_train_proba)

# Pin the label order so the matrix is always 2x2; without `labels`, a split in
# which only one class occurs yields a 1x1 matrix and `cm[1, :]` raises IndexError.
cm = confusion_matrix(y_test, y_test_pred, labels=[0, 1])
actual_positives = cm[1, :].sum()
# False-negative rate in percent: missed positives over all actual positives.
if actual_positives > 0:
    fn_rate = cm[1, 0] / actual_positives * 100
else:
    fn_rate = 0.0

print(f"\nTabPFN Results (Fixed Threshold: {threshold})")
print("=" * 40)
print(f"Test Accuracy:       {test_accuracy:.4f}")
print(f"Test Precision:      {test_precision:.4f}")
print(f"Test Recall:         {test_recall:.4f}")
print(f"Test F1 Score:       {test_f1:.4f}")
print(f"Test F2 Score:       {test_f2:.4f} (Primary Metric)")
print(f"Test ROC-AUC:        {test_roc_auc:.4f}")
print("-" * 40)
print(f"False Negative Rate: {fn_rate:.2f}%")
print(f"Train/Test Gap (Acc):{train_accuracy - test_accuracy:.4f}")

# Collect every metric in one dict so later cells can reuse or tabulate them.
final_results = {
    'Test Accuracy': test_accuracy,
    'Test Precision': test_precision,
    'Test Recall': test_recall,
    'Test F1': test_f1,
    'Test F2': test_f2,
    'Test ROC-AUC': test_roc_auc,
    'False Negative Rate': fn_rate,
    'Train Accuracy': train_accuracy,
    'Train ROC-AUC': train_roc_auc
}



TabPFN Results (Fixed Threshold: 0.1)
Test Accuracy:       0.7029
Test Precision:      0.4350
Test Recall:         0.9506
Test F1 Score:       0.5969
Test F2 Score:       0.7685 (Primary Metric)
Test ROC-AUC:        0.9170
----------------------------------------
False Negative Rate: 4.94%
Train/Test Gap (Acc):0.0457


## TabPFNClassifier Results Summary

**Key Findings:**
- **Test Accuracy**: 0.7029
- **Test Precision**: 0.4350
- **Test Recall**: 0.9506 - High recall
- **Test F1 Score**: 0.5969 
- **Test F2 Score**: 0.7685 
- **Test ROC-AUC**: 0.9170 (91.70%) - Excellent class separation
- **False Negative Rate**: 4.94% 
- **Train/Test Gap (Accuracy)**: 0.0457 - Minimal overfitting

**Analysis:**
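At the fixed 0.1 threshold, TabPFN achieves only mediocre accuracy (70.29%) and low precision (43.50%), but its ROC-AUC is excellent (91.70%). The low precision is the price of the high recall (95.06%), which is the priority for tsunami detection because missing a tsunami can have catastrophic consequences.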