In [1]:
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pandas as pd


## Landslide

In [2]:
# ---------------------------------------------------------------------------
# Landslide: load data, train a CatBoost classifier, report hold-out metrics.
# ---------------------------------------------------------------------------

# Read the cleaned landslide table (path is relative to the working directory).
df = pd.read_csv("cleaned_landslide_dataset.csv")

# "Landslide_Occurred" is the label; every remaining column except "Date"
# is handed to the model as a feature.
y = df["Landslide_Occurred"]
X = df.drop(["Landslide_Occurred", "Date"], axis="columns")

# Keep 20% of the rows aside for evaluation. The split is stratified on the
# label and seeded so that re-running the cell yields the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Only the seed and the logging level are set; all other CatBoost
# hyperparameters are left at whatever the library chooses.
model = CatBoostClassifier(random_state=42, verbose=0)
model.fit(X_train, y_train)

# Score the fitted model on the held-out rows only.
y_pred = model.predict(X_test)

print("🌧️ Landslide (CatBoost) Accuracy:", accuracy_score(y_test, y_pred))
print("📊 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("📋 Classification Report:\n", classification_report(y_test, y_pred))

🌧️ Landslide (CatBoost) Accuracy: 0.4857142857142857
📊 Confusion Matrix:
 [[20 15]
 [21 14]]
📋 Classification Report:
               precision    recall  f1-score   support

         0.0       0.49      0.57      0.53        35
         1.0       0.48      0.40      0.44        35

    accuracy                           0.49        70
   macro avg       0.49      0.49      0.48        70
weighted avg       0.49      0.49      0.48        70



## GLOF (Glacial Lake Outburst Flood)

In [3]:
# ---------------------------------------------------------------------------
# GLOF: load data, train a CatBoost classifier, report hold-out metrics.
# ---------------------------------------------------------------------------

# Read the cleaned GLOF table (path is relative to the working directory).
df = pd.read_csv("cleaned_glof_dataset.csv")

# "GLOF_Occurred" is the label; every remaining column except "Date"
# is handed to the model as a feature.
y = df["GLOF_Occurred"]
X = df.drop(["GLOF_Occurred", "Date"], axis="columns")

# Keep 20% of the rows aside for evaluation. The split is stratified on the
# label and seeded so that re-running the cell yields the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Only the seed and the logging level are set; all other CatBoost
# hyperparameters are left at whatever the library chooses.
model = CatBoostClassifier(random_state=42, verbose=0)
model.fit(X_train, y_train)

# Score the fitted model on the held-out rows only.
y_pred = model.predict(X_test)

print("🌊 GLOF (CatBoost) Accuracy:", accuracy_score(y_test, y_pred))
print("📊 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("📋 Classification Report:\n", classification_report(y_test, y_pred))

🌊 GLOF (CatBoost) Accuracy: 0.5428571428571428
📊 Confusion Matrix:
 [[18 16]
 [16 20]]
📋 Classification Report:
               precision    recall  f1-score   support

         0.0       0.53      0.53      0.53        34
         1.0       0.56      0.56      0.56        36

    accuracy                           0.54        70
   macro avg       0.54      0.54      0.54        70
weighted avg       0.54      0.54      0.54        70



## Earthquake

In [4]:
# ---------------------------------------------------------------------------
# Earthquake: load data, train a CatBoost classifier, report hold-out metrics.
# ---------------------------------------------------------------------------

# Read the cleaned earthquake table (path is relative to the working directory).
df = pd.read_csv("cleaned_earthquake_dataset.csv")

# "Quake_Occurred" is the label; every remaining column except "Date"
# is handed to the model as a feature.
y = df["Quake_Occurred"]
X = df.drop(["Quake_Occurred", "Date"], axis="columns")

# Keep 20% of the rows aside for evaluation. The split is stratified on the
# label and seeded so that re-running the cell yields the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Only the seed and the logging level are set; all other CatBoost
# hyperparameters are left at whatever the library chooses.
model = CatBoostClassifier(random_state=42, verbose=0)
model.fit(X_train, y_train)

# Score the fitted model on the held-out rows only.
y_pred = model.predict(X_test)

print("🌍 Earthquake (CatBoost) Accuracy:", accuracy_score(y_test, y_pred))
print("📊 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("📋 Classification Report:\n", classification_report(y_test, y_pred))

🌍 Earthquake (CatBoost) Accuracy: 0.5428571428571428
📊 Confusion Matrix:
 [[14 19]
 [13 24]]
📋 Classification Report:
               precision    recall  f1-score   support

         0.0       0.52      0.42      0.47        33
         1.0       0.56      0.65      0.60        37

    accuracy                           0.54        70
   macro avg       0.54      0.54      0.53        70
weighted avg       0.54      0.54      0.54        70



## Liquefaction

In [5]:
# ---------------------------------------------------------------------------
# Liquefaction: load data, discretise the target, train CatBoost, report
# hold-out metrics.
# ---------------------------------------------------------------------------

# Read the cleaned liquefaction table (path is relative to the working directory).
df = pd.read_csv("cleaned_liquefaction_dataset.csv")

# The stored "Liquefaction_Risk" values are rounded and cast to int so they can
# serve as discrete class labels; the column is replaced in the frame.
# NOTE(review): the recorded run below shows only labels 0 and 1 after
# rounding, so the 'MultiClass' loss is being applied to a two-class target —
# confirm this is intended.
df = df.assign(Liquefaction_Risk=df["Liquefaction_Risk"].round().astype(int))

# The rounded risk column is the label; every remaining column except "Date"
# is handed to the model as a feature.
y = df["Liquefaction_Risk"]
X = df.drop(["Liquefaction_Risk", "Date"], axis="columns")

# Keep 20% of the rows aside for evaluation. The split is stratified on the
# label and seeded so that re-running the cell yields the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# The loss function is set explicitly to 'MultiClass'; besides the seed and
# the logging level, nothing else is overridden.
model = CatBoostClassifier(
    loss_function='MultiClass',
    random_state=42,
    verbose=0,
)
model.fit(X_train, y_train)

# Score the fitted model on the held-out rows only.
y_pred = model.predict(X_test)

print("💧 Liquefaction (CatBoost) Accuracy:", accuracy_score(y_test, y_pred))
print("📊 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("📋 Classification Report:\n", classification_report(y_test, y_pred))

💧 Liquefaction (CatBoost) Accuracy: 0.5857142857142857
📊 Confusion Matrix:
 [[33  8]
 [21  8]]
📋 Classification Report:
               precision    recall  f1-score   support

           0       0.61      0.80      0.69        41
           1       0.50      0.28      0.36        29

    accuracy                           0.59        70
   macro avg       0.56      0.54      0.53        70
weighted avg       0.57      0.59      0.55        70



## Sinkhole

In [6]:
# ---------------------------------------------------------------------------
# Sinkhole: load data, train a CatBoost classifier, report hold-out metrics.
# ---------------------------------------------------------------------------

# Read the cleaned sinkhole table (path is relative to the working directory).
df = pd.read_csv("cleaned_sinkhole_dataset.csv")

# "Sinkhole_Reported" is the label; every remaining column except "Date"
# is handed to the model as a feature.
y = df["Sinkhole_Reported"]
X = df.drop(["Sinkhole_Reported", "Date"], axis="columns")

# Keep 20% of the rows aside for evaluation. The split is stratified on the
# label and seeded so that re-running the cell yields the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Only the seed and the logging level are set; all other CatBoost
# hyperparameters are left at whatever the library chooses.
model = CatBoostClassifier(random_state=42, verbose=0)
model.fit(X_train, y_train)

# Score the fitted model on the held-out rows only.
y_pred = model.predict(X_test)

print("🕳️ Sinkhole (CatBoost) Accuracy:", accuracy_score(y_test, y_pred))
print("📊 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("📋 Classification Report:\n", classification_report(y_test, y_pred))

🕳️ Sinkhole (CatBoost) Accuracy: 0.5285714285714286
📊 Confusion Matrix:
 [[18 18]
 [15 19]]
📋 Classification Report:
               precision    recall  f1-score   support

         0.0       0.55      0.50      0.52        36
         1.0       0.51      0.56      0.54        34

    accuracy                           0.53        70
   macro avg       0.53      0.53      0.53        70
weighted avg       0.53      0.53      0.53        70



## Tsunami

In [7]:
# ---------------------------------------------------------------------------
# Tsunami: load data, train a CatBoost classifier, report hold-out metrics.
# ---------------------------------------------------------------------------

# Read the cleaned tsunami table (path is relative to the working directory).
df = pd.read_csv("cleaned_tsunami_dataset.csv")

# "Alert_Issued" is the label; every remaining column except "Date"
# is handed to the model as a feature.
y = df["Alert_Issued"]
X = df.drop(["Alert_Issued", "Date"], axis="columns")

# Keep 20% of the rows aside for evaluation. The split is stratified on the
# label and seeded so that re-running the cell yields the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Only the seed and the logging level are set; all other CatBoost
# hyperparameters are left at whatever the library chooses.
model = CatBoostClassifier(random_state=42, verbose=0)
model.fit(X_train, y_train)

# Score the fitted model on the held-out rows only.
y_pred = model.predict(X_test)

print("🌊 Tsunami (CatBoost) Accuracy:", accuracy_score(y_test, y_pred))
print("📊 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("📋 Classification Report:\n", classification_report(y_test, y_pred))

🌊 Tsunami (CatBoost) Accuracy: 0.5285714285714286
📊 Confusion Matrix:
 [[19 17]
 [16 18]]
📋 Classification Report:
               precision    recall  f1-score   support

         0.0       0.54      0.53      0.54        36
         1.0       0.51      0.53      0.52        34

    accuracy                           0.53        70
   macro avg       0.53      0.53      0.53        70
weighted avg       0.53      0.53      0.53        70



## Lahar

In [8]:
# ---------------------------------------------------------------------------
# Lahar: load data, train a CatBoost classifier, report hold-out metrics.
# ---------------------------------------------------------------------------

# Read the cleaned lahar table (path is relative to the working directory).
df = pd.read_csv("cleaned_lahar_dataset.csv")

# "Lahar_Triggered" is the label; every remaining column except "Date"
# is handed to the model as a feature.
y = df["Lahar_Triggered"]
X = df.drop(["Lahar_Triggered", "Date"], axis="columns")

# Keep 20% of the rows aside for evaluation. The split is stratified on the
# label and seeded so that re-running the cell yields the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Only the seed and the logging level are set; all other CatBoost
# hyperparameters are left at whatever the library chooses.
model = CatBoostClassifier(random_state=42, verbose=0)
model.fit(X_train, y_train)

# Score the fitted model on the held-out rows only.
y_pred = model.predict(X_test)

print("🌋 Lahar (CatBoost) Accuracy:", accuracy_score(y_test, y_pred))
print("📊 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("📋 Classification Report:\n", classification_report(y_test, y_pred))

🌋 Lahar (CatBoost) Accuracy: 0.44285714285714284
📊 Confusion Matrix:
 [[ 7 24]
 [15 24]]
📋 Classification Report:
               precision    recall  f1-score   support

         0.0       0.32      0.23      0.26        31
         1.0       0.50      0.62      0.55        39

    accuracy                           0.44        70
   macro avg       0.41      0.42      0.41        70
weighted avg       0.42      0.44      0.42        70

