In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pandas as pd

datasets_info = {
    "landslide": "cleaned_landslide_dataset.csv",
    "glof": "cleaned_glof_dataset.csv",
    "earthquake": "cleaned_earthquake_dataset.csv",
    "liquefaction": "cleaned_liquefaction_dataset.csv",
    "sinkhole": "cleaned_sinkhole_dataset.csv",
    "tsunami": "cleaned_tsunami_dataset.csv",
    "lahar": "cleaned_lahar_dataset.csv"
}

target_columns = {
    "landslide": "Landslide_Occurred",
    "glof": "GLOF_Occurred",
    "earthquake": "Quake_Occurred",
    "liquefaction": "Liquefaction_Risk",
    "sinkhole": "Sinkhole_Reported",
    "tsunami": "Alert_Issued",
    "lahar": "Lahar_Triggered"
}

for hazard in datasets_info:
    df = pd.read_csv(datasets_info[hazard])
    target = target_columns[hazard]

    if hazard == "liquefaction":
        df[target] = df[target].round().astype(int)

    X = df.drop(columns=[target, "Date"])
    y = df[target]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print(f"\n🔍 {hazard.upper()} MODEL")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))
    print("Feature Importances:\n", model.feature_importances_)



🔍 LANDSLIDE MODEL
Accuracy: 0.44285714285714284
Confusion Matrix:
 [[18 17]
 [22 13]]
Classification Report:
               precision    recall  f1-score   support

         0.0       0.45      0.51      0.48        35
         1.0       0.43      0.37      0.40        35

    accuracy                           0.44        70
   macro avg       0.44      0.44      0.44        70
weighted avg       0.44      0.44      0.44        70

Feature Importances:
 [0.19589405 0.18338228 0.14984938 0.16541819 0.15664265 0.14881345]

🔍 GLOF MODEL
Accuracy: 0.6
Confusion Matrix:
 [[21 13]
 [15 21]]
Classification Report:
               precision    recall  f1-score   support

         0.0       0.58      0.62      0.60        34
         1.0       0.62      0.58      0.60        36

    accuracy                           0.60        70
   macro avg       0.60      0.60      0.60        70
weighted avg       0.60      0.60      0.60        70

Feature Importances:
 [0.17452742 0.15414223 0.18142468