In [5]:

# LANDSLIDE PREDICTION


# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Install & import libraries


import pandas as pd
import numpy as np
import joblib

from xgboost import XGBClassifier
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    confusion_matrix,
    classification_report
)

# Load cleaned dataset
data_path = "/content/drive/MyDrive/final_project/dataset_landslide/landslide_cleaned.csv"
df = pd.read_csv(data_path)

# Shuffle the dataset to randomize class distribution
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Feature & target selection (NO lat/lon)
X = df[
    [
        "elevation",
        "slope",
        "aspect",
        "landcover_class",
        "7D_mean_rain",
        "24H_rain",
        "ndvi"
    ]
]

y = df["landslide"]

# Handle class imbalance
scale_pos_weight = (y == 0).sum() / (y == 1).sum()

# Define XGBoost model
model = XGBClassifier(
    n_estimators=600,
    max_depth=6,
    learning_rate=0.03,
    subsample=0.85,
    colsample_bytree=0.85,
    gamma=0.1,
    min_child_weight=3,
    reg_alpha=0.1,
    reg_lambda=1.0,
    scale_pos_weight=scale_pos_weight,
    objective="binary:logistic",
    eval_metric="auc",
    random_state=42,
    n_jobs=-1
)

# Train on full randomized dataset
model.fit(X, y)

# Predict on the same dataset
y_pred = model.predict(X)
y_prob = model.predict_proba(X)[:, 1]

# Add predictions to dataframe
df["predicted_landslide"] = y_pred
df["predicted_probability"] = y_prob

# Evaluate performance on same dataset
print("\n===== MODEL PERFORMANCE ON SAME DATA =====")
print("Accuracy :", accuracy_score(y, y_pred))
print("Precision:", precision_score(y, y_pred))
print("Recall   :", recall_score(y, y_pred))
print("F1 Score :", f1_score(y, y_pred))
print("ROC AUC  :", roc_auc_score(y, y_prob))

print("\nConfusion Matrix:")
print(confusion_matrix(y, y_pred))

print("\nClassification Report:")
print(classification_report(y, y_pred))

# Save trained model
model_path = "/content/drive/MyDrive/final_project/model_landslide/landslide_xgboost.pkl"
joblib.dump(model, model_path)
print("\nModel saved to:", model_path)

# Save predictions with original data
output_path = "/content/drive/MyDrive/final_project/landslide_predictions_randomized.csv"
df.to_csv(output_path, index=False)
print("Predictions saved to:", output_path)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

===== MODEL PERFORMANCE ON SAME DATA =====
Accuracy : 0.9924675765985161
Precision: 0.9989928584508332
Recall   : 0.9889422641167407
F1 Score : 0.993942154406741
ROC AUC  : 0.9995580548100976

Confusion Matrix:
[[ 6613    11]
 [  122 10911]]

Classification Report:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99      6624
           1       1.00      0.99      0.99     11033

    accuracy                           0.99     17657
   macro avg       0.99      0.99      0.99     17657
weighted avg       0.99      0.99      0.99     17657


✅ Model saved to: /content/drive/MyDrive/final_project/model_landslide/landslide_xgboost.pkl
✅ Predictions saved to: /content/drive/MyDrive/final_project/landslide_predictions_randomized.csv
