In [4]:
# Install
!pip install "geoai-py" --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.6/56.6 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m269.6/269.6 kB[0m [31m23.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.5/122.5 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m618.7/618.7 kB[0m [31m51.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31m29.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m882.2/882.2 kB[0m [31m63.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m78.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.8/41.8 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [5]:
# Import
import geoai
import os
import zipfile
from pathlib import Path
import numpy as np


In [6]:
# Mount Google Drive at /content/drive (Colab only); the data and output paths
# used by the cells below all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Root folder on the mounted Drive; the image, mask, model and prediction paths
# in the cells below are all built from it.
directory = "/content/drive/MyDrive"

This code trains a U-Net segmentation model on the NASA data.

In [None]:
# Train a U-Net (ResNet-34 encoder, ImageNet weights) on the 9-band tiles.
# The tiles are read from the "images" folder, the same folder every other cell
# of this notebook uses for inference and for the random forest.
geoai.train_segmentation_model(
    images_dir=f"{directory}/images",
    labels_dir=f"{directory}/masks",
    output_dir=f"{directory}/unet_model_segmentation_model",
    architecture="unet",
    encoder_name="resnet34",
    encoder_weights="imagenet",
    num_channels=9,
    num_classes=2,  # landslide and not landslide
    batch_size=8,
    num_epochs=20,
    learning_rate=1e-5,
    val_split=0.2,  # hold out 20% of the tiles for validation
    verbose=True,
)

Metrics of segmentation model

In [None]:
# Plot the metrics stored in the training-history file found in the U-Net output folder.
history_file = f"{directory}/unet_model_segmentation_model/training_history.pth"
geoai.plot_performance_metrics(history_path=history_file, figsize=(15, 5), verbose=True)

Find all of the images that actually contain landslide pixels and keep only those (this reduces the heavy class imbalance of the data set).

In [None]:
import os
from PIL import Image
import numpy as np
# =============================
# MAIN
# =============================
def find_landslide_predictions(pred_dir, output_file, threshold):
    """List the rasters in ``pred_dir`` that have at least one pixel >= ``threshold``.

    Every ``.png``/``.tif``/``.tiff`` file (extension matched case-insensitively)
    directly inside ``pred_dir`` is opened with PIL, converted to 8-bit greyscale
    and tested. Files that cannot be read are reported and skipped. The names of
    the matching files are written to ``output_file``, one per line, in sorted
    order.

    Args:
        pred_dir: Directory to scan (not recursive).
        output_file: Text file to write; its parent folder is created if missing.
        threshold: Smallest greyscale value that counts as a positive pixel.

    Returns:
        The sorted list of matching file names (the same names written to
        ``output_file``).
    """
    pred_files = sorted(
        name for name in os.listdir(pred_dir)
        if name.lower().endswith((".png", ".tif", ".tiff"))
    )

    landslide_files = []

    for name in pred_files:
        path = os.path.join(pred_dir, name)
        try:
            # The context manager releases the file handle as soon as the pixels
            # are copied, so scanning a large folder does not leak descriptors.
            with Image.open(path) as img:
                arr = np.array(img.convert("L"))
            if np.any(arr >= threshold):
                landslide_files.append(name)
        except Exception as e:
            # Best effort: an unreadable file must not abort the whole scan.
            print(f"⚠️ Skipping {name}: {e}")

    print(f"\n✅ Found {len(landslide_files)} images with predicted landslides.\n")

    # Save list. The target folder may not exist yet on a fresh run.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output_file, "w") as fp:
        fp.writelines(f"{name}\n" for name in landslide_files)

    print(f"📄 Saved list to: {output_file}")
    return landslide_files


if __name__ == "__main__":
    # Scan the ground-truth masks, not the 9-band images: a tile is kept when its
    # mask has at least one non-zero pixel. Masks and images share file names, so
    # the saved list can be used directly to select the matching images.
    mask_directory = f"{directory}/masks"
    output_file = f"{directory}/predictions/landslide_predictions.txt"
    threshold = 1  # any pixel value > 0 counts as a landslide
    # The predictions folder may not exist yet when the notebook is run from scratch.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    find_landslide_predictions(mask_directory, output_file, threshold)

Making predictions with the segmentation model

In [None]:
# Run the trained U-Net on every listed tile and plot image / prediction / ground truth.
model_path = f"{directory}/unet_model_segmentation_model/best_model.pth"

# Create predictions directory
predictions_dir = f"{directory}/predictions"
os.makedirs(predictions_dir, exist_ok=True)

# Images to run inference on and the folder holding their ground-truth masks
val_images_dir = f"{directory}/images"
val_masks_dir = f"{directory}/masks"

# Read the list of images to process from the predictions file.
# Path(...).stem removes only the final extension, so the result is directly
# comparable with `img.stem` below. A plain str.replace('.tif', '') would also
# strip '.tif' from the middle of a name and turn 'x.tiff' into 'xf'.
predictions_file = f"{directory}/predictions/landslide_predictions.txt"
with open(predictions_file, 'r') as f:
    target_images = {Path(line.strip()).stem for line in f if line.strip()}

# Get all image files (sorted so the tiles are processed in a reproducible order)
image_files = sorted(Path(val_images_dir).glob("*.tif"))

# Filter to only process images in the predictions list
filtered_images = [img for img in image_files if img.stem in target_images]

# Process each filtered image
for image_path in filtered_images:
    test_image = str(image_path)
    image_name = image_path.stem

    prediction_path = f"{predictions_dir}/{image_name}_prediction.tif"
    # The mask is looked up by the image's own file name
    ground_truth_path = f"{val_masks_dir}/{image_name}.tif"

    # Generate prediction (architecture/encoder/channels/classes match the training cell)
    geoai.semantic_segmentation(
        input_path=test_image,
        output_path=prediction_path,
        model_path=model_path,
        architecture="unet",
        encoder_name="resnet34",
        num_channels=9,
        num_classes=2,
        window_size=128,
        batch_size=8,
    )

    # Plot comparison with correct band indexes and divider
    save_path = f"{directory}/{image_name}_comparison.png"

    fig = geoai.plot_prediction_comparison(
        original_image=test_image,
        prediction_image=prediction_path,
        ground_truth_image=ground_truth_path,
        titles=["Original", "Prediction", "Ground Truth"],
        figsize=(15, 5),
        save_path=save_path,
        show_plot=True,
        indexes=[4, 5, 6],  # Bands 4, 5, 6 (0-indexed as 3, 4, 5)
        divider=10000,  # Your values need to be divided by 10000
    )

This code trains a random forest classifier on the NASA data.

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import numpy as np
import rasterio
from glob import glob
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, 
    classification_report, 
    confusion_matrix,
    precision_recall_fscore_support,
    roc_auc_score
)
import numpy as np
import rasterio
from glob import glob
import joblib

# 1. Load and prepare data
def load_images_and_masks(images_dir, labels_dir):
    """Load every image/mask pair and flatten them into per-pixel samples.

    Images and masks are paired by file name (``<images_dir>/x.tif`` with
    ``<labels_dir>/x.tif``) rather than by position in two sorted listings, so a
    missing or extra file in either folder cannot silently shift the pairing.
    Images without a mask of the same name are reported and skipped.

    Args:
        images_dir: Folder containing the multi-band ``.tif`` images.
        labels_dir: Folder containing the ``.tif`` masks (band 1 is read).

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_pixels, n_channels)`` and ``y`` has
        shape ``(n_pixels,)``, with pixels of all pairs stacked in image-name order.

    Raises:
        ValueError: If no image/mask pair is found, or if an image and its mask
            have different height/width.
    """
    image_files = sorted(glob(f"{images_dir}/*.tif"))
    masks_by_name = {
        os.path.basename(path): path for path in glob(f"{labels_dir}/*.tif")
    }

    X_list = []
    y_list = []

    for img_path in image_files:
        mask_path = masks_by_name.get(os.path.basename(img_path))
        if mask_path is None:
            print(f"Skipping {img_path}: no mask with the same file name in {labels_dir}")
            continue

        # Load image
        with rasterio.open(img_path) as src:
            img = src.read()  # Shape: (channels, height, width)

        # Load mask
        with rasterio.open(mask_path) as src:
            mask = src.read(1)  # Shape: (height, width)

        # A size mismatch would leave X and y with different numbers of rows
        if img.shape[1:] != mask.shape:
            raise ValueError(
                f"Image {img_path} has size {img.shape[1:]} but mask {mask_path} "
                f"has size {mask.shape}"
            )

        # Reshape to (n_pixels, n_channels)
        img_flat = img.reshape(img.shape[0], -1).T
        mask_flat = mask.flatten()  # (n_pixels,)

        X_list.append(img_flat)
        y_list.append(mask_flat)

    if not X_list:
        raise ValueError(
            f"No image/mask pairs found in {images_dir!r} and {labels_dir!r}"
        )

    X = np.vstack(X_list)
    y = np.concatenate(y_list)

    return X, y

# 2. Load data
print("Loading data...")
X, y = load_images_and_masks(
    f"{directory}/images",
    f"{directory}/masks"
)

print(f"\nData shape: {X.shape}")
print(f"Labels shape: {y.shape}")

# Calculate class distribution.
# minlength=2 keeps an entry for class 1 even when the masks contain no
# landslide pixel, so the report below cannot fail with an IndexError.
class_counts = np.bincount(y.astype(int), minlength=2)
print("\nClass distribution:")
print(f"  Class 0: {class_counts[0]:,} ({class_counts[0]/len(y)*100:.2f}%)")
print(f"  Class 1: {class_counts[1]:,} ({class_counts[1]/len(y)*100:.2f}%)")
if class_counts[1] > 0:
    print(f"  Imbalance ratio: {class_counts[0]/class_counts[1]:.2f}:1")
else:
    # Avoid a division by zero when there is nothing in the positive class
    print("  Imbalance ratio: undefined (no class 1 pixels)")

# 3. Split data
# 80/20 split of the pixel samples, stratified on the label, with a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

print(f"\nTrain set size: {X_train.shape[0]:,}")
print(f"Validation set size: {X_val.shape[0]:,}")

# 4. Train Random Forest with class balancing
rf_params = dict(
    n_estimators=100,
    max_depth=20,
    min_samples_split=10,
    class_weight='balanced_subsample',  # Different balance per tree
    n_jobs=-1,  # use every available core
    random_state=42,
    verbose=2,
)
rf_model = RandomForestClassifier(**rf_params)

banner = "=" * 60
print(f"\n{banner}")
print("Training Random Forest...")
print(banner)
rf_model.fit(X_train, y_train)

# 5. Make predictions
# A single predict_proba pass per split is enough: the forest's predict() is the
# class with the highest mean probability across the trees, so taking the argmax
# here yields the same labels without running every pixel through the forest twice.
print("\nMaking predictions on training set...")
train_proba_all = rf_model.predict_proba(X_train)
y_train_pred = rf_model.classes_[np.argmax(train_proba_all, axis=1)]

print("Making predictions on validation set...")
val_proba_all = rf_model.predict_proba(X_val)
y_val_pred = rf_model.classes_[np.argmax(val_proba_all, axis=1)]

# Probability of the second class (column 1), used for ROC-AUC
y_train_proba = train_proba_all[:, 1]
y_val_proba = val_proba_all[:, 1]

# 6. Calculate metrics
def _report_split_metrics(title, y_true, y_pred, y_proba):
    """Print accuracy, classification report, confusion matrix and ROC-AUC for one split.

    Args:
        title: Heading printed between the two separator lines.
        y_true: Ground-truth labels of the split.
        y_pred: Predicted labels of the split.
        y_proba: Predicted probability of class 1, used for ROC-AUC.

    Returns:
        ``(accuracy, confusion_matrix, auc)``; ``auc`` is ``None`` when the score
        cannot be computed.
    """
    print("\n" + "="*60)
    print(title)
    print("="*60)

    accuracy = accuracy_score(y_true, y_pred)
    print(f"\nAccuracy: {accuracy:.4f}")

    print("\nClassification Report:")
    print(classification_report(y_true, y_pred, digits=4))

    print("Confusion Matrix:")
    cm = confusion_matrix(y_true, y_pred)
    print(cm)
    print(f"  True Negatives:  {cm[0,0]:,}")
    print(f"  False Positives: {cm[0,1]:,}")
    print(f"  False Negatives: {cm[1,0]:,}")
    print(f"  True Positives:  {cm[1,1]:,}")

    try:
        auc = roc_auc_score(y_true, y_proba)
        print(f"\nROC-AUC Score: {auc:.4f}")
    except ValueError as exc:
        # Raised e.g. when only one class is present in y_true. Catching only
        # ValueError lets KeyboardInterrupt and real bugs propagate.
        auc = None
        print(f"\nROC-AUC Score: Could not calculate ({exc})")

    return accuracy, cm, auc


train_accuracy, cm_train, train_auc = _report_split_metrics(
    "TRAINING SET METRICS", y_train, y_train_pred, y_train_proba
)

val_accuracy, cm_val, val_auc = _report_split_metrics(
    "VALIDATION SET METRICS", y_val, y_val_pred, y_val_proba
)

# Calculate per-class metrics (one entry per class in each returned array)
precision, recall, f1, support = precision_recall_fscore_support(y_val, y_val_pred)

section_rule = "=" * 60
print(f"\n{section_rule}")
print("DETAILED PER-CLASS METRICS (Validation)")
print(section_rule)

# Same four lines for each class; only the heading and the array index differ
class_headings = ("Class 0 (Non-landslide)", "Class 1 (Landslide)")
for class_idx, class_heading in enumerate(class_headings):
    print(f"\n{class_heading}:")
    print(f"  Precision: {precision[class_idx]:.4f}")
    print(f"  Recall:    {recall[class_idx]:.4f}")
    print(f"  F1-Score:  {f1[class_idx]:.4f}")
    print(f"  Support:   {support[class_idx]:,}")

# Feature importance: one value per input band, reported with 1-based band numbers
importance_rule = "=" * 60
print(f"\n{importance_rule}")
print("FEATURE IMPORTANCE")
print(importance_rule)
feature_importance = rf_model.feature_importances_
for band_number, importance in enumerate(feature_importance, start=1):
    print(f"Band {band_number}: {importance:.4f}")

# 7. Save model
# Persist the fitted forest with joblib so the prediction cell can reload it.
model_path = f"{directory}/random_forest_model.pkl"
joblib.dump(rf_model, model_path)
save_rule = "=" * 60
print("\n" + save_rule)
print(f"Model saved to: {model_path}")
print(save_rule)

This code generates, saves, and displays the predictions made by the random forest model.

In [None]:
import joblib
import rasterio
import numpy as np
from pathlib import Path
import os

# Load the trained Random Forest model.
# joblib.load unpickles the file and can execute arbitrary code, so only load
# model files that were produced by this notebook.
model_path = f"{directory}/random_forest_model.pkl"
rf_model = joblib.load(model_path)

# Create predictions directory
predictions_dir = f"{directory}/predictions"
os.makedirs(predictions_dir, exist_ok=True)

# Images to run inference on and the folder holding their ground-truth masks
val_images_dir = f"{directory}/images"
val_masks_dir = f"{directory}/masks"

# Read the list of images to process from the predictions file.
# Path(...).stem removes only the final extension, so the result is directly
# comparable with `img.stem` below. A plain str.replace('.tif', '') would also
# strip '.tif' from the middle of a name and turn 'x.tiff' into 'xf'.
predictions_file = f"{directory}/predictions/landslide_predictions.txt"
with open(predictions_file, 'r') as f:
    target_images = {Path(line.strip()).stem for line in f if line.strip()}

# Get all image files (sorted so the tiles are processed in a reproducible order)
image_files = sorted(Path(val_images_dir).glob("*.tif"))

# Filter to only process images in the predictions list
filtered_images = [img for img in image_files if img.stem in target_images]

# Process each filtered image
# NOTE(review): the prediction and comparison paths below are the same ones the
# U-Net prediction cell writes, so running both cells overwrites the earlier
# results. Confirm that this is intended.
for image_path in filtered_images:
    test_image = str(image_path)
    image_name = image_path.stem

    prediction_path = f"{predictions_dir}/{image_name}_prediction.tif"
    ground_truth_path = f"{val_masks_dir}/{image_name}.tif"

    # Generate prediction with Random Forest
    with rasterio.open(test_image) as src:
        img = src.read()  # Shape: (channels, height, width)
        profile = src.profile

        # Reshape for Random Forest: one row per pixel, one column per band
        height, width = img.shape[1], img.shape[2]
        img_flat = img.reshape(img.shape[0], -1).T

        # Predict
        print(f"Predicting {image_name}...")
        predictions_flat = rf_model.predict(img_flat)

        # Reshape back to image dimensions
        prediction = predictions_flat.reshape(height, width)

    # Save prediction as a single-band uint8 raster, reusing the source profile
    # for everything else
    profile.update(
        dtype=rasterio.uint8,
        count=1,
        compress='lzw'
    )

    with rasterio.open(prediction_path, 'w', **profile) as dst:
        dst.write(prediction.astype(rasterio.uint8), 1)

    print(f"Saved prediction to {prediction_path}")

    # Plot comparison
    save_path = f"{directory}/{image_name}_comparison.png"

    fig = geoai.plot_prediction_comparison(
        original_image=test_image,
        prediction_image=prediction_path,
        ground_truth_image=ground_truth_path,
        titles=["Original", "Prediction", "Ground Truth"],
        figsize=(15, 5),
        save_path=save_path,
        show_plot=True,
        indexes=[4, 5, 6],  # Bands 4, 5, 6 (0-indexed as 3, 4, 5)
        divider=10000,  # Your values need to be divided by 10000
    )

This code trains an XGBoost model on the NASA data.