# Random Forest Methane Classifier

This notebook trains a random forest classifier on Sentinel-2 imagery to estimate the per-pixel probability of elevated methane. All bands of the selected image are used as features, together with the engineered multi-band single-pass (MBSP) fractional signal.

## Setup

In [None]:
import datetime as dt

import ee
import geemap

# Authenticate with Earth Engine, then initialize the client library.
# Both calls run as soon as this cell executes; Authenticate() is issued
# first so that Initialize() has credentials available.
ee.Authenticate()
ee.Initialize()

## Helper Functions

In [None]:
def mask_s2_clouds(image: ee.Image) -> ee.Image:
    """Mask cloudy pixels using the QA60 band and rescale the image.

    A pixel is kept only when both bit 10 (cloud) and bit 11 (cirrus) of
    its QA60 value are zero. After masking, every band of the image
    (QA60 included) is divided by 10000.

    Args:
        image: An image that contains a "QA60" band.

    Returns:
        The masked, rescaled image with the properties of ``image``
        copied onto it.
    """
    qa = image.select("QA60")
    cloud_bit_mask = 1 << 10
    cirrus_bit_mask = 1 << 11

    cloud_free = qa.bitwiseAnd(cloud_bit_mask).eq(0)
    cirrus_free = qa.bitwiseAnd(cirrus_bit_mask).eq(0)
    clear_mask = cloud_free.And(cirrus_free)

    masked = image.updateMask(clear_mask).divide(10000)

    # Re-attach the source image's properties to the computed result.
    # copyProperties() hands back a generic ee.Element, so cast it to
    # ee.Image to honour the return annotation and let direct callers
    # keep chaining image methods.
    return ee.Image(masked.copyProperties(image, image.propertyNames()))

In [None]:
def mbsp_fractional_image(image: ee.Image, region: ee.Geometry) -> ee.Image:
    """Build the MBSP fractional methane signal band for one scene.

    The slope is ``sum(B11 * B12) / sum(B12 * B12)`` with both sums taken
    over ``region`` (the zero-intercept least-squares fit of B11 against
    B12). The output band is ``(slope * B12 - B11) / B11``.

    Args:
        image: Image providing "B11" and "B12" bands.
        region: Geometry over which the two sums are accumulated.

    Returns:
        A single-band image named "R" with the fitted slope stored in its
        "slope" property.
    """
    b11 = image.select("B11")
    b12 = image.select("B12")

    def _region_total(product: ee.Image, band: str) -> ee.Number:
        # Sum one band of `product` over the region at 20 m scale.
        totals = product.reduceRegion(
            reducer=ee.Reducer.sum(),
            geometry=region,
            scale=20,
            bestEffort=True,
        )
        return ee.Number(totals.get(band))

    # Each product is read back under the band name of its left operand.
    cross_total = _region_total(b11.multiply(b12), "B11")
    square_total = _region_total(b12.multiply(b12), "B12")
    slope = cross_total.divide(square_total)

    fractional = b12.multiply(slope).subtract(b11).divide(b11).rename("R")
    return fractional.set({"slope": slope})

## Image Selection

In [None]:
# Location and date range
lat, lon = 31.6585, 5.9053
start = dt.date(2019, 10, 1)
end = dt.date(2019, 10, 15)

# ee.Geometry.Point takes longitude first, then latitude.
point = ee.Geometry.Point(lon, lat)

# Scenes over the point inside the date window with under 20 % reported
# cloud cover, in acquisition order, each cloud-masked and rescaled.
# NOTE(review): COPERNICUS/S2_SR_HARMONIZED is the surface-reflectance
# product; use COPERNICUS/S2_HARMONIZED instead if top-of-atmosphere
# reflectance is what the model should be trained on -- confirm.
collection = (
    ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
    .filterDate(str(start), str(end))
    .filterBounds(point)
    .filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", 20))
    .sort("system:time_start")
    .map(mask_s2_clouds)
)

images = collection.toList(collection.size())
count = images.size().getInfo()
print(f"Found {count} images")

# Later cells take images.get(0) unconditionally; fail here with a clear
# message instead of an opaque server-side error further down.
if count == 0:
    raise RuntimeError(
        f"No Sentinel-2 images found at ({lat}, {lon}) between {start} and {end}; "
        "widen the date range or relax the cloud filter."
    )

## Training Data

In [None]:
# Work with the first image of the selection and remember its band list.
img = ee.Image(images.get(0))
band_names = img.bandNames()

# Bounding box of a 1000-unit buffer around the point of interest.
region = point.buffer(1000).bounds()
mbsp_img = mbsp_fractional_image(img, region)

# Binary label: 1 where the fractional signal R is below -0.02.
# NOTE(review): R is also passed to the classifier as a feature, so the
# label is a direct function of one input -- confirm this is intended.
label_img = mbsp_img.lt(-0.02).rename("label")

# Feature bands, then R, then the label, in a single image.
features_with_r = img.addBands(mbsp_img)
training_img = features_with_r.addBands(label_img)

# Draw up to 500 pixels from the region at 20 m scale with a fixed seed.
train_samples = training_img.sample(
    region=region,
    scale=20,
    numPixels=500,
    seed=1,
)

## Random Forest Model

In [None]:
# 50-tree random forest configured to output a probability.
classifier = (
    ee.Classifier.smileRandomForest(numberOfTrees=50)
    .setOutputMode("PROBABILITY")
)

# Fit on the sampled pixels: every image band plus "R" predicts "label".
trained = classifier.train(
    features=train_samples,
    classProperty="label",
    inputProperties=band_names.add("R"),
)

## Classification

In [None]:
# Rebuild the training feature stack (image bands + R) and classify it.
feature_stack = img.addBands(mbsp_img).select(band_names.add("R"))
probability = feature_stack.classify(trained)

## Visualize

In [None]:
# Visualisation settings: probability on a white-to-red ramp, RGB stretched 0-0.3.
prob_vis = {"min": 0, "max": 1, "palette": ["white", "red"]}
rgb_vis = {"min": 0, "max": 0.3}
rgb_layer = img.select(["B4", "B3", "B2"])

m = geemap.Map(center=(lat, lon), zoom=12)
m.addLayer(probability, prob_vis, "methane probability")
# Fourth positional argument False: the RGB layer is added but not shown initially.
m.addLayer(rgb_layer, rgb_vis, "RGB", False)
m