In [1]:
import ee
import geemap

### Initialize the Earth Engine API - automatically authenticates if not already initialized

In [2]:
try:
    ee.Initialize()
except Exception:
    ee.Authenticate()
    ee.Initialize()

### Load the administrative boundary of Germany from the FAO GAUL database

In [3]:
germany = ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level0") \
    .filter(ee.Filter.eq('ADM0_NAME', 'Germany'))

### Load and preprocess Landsat 8 Collection 2 imagery:
 - Filter by date (summer 2019) for optimal vegetation conditions
 - Filter by cloud cover (<30%) to ensure clear imagery
 - Select surface reflectance bands for analysis
 - Create median composite to remove anomalies and cloud remnants
 - Scale pixel values to 0-1 range (divide by 10000)

In [4]:
landsat = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2") \
    .filterDate('2019-06-01', '2019-09-30') \
    .filterBounds(germany) \
    .filter(ee.Filter.lt('CLOUD_COVER', 30)) \
    .select(['SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7']) \
    .median() \
    .divide(10000)

- Calculate Normalized Difference Vegetation Index (NDVI) using NIR (B5) and Red (B4) bands
- Higher NDVI values indicate denser vegetation and potentially higher biomass

In [5]:
ndvi = landsat.normalizedDifference(['SR_B5', 'SR_B4']).rename('NDVI')

- Load the NASA/ORNL aboveground biomass carbon density dataset
- Mask it to include only areas identified as forest in the Hansen dataset (treecover > 0%)

In [6]:
carbon = ee.ImageCollection("NASA/ORNL/biomass_carbon_density/v1") \
    .select('agb').first().rename('Carbon') \
    .updateMask(ee.Image("UMD/hansen/global_forest_change_2023_v1_11").select('treecover2000').gt(0))

- Load percent tree cover from the Hansen Global Forest Change dataset
- This represents forest density - higher values indicate denser forests

In [7]:
treecover = ee.Image("UMD/hansen/global_forest_change_2023_v1_11") \
    .select('treecover2000').rename('TCD')

- Load elevation data from the SRTM dataset
- Topography influences forest growth patterns and carbon storage capacity

In [8]:
elevation = ee.Image("USGS/SRTMGL1_003").select('elevation').rename('ELEVATION')

- Combine all predictor variables (NDVI, tree cover, elevation) into a single multi-band image


In [9]:
input_features = ndvi.addBands([treecover, elevation])

- Add carbon data to create a complete dataset for training and validation


In [10]:
input_with_carbon = input_features.addBands(carbon)

- Create a random sample of 3000 forest points across Germany for model training and testing
- Scale of 1000 meters balances detail with computational efficiency
- dropNulls ensures we only include points with valid data in all bands

In [11]:
training_points = input_with_carbon.sample(
    region=germany.geometry(),
    scale=1000,
    numPixels=3000,
    seed=42,
    dropNulls=True
)

- Split the sample data into training (70%) and testing (30%) datasets
- This allows for independent validation of model performance

In [12]:
split = 0.7
training_data = training_points.randomColumn('random').filter(ee.Filter.lt('random', split))
testing_data = training_points.randomColumn('random').filter(ee.Filter.gte('random', split))

- Define the predictor variables to be used in the model


In [13]:
predictors = ['NDVI', 'TCD', 'ELEVATION']

 - Configure and train a Random Forest regression model:
 - 50 trees provides a balance between accuracy and computational efficiency
 - minLeafPopulation of 5 prevents overfitting on small data subsets
 - REGRESSION output mode predicts continuous carbon values rather than classes

In [14]:
trained_model = ee.Classifier.smileRandomForest(
    numberOfTrees=50,
    minLeafPopulation=5
).setOutputMode('REGRESSION').train(
    features=training_data,
    classProperty='Carbon',
    inputProperties=predictors
)

- Apply the trained model to the testing dataset to evaluate performance

In [15]:
test = testing_data.classify(trained_model, 'predicted_carbon')

- Calculate prediction errors and squared errors for each testing point


In [16]:
def addError(feature):
    error = ee.Number(feature.get('Carbon')).subtract(ee.Number(feature.get('predicted_carbon')))
    return feature.set({'error': error, 'error_squared': error.pow(2)})

errors = test.map(addError)

- Calculate Root Mean Square Error (RMSE) - a measure of average prediction error


In [17]:
sum_squared_errors = errors.aggregate_sum('error_squared')
count = errors.size()
rmse = ee.Number(sum_squared_errors).divide(count).sqrt()

- Calculate the coefficient of determination (R²) to assess model fit
- R² measures the proportion of variance in carbon values explained by the model

In [18]:
mean_observed = testing_data.aggregate_mean('Carbon')

In [19]:
def addTSS(feature):
    deviation = ee.Number(feature.get('Carbon')).subtract(mean_observed)
    return feature.set({'tss': deviation.pow(2)})

tss_features = testing_data.map(addTSS)
total_sum_squares = tss_features.aggregate_sum('tss')

In [20]:
residual_sum_squares = errors.aggregate_sum('error_squared')

r_squared = ee.Number(1).subtract(residual_sum_squares.divide(total_sum_squares))

- Print model performance metrics to assess prediction accuracy

In [21]:
print('Forest Carbon Model Metrics:')
print('RMSE:', rmse.getInfo(), 'tonnes/ha')
print('R²:', r_squared.getInfo())

Forest Carbon Model Metrics:
RMSE: 22.416481998794584 tonnes/ha
R²: 0.6176660129348073


- Apply the trained model to predict carbon density across all of Germany

In [22]:
carbon_prediction = input_features.classify(trained_model, 'predicted_carbon')

- Create a forest mask to limit analysis to areas with significant tree cover (>25%)
- This threshold helps exclude sparse vegetation that isn't true forest

In [23]:
forest_mask = treecover.gt(30)  # Only areas with tree cover

- Calculate total forest area by summing pixels identified as forest
- Convert pixel areas to km² for reporting

In [24]:
forest_area_result = forest_mask.rename('forest_area').multiply(ee.Image.pixelArea()).reduceRegion(
    reducer=ee.Reducer.sum(),
    geometry=germany.geometry(),
    scale=1000,
    maxPixels=1e10
).getInfo()

In [25]:
forest_area_km2 = forest_area_result['forest_area'] / 1e6  # Convert to km²

- Calculate mean carbon density and total carbon stock across Germany's forest

In [26]:
stats = carbon_prediction.updateMask(forest_mask).reduceRegion(
    reducer=ee.Reducer.mean().combine(
        reducer2=ee.Reducer.sum(),
        sharedInputs=True
    ),
    geometry=germany.geometry(),
    scale=1000,
    maxPixels=1e10
).getInfo()

In [27]:
mean_carbon = stats['predicted_carbon_mean']
total_carbon = stats['predicted_carbon_sum']

- Print summary statistics for Germany's forest carbon

In [28]:
print("\nForest Carbon Summary:")
print("Forest Area (km²):", forest_area_km2)
print("Mean Forest Carbon (tonnes/ha):", mean_carbon)
print("Total Forest Carbon (tonnes):", total_carbon)


Forest Carbon Summary:
Forest Area (km²): 135414.4147307172
Mean Forest Carbon (tonnes/ha): 65.97603777984679
Total Forest Carbon (tonnes): 14037311.47206551


- Create an interactive map using geemap for visualization

In [29]:
Map = geemap.Map()
Map.centerObject(germany, 6)

- Add layers

In [30]:
vis_params = {
    'min': 0,
    'max': 150,
    'palette': ['#ffffcc', '#c2e699', '#78c679', '#31a354', '#006837']
}

Map.addLayer(
    carbon.clip(germany),
    vis_params,
    'Actual Forest Carbon (tonnes/ha)'
)

Map.addLayer(
    carbon_prediction.updateMask(forest_mask).clip(germany),
    vis_params,
    'Predicted Forest Carbon (tonnes/ha)'
)

# Add legend and display
Map.add_legend(
    title="Forest Carbon (tonnes/ha)",
    colors=['#ffffcc', '#c2e699', '#78c679', '#31a354', '#006837'],
    labels=['0-30', '30-60', '60-90', '90-120', '120-150']
)

Map.addLayer(
    germany.style(**{'color': 'black', 'fillColor': '00000000'}),
    {},
    'Germany Boundary'
)

In [31]:
display(Map)

Map(center=[51.052829262339436, 10.372114873284264], controls=(WidgetControl(options=['position', 'transparent…

In [33]:
Map.to_html("Carbon_Prediction.html")

In [34]:
print('Forest Carbon Model Metrics:')
print('RMSE:', rmse.getInfo(), 'tonnes/ha')
print('R²:', r_squared.getInfo())

Forest Carbon Model Metrics:
RMSE: 22.416481998794584 tonnes/ha
R²: 0.6176660129348073
