# # Kriging and Interpolation of LightGBM Model Residuals for Soil Moisture Prediction
# This script kriges the LightGBM model residuals for each date and interpolates them onto a 1km grid.
# For each date, the kriged residuals are added to the 1km soil moisture predictions,
# and the interpolated residual surface is saved as a GeoTIFF raster.


In [None]:
# ## Load Required Libraries
library(gstat)
library(sp)
library(dplyr)
library(raster)
library(terra)


In [None]:
# ## Step 1: Load Data
# Read the residual table and the 1km prediction table from CSV files located
# in the current working directory.
data <- read.csv(file = "ESACCI_data25km_residuals.csv")
predicted_data <- read.csv(file = "dataset_1km.csv")

# Show the column names of both tables so naming mismatches can be spotted.
print(names(data))
print(names(predicted_data))



In [None]:
# ## Step 2: Define Unique Dates and Initialize Prediction Column
# Collect the distinct dates present in the residual table; the main loop
# iterates over these.
dates <- unique(data[["Date"]])

# Create the output column (all NA) that will receive the residual-kriging
# corrected soil moisture predictions.
predicted_data[["lgbRK_predicted_sm"]] <- NA



# ## Step 3: Process Each Date Separately


In [None]:
# For each unique date: krige the LightGBM residuals onto the 1km prediction
# points, add them to the 1km predictions, and write the kriged residual
# surface to "<date>_ResidualKrige.tif".
#
# Reads:  data           (columns Date, Lon, Lat, LGB_Residuals)
#         predicted_data (columns Date, Lon, Lat, lgb_predicted_sm_1km)
#         "<date>_.tif"  covariate raster in the working directory
# Writes: predicted_data$lgbRK_predicted_sm for the rows of each processed
#         date, plus one GeoTIFF per processed date.
#
# NOTE(review): the Lon/Lat columns are tagged as UTM zone 30 below. If they
# actually hold geographic degrees, this CRS (and the variogram distances)
# would be wrong -- confirm against the input data.
utm30_crs <- CRS("+proj=utm +zone=30 +datum=WGS84")

for (date in dates) {

  # ### Load Corresponding Raster
  # The raster only defines the grid for the residual map; dates without one
  # are skipped entirely (as before), but the skip is now reported.
  raster_file <- paste0(date, "_.tif")
  if (!file.exists(raster_file)) {
    message("Skipping date ", date, ": raster '", raster_file, "' not found")
    next
  }
  covariate_raster <- raster(raster_file)
  covariates_stack <- stack(covariate_raster)
  proj4string(covariates_stack) <- utm30_crs

  # ### Filter Data for Current Date
  # which() drops NA dates, so the row indices stay aligned with the subset
  # and can be reused for the write-back below.
  target_rows <- which(predicted_data$Date == date)
  subset_data <- data[which(data$Date == date), , drop = FALSE]
  subset_predicted <- predicted_data[target_rows, , drop = FALSE]

  # ### Check if Both Subsets Have Data
  if (nrow(subset_data) == 0 || nrow(subset_predicted) == 0) {
    print(paste("No data available for date", date))
    next
  }

  # Keep this date's 1km predictions before the subset becomes a spatial
  # object, so the sum below has exactly one value per kriged point.
  base_prediction <- subset_predicted$lgb_predicted_sm_1km

  # Convert the subsets to spatial objects and set the coordinate reference
  # system.
  coordinates(subset_data) <- ~ Lon + Lat
  coordinates(subset_predicted) <- ~ Lon + Lat
  proj4string(subset_data) <- utm30_crs
  proj4string(subset_predicted) <- utm30_crs

  # ### Calculate and Fit Variogram Model
  # autofitVariogram() lives in the automap package, which the script never
  # attaches, so it is called through its namespace.
  variogram_model <- automap::autofitVariogram(
    LGB_Residuals ~ 1, subset_data,
    model = "Sph"
  )

  # ### Perform Kriging on Residuals
  krige_res <- krige(
    LGB_Residuals ~ 1, subset_data,
    newdata = subset_predicted,
    model = variogram_model$var_model
  )
  kriged_residuals <- krige_res$var1.pred

  # ### Update Predictions with Kriged Residuals
  # Add the kriged residuals to the predictions of THIS date only (the
  # previous code added them to the predictions of all dates, recycling
  # values across mismatched lengths).
  predicted_data$lgbRK_predicted_sm[target_rows] <-
    base_prediction + kriged_residuals

  # ### Residual Kriging Interpolation for Spatial Mapping
  # Build a gstat model and interpolate it over the covariate raster grid.
  gRK <- gstat(
    formula = LGB_Residuals ~ 1,
    locations = subset_data,
    model = variogram_model$var_model
  )

  # covariates_stack is a raster-package RasterStack, and xyOnly / datatype
  # are raster::interpolate() arguments, so the raster method is used rather
  # than terra::interpolate(), which expects a SpatRaster.
  map_RK <- raster::interpolate(
    object = covariates_stack, model = gRK, xyOnly = TRUE,
    index = 1, filename = paste0(date, "_ResidualKrige.tif"),
    datatype = "FLT4S", overwrite = TRUE
  )
}



# ## Step 4: Save Updated Dataset


In [None]:
# Write the 1km table, including the lgbRK_predicted_sm column, to a CSV file
# without row names.
write.csv(
  x = predicted_data,
  file = "final_dataset.csv",
  row.names = FALSE
)
