In [None]:
library(plyr)
library(sdmpredictors)
library(tidyterra)
library(tidyverse)
library(ggplot2)
library(raster)
library(terra)
#https://www.ecologi.st/spatial-r/raster-gis-operations-in-r-with-terra.html

In [None]:
# Load the blue-carbon sampling sites (tab-separated, with a header row) and
# tidy the columns:
#   * Site: "Site1" is relabelled "Upper"; any other value becomes "Lower".
#   * Estuary and Biogeographical.region get script-friendly names.
# The outer parentheses print the resulting table.
(Bluecarbon_sites <- read.delim("data//BC/BCsampling_sites.tsv") %>%
    dplyr::mutate(Site = ifelse(Site == "Site1", "Upper", "Lower")) %>%
    dplyr::rename(
        estuary = Estuary,
        Biogeographical_region = Biogeographical.region
    ))

In [None]:
# Bounding box used to crop the environmental layers. raster::extent() takes
# xmin, xmax, ymin, ymax (presumably decimal degrees lon/lat -- confirm
# against the CRS of the GMED layers).
ZA_extent <- raster::extent(10, 40, -37, -22)

# Administrative boundaries fetched with geodata::gadm() (files are stored
# under data/sdm/), then converted to an sf object.
ZA_admin_st <- geodata::gadm("ZA", path = "data/sdm/")
ZA_admin_sf <- sf::st_as_sf(ZA_admin_st)

In [None]:
#' Stack every raster file matching a glob pattern.
#'
#' @param path Directory to search; may itself contain glob wildcards.
#' @param pattern Glob pattern for the file names, e.g. "*.asc".
#' @return The object returned by `raster::stack()` for the matched files.
#' @details Stops with an explicit error when the glob matches no file, so a
#'   wrong path is reported here instead of surfacing later as an empty or
#'   failing stack.
get_raster <- function(path, pattern) {
    glob <- file.path(path, pattern)
    file_ls <- Sys.glob(glob)

    # Sys.glob() returns character(0) when nothing matches.
    if (length(file_ls) == 0L) {
        stop("No files matched '", glob, "'", call. = FALSE)
    }

    raster::stack(file_ls)
}

In [None]:
# Stack every *.asc grid found in the sub-directories of GMED/ and report the
# dimensions of the resulting stack.
gmed_raster <- get_raster(path = "GMED/*/", pattern = "*.asc")
dim(gmed_raster)

In [None]:
# Restrict the stacked layers to the ZA_extent bounding box and report the
# dimensions after cropping.
gmed_cropped <- raster::crop(x = gmed_raster, y = ZA_extent)
dim(gmed_cropped)

In [None]:
# Extract every layer at the site coordinates and keep only the layers
# (columns) that are NA for at least one site.
raster::extract(gmed_cropped, Bluecarbon_sites[c("x", "y")]) %>%
    as.data.frame() %>%
    dplyr::select(where(function(layer_values) anyNA(layer_values)))

In [None]:
#' Value of the nearest non-NA cell to a point, with a distance cap.
#'
#' @param xy Coordinates of one point with elements named "x" and "y"
#'   (e.g. one row handed over by `apply(coords, 1, ...)`).
#' @param env_layer A single raster layer to search.
#' @param max_dist Largest accepted distance to the nearest non-NA cell, in
#'   the units returned by `raster::distanceFromPoints()`.
#' @return A one-row data.frame with columns `x`, `y`,
#'   `dist_<layer name>` (distance to the nearest non-NA cell) and
#'   `<layer name>` (that cell's value, or NA when it lies further away than
#'   `max_dist` or when the layer has no non-NA cell at all).
get_approx_NA <- function(xy, env_layer, max_dist) {
    # Work on plain value vectors obtained through getValues() instead of the
    # @data@values slots, which assume the values are held in memory.
    env_values <- raster::getValues(env_layer)
    dist_values <- raster::getValues(raster::distanceFromPoints(env_layer, xy))

    # Cells without data must never be picked as the nearest neighbour.
    dist_values[is.na(env_values)] <- NA

    # which.min() on a vector skips NA, returns the first minimum on ties and
    # integer(0) when every cell is NA.
    dist_index <- which.min(dist_values)

    if (length(dist_index) == 0L) {
        dist_value <- NA_real_
        approx_value <- NA
    } else {
        dist_value <- dist_values[dist_index]
        approx_value <- if (dist_value > max_dist) NA else env_values[dist_index]
    }

    approx_df <- data.frame(x = xy["x"], y = xy["y"], dist_value, approx_value)
    names(approx_df)[3:4] <- c(
        paste("dist", names(env_layer), sep = "_"),
        names(env_layer)
    )

    approx_df
}

In [None]:
#' Nearest non-NA lookup of one layer for every coordinate row.
#'
#' @param layer_name Name of the layer to pull out of `raster_stack`.
#' @param coord_df Table of coordinates; each row is passed to
#'   `get_approx_NA()` as its `xy` argument.
#' @param raster_stack Collection of layers indexable with `[[layer_name]]`.
#' @param max_dist Distance cap forwarded to `get_approx_NA()`.
#' @return The per-row results of `get_approx_NA()` bound together by row.
proxim_var <- function(layer_name, coord_df, raster_stack, max_dist) {
    per_point <- apply(
        coord_df,
        1,
        get_approx_NA,
        env_layer = raster_stack[[layer_name]],
        max_dist = max_dist
    )

    do.call(rbind, per_point)
}

In [None]:
# For every layer, look up each site's nearest non-NA cell (capped at
# max_dist = 10000, in the units of raster::distanceFromPoints -- presumably
# metres for lon/lat layers, confirm), then merge the per-layer tables on the
# site coordinates.
proxim_df_ls <- lapply(
    X = names(gmed_cropped),
    FUN = proxim_var,
    coord_df = Bluecarbon_sites[c("x", "y")],
    raster_stack = gmed_cropped,
    max_dist = 10000
)
proxim_df <- plyr::join_all(proxim_df_ls, by = c("x", "y"), type = "full")

In [None]:
# Display the table currently stored in proxim_df.
proxim_df

In [None]:
#' Add one environmental layer to a site table, filling NA from a proximity table.
#'
#' @param my_layer Name of the layer; also the name of the column added to
#'   `coord_df` and of the column read from `proxim_data`.
#' @param env_layers Collection of layers indexable with `[[my_layer]]`.
#' @param coord_df Site table with numeric columns `x` and `y`.
#' @param proxim_data Optional table with columns `x`, `y` and `my_layer`
#'   holding fallback values. When omitted, NA values are left as they are.
#' @return `coord_df` with an extra column named `my_layer`.
#' @details Only the arguments are used: the loop bound and the fallback
#'   lookup no longer read the globals `Bluecarbon_sites` and `proxim_df`.
#'   A site whose extracted value is NA and which has no row in
#'   `proxim_data` keeps NA; if several rows match, the first one is used.
fill_envdata <- function(my_layer, env_layers, coord_df, proxim_data) {
    env_layer <- env_layers[[my_layer]]

    if (nrow(coord_df) == 0L) {
        coord_df[[my_layer]] <- numeric(0)
        return(coord_df)
    }

    # One extract() call for all sites instead of one call per row.
    env_values <- raster::extract(env_layer, coord_df[, c("x", "y")])

    if (!missing(proxim_data) && anyNA(env_values)) {
        if (!my_layer %in% names(proxim_data)) {
            stop("proxim_data has no column '", my_layer, "'", call. = FALSE)
        }
        for (i in which(is.na(env_values))) {
            # Exact coordinate match, as in the original lookup; this assumes
            # proxim_data was built from the very same coordinate values.
            is_site <- proxim_data$x == coord_df$x[i] &
                proxim_data$y == coord_df$y[i]
            proxim_value <- proxim_data[[my_layer]][which(is_site)]
            if (length(proxim_value) > 0L) {
                env_values[i] <- proxim_value[1L]
            }
        }
    }

    coord_df[[my_layer]] <- env_values
    coord_df
}

In [None]:
# One filled copy of the site table per layer: values extracted at the sites,
# with NA replaced from the proximity table where possible.
filled_env_data <- lapply(
    X = names(gmed_cropped),
    FUN = fill_envdata,
    env_layers = gmed_cropped,
    coord_df = Bluecarbon_sites,
    proxim_data = proxim_df
)

In [None]:
# Merge the per-layer site tables on the site-identifying columns.
# NOTE: this reassigns proxim_df, replacing whatever table it held before.
proxim_df <- plyr::join_all(
    filled_env_data,
    by = c("estuary", "Site", "x", "y", "date", "Biogeographical_region"),
    type = "full"
)

In [None]:
# Display proxim_df after it was reassigned to the joined, filled site table.
proxim_df

In [None]:
# Test fixture for the NA-filling approach, adapted from
# https://stackoverflow.com/questions/27562076/if-raster-value-na-search-and-extract-the-nearest-non-na-pixel
set.seed(2)

# 10 x 10 raster over [0, 10] x [0, 10]; the values 1:10 are recycled over the
# cells, then 25 randomly chosen cells are blanked out.
r <- raster::raster(
    nrows = 10, ncols = 10,
    xmn = 0, xmx = 10, ymn = 0, ymx = 10
)
r[] <- 1:10
r[sample(1:raster::ncell(r), size = 25)] <- NA

# Draw the raster with a dashed grid marking the cell borders.
plot(r, axes = FALSE, box = FALSE)
segments(x0 = 0, y0 = 0:10, x1 = 10, y1 = 0:10, lty = 2)
segments(y0 = 0, x0 = 0:10, y1 = 10, x1 = 0:10, lty = 2)

# Ten random sample points, drawn as crosses and labelled with their row number.
xy <- data.frame(x = runif(10, 1, 10), y = runif(10, 1, 10))
points(xy, pch = 3)
text(
    x = xy$x, y = xy$y,
    labels = as.character(seq_len(nrow(xy))),
    pos = 4, cex = 0.7, xpd = NA
)

In [None]:
# Reference result for the test points: value of the nearest non-NA cell
# (`sampled`) next to the plain extraction (`extracted`, NA where the point
# falls on a blanked cell).
# The raster functions are namespace-qualified because terra and tidyr also
# export `extract`, so an unqualified call depends on the attach order.
sampled <- apply(
    X = xy,
    MARGIN = 1,
    FUN = function(xy) {
        dist_to_point <- raster::distanceFromPoints(r, xy)
        nearest_cell <- raster::which.min(replace(dist_to_point, is.na(r), NA))
        r@data@values[nearest_cell]
    }
)
extracted <- raster::extract(x = r, y = xy)
sampled
(sampled_df <- data.frame(xy, extracted, sampled))

In [None]:
# Testing the implementation: run get_approx_NA() on every test point, with a
# distance cap large enough that no point is rejected, and print the result.
(test_df <- do.call(
    rbind,
    apply(
        X = xy,
        MARGIN = 1,
        FUN = get_approx_NA,
        env_layer = r,
        max_dist = 1000000
    )
))

In [None]:
# Element-wise comparison of the reference values with get_approx_NA()'s
# output. NOTE(review): relies on the test raster's layer being named
# "layer", which is what names the value column of test_df -- confirm.
sampled_df$sampled == test_df$layer