#### 1. Load packages

In [None]:
library(readxl)
library(spThin)
library(openxlsx)
library(sf)

#### 2. Read data

In [None]:
# Load the occurrence records from worksheet "Sheet1" of the Excel workbook.
data <- read_excel(
  "E:/Working/Sam/Experiment/Data/Gentiana pannonica Scop/Pre_Gentiana_data.xlsx",
  sheet = "Sheet1"
)

# Inspect the structure of the imported table.
str(data)

# Build point geometries from the X/Y columns, declared as EPSG:4326 (WGS 84).
sf_data <- st_as_sf(
  data,
  coords = c("X", "Y"),
  crs = 4326
)

# Reproject the points to EPSG:32633 (UTM zone 33N).
sf_data_utm <- st_transform(
  sf_data,
  crs = 32633
)

# Pull the projected coordinates out as a matrix with columns "X" and "Y".
utm_coords <- st_coordinates(sf_data_utm)

# Plain data frame pairing each record ID with its projected coordinates;
# rows stay in the same order as `data`.
data_utm <- data.frame(
  ID = data[["ID"]],
  X = utm_coords[, "X"],
  Y = utm_coords[, "Y"]
)
str(data_utm)

#### 3. Spatial thinning

In [None]:
### Step1 ###
# Drop records that share an identical location. The mask is computed on the
# projected table but applies row-for-row to `data`, because `data_utm` was
# built from `data` in the same row order.
is_first_at_location <- !duplicated(data_utm[c("X", "Y")])
unique_data <- data_utm[is_first_at_location, ]
str(unique_data)

# Minimum spacing between retained records, in metres.
thinLength <- 800

# spThin::thin() measures great-circle distances: it expects longitude and
# latitude in decimal degrees and `thin.par` in kilometres. Feeding it the
# projected UTM metres from `data_utm` makes those distances meaningless, so
# thin on the original WGS 84 coordinates and convert the spacing to km.
unique_geo <- data.frame(
  ID = data$ID[is_first_at_location],
  X = as.numeric(data$X[is_first_at_location]), # longitude, decimal degrees
  Y = as.numeric(data$Y[is_first_at_location])  # latitude, decimal degrees
)

# use spThin to thin the data
thinned_data <- thin(
  loc.data = unique_geo,
  lat.col = "Y",
  long.col = "X",
  # NOTE(review): this is a per-record ID, not a species name; thin() appears
  # to use this column only for a single-species check and may warn -- confirm.
  spec.col = "ID",
  thin.par = thinLength / 1000, # thinning distance in km
  reps = 10, # number of repetitions
  locs.thinned.list.return = TRUE,
  write.files = FALSE
)

# Keep the repetition that retained the most records (first one on ties).
n_retained <- vapply(thinned_data, nrow, integer(1))
best_thinned <- thinned_data[[which.max(n_retained)]]
str(best_thinned)
# Rename the two returned columns; assumes the order is longitude, latitude,
# matching the original positional rename -- TODO confirm against spThin docs.
colnames(best_thinned) <- c("X", "Y")

### Step2 ###
# Re-attach the record IDs by joining on the (de-duplicated) coordinates.
best_thinned_with_id <- merge(best_thinned, unique_geo, by = c("X", "Y"))
if (nrow(best_thinned_with_id) != nrow(best_thinned)) {
  # merge() compares numeric keys after formatting them, so near-identical
  # coordinates could match more than once; surface that instead of hiding it.
  warning(
    "Coordinate join returned ", nrow(best_thinned_with_id),
    " rows for ", nrow(best_thinned), " thinned points; check the ID mapping.",
    call. = FALSE
  )
}
head(best_thinned_with_id)

# The thinned coordinates are already geographic (WGS 84), so no back
# transformation is needed; just put the columns in ID, X, Y order.
best_thinned_geo_df <- best_thinned_with_id[c("ID", "X", "Y")]
head(best_thinned_geo_df, 10)

### Step3 ###
# Save the thinned data. The "800m" in the file name is hard-coded; keep it in
# sync with `thinLength` if the spacing changes.
write.xlsx(best_thinned_geo_df, "E:/Working/Sam/Experiment/R_Code/code_data_sparse/Thin_Gentiana_data_800m.xlsx", sheetName = "ThinnedData")
