# 24. Lidar point cloud cluster processing

This notebook sets up the files needed to process all Lidar point clouds on the JASMIN cluster.

In [1]:
# Convenient jupyter setup: enable IPython's autoreload extension in mode 2,
# so imported modules (e.g. the project's `src` package) are reloaded before
# each cell execution and edits are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import geopandas as gpd
import pandas as pd
import pathlib
import datetime
from tqdm.autonotebook import tqdm

from src.constants import USER_PATH, PAISAGENSLIDAR_PATH, EBALIDAR_PATH, SRC_PATH
from src.utils.os import list_content, latest_matching_file
from src.utils.slurm import sbatch_header, sbatch

from src.processing.slurm_jobcreator import lidR_gridmetrics_job, lidR_processing_job

  from tqdm.autonotebook import tqdm


### Helper functions

In [3]:
def is_empty(folder: pathlib.Path) -> bool:
    """Return True if the directory `folder` has no entries at all.

    `folder` must be an existing directory; otherwise `Path.iterdir`
    raises (e.g. FileNotFoundError / NotADirectoryError).
    """
    return not any(folder.iterdir())


def contains(folder: pathlib.Path, name: str) -> bool:
    """Return True if at least one entry of `folder` matches the glob pattern `name`."""
    # any() stops at the first match instead of materialising the whole listing
    return any(folder.glob(name))

## 1. Paisagenslidar - Cluster normalisation

In [4]:
# Resolve the Paisagens metadata file via `latest_matching_file` (presumably the
# newest file matching "*metadata" under PAISAGENSLIDAR_PATH — confirm in
# src.utils.os) and load it with geopandas. Both names are used by later cells.
paisagens_meta_path = latest_matching_file(PAISAGENSLIDAR_PATH / "*metadata")
paisagens_meta = gpd.read_file(paisagens_meta_path)

In [5]:
# Collapse the per-file metadata into one row per survey; the grouping is
# built once and reused for every aggregate below.
_by_survey = paisagens_meta.groupby("survey")

# Survey directory: parent folder of the first file path recorded for the survey
paisagens_paths = _by_survey.first()["path"].apply(
    lambda file_path: pathlib.Path(file_path).parent
)

# Descriptive attributes: first non-null value per survey
paisagens_years = _by_survey[
    ["year", "crs", "region", "plot_code", "creation_date"]
].first()

# Number of files belonging to each survey
paisagens_nfiles = _by_survey["filename"].count()

# Totals over all files of a survey
paisagens_size = _by_survey[
    ["area (ha)",  "file_size (MB)", "points (Mio.)"]
].sum()

In [6]:
# One table with a row per survey: join the aggregates on their shared survey
# index, derive a "<region>_<plot_code>_<year>" name and order the rows.
paisagens_surveys = (
    pd.concat(
        [paisagens_paths, paisagens_years, paisagens_nfiles, paisagens_size],
        join="inner",
        axis=1,
    )
    .assign(
        name=lambda surveys: surveys.apply(
            lambda row: f"{row.region}_{row.plot_code}_{row.year}", axis=1
        )
    )
    .sort_values(by=["year", "region", "plot_code"])
)

### Normalisation: Create slurm job

In [11]:
# Safety switch for this cell: while True, only the first pending job script is
# printed for inspection and nothing is sent to the scheduler. Set to False to
# submit a job for every survey that still needs processing.
DRY_RUN = True

for idx, survey in tqdm(paisagens_surveys.iterrows(), total=len(paisagens_surveys)):

    out_dir = PAISAGENSLIDAR_PATH / "processed" / str(survey["year"]) / survey["name"]

    # Skip surveys that already have a non-empty "processed" folder
    if contains(out_dir, "processed") and not is_empty(out_dir / "processed"):
        continue

    job_file = lidR_processing_job(survey,
                                   out_dir=out_dir,
                                   queue="short-serial",
                                   max_time=datetime.time(23, 59, 0),
                                   chunk_size=0,
                                   buffer=50,
                                   filter_quantile=0.95,
                                   filter_sensitivity=1.1,
                                   filter_gridsize=10,
                                   as_file=False,
                                   perform_check=False,
                                   compress_intermediates=True,
                                   save_intermediates=True,
                                   required_memory_per_node=15000,
                                  )

    if DRY_RUN:
        # Preview only: show the generated script and stop after the first survey
        print(job_file)
        break

    # Submit job
    sbatch(job_file)
    print(f"Submitted job for {survey['name']}")

  0%|          | 0/160 [00:00<?, ?it/s]

#!/bin/bash
#SBATCH --partition=short-serial
#SBATCH --time=23:59:00  # Set a timeout for the job in HH:MM:SS
#SBATCH --mem=15000 # Set the amount of memory for per node in MB.
#SBATCH --job-name=JAC_A04_2015
#SBATCH --chdir=/gws/nopw/j04/forecol/data/Paisagenslidar/processed/2015/JAC_A04_2015/slurm
#SBATCH --output=slurm_JAC_A04_2015-%j.out
#SBATCH --error=slurm_JAC_A04_2015-%j.err


# --- TEMPLATE AUTOGENERATED ---


source ~/.bashrc
conda activate r4_env

echo "$(which R)"
echo "$(R --version)"

echo "template generation time: 2021-06-24 13:00:18.130690"
echo "survey: JAC_A04_2015"
echo "path: /gws/nopw/j04/forecol/data/Paisagenslidar/Mission_2015/JAC_A04_2015_LiDAR/JAC_A04_2015_laz"

Rscript --verbose --no-save /home/users/svm/Code/gedi_biomass_mapping/src/processing/lidar_processing_script.R --lidar_path=/gws/nopw/j04/forecol/data/Paisagenslidar/Mission_2015/JAC_A04_2015_LiDAR/JAC_A04_2015_laz --save_path=/gws/nopw/j04/forecol/data/Paisagenslidar/processed/2015/JAC_A04_2015 --buff

### Metrics: Create slurm job

In [12]:
# Safety switch for this cell: while True, only the first job script is printed
# for inspection and nothing is sent to the scheduler. Set to False to submit a
# grid-metrics job for every survey with processed point clouds.
DRY_RUN = True

for idx, survey in paisagens_surveys.iterrows():

    # Surveys without a known source path cannot be processed
    if survey["path"] is None or pd.isna(survey["path"]):
        continue

    out_dir = PAISAGENSLIDAR_PATH / "processed" / str(survey["year"]) / survey["name"]

    # Grid metrics need a non-empty "processed" folder as input
    if not contains(out_dir, "processed") or is_empty(out_dir / "processed"):
        print(survey["name"], "not available.")
        continue

    job_file = lidR_gridmetrics_job(survey,
                                    out_dir,
                                    # metric name -> list of grid sizes to compute it at
                                    metrics={"n_points": [1],
                                             "n_pulses": [1],
                                             "n_ground_points": [1],
                                             "max": [10, 50],
                                             "quantile_0.05": [10, 50],
                                             "quantile_0.1": [10, 50],
                                             "kurtosis": [10, 50],
                                             "mean": [10, 50],
                                             "interquartile_range": [10, 50],
                                             "longo_biomass": [10, 50],
                                             "canopy_height": [1],
                                            },
                                    buffer=10,
                                    chunk_size=0,
                                    as_file=False,
                                    overwrite=False,
                                    queue="short-serial",
                                    required_memory_per_node=8000,
                                    max_time=datetime.time(10, 0, 0)
                                   )

    if DRY_RUN:
        # Preview only: show the generated script and stop after the first survey
        print(job_file)
        break

    # Submit job
    sbatch(job_file)
    print(f"Submitted job for {survey['name']}")

#!/bin/bash
#SBATCH --partition=short-serial
#SBATCH --time=10:00:00  # Set a timeout for the job in HH:MM:SS
#SBATCH --mem=8000 # Set the amount of memory for per node in MB.
#SBATCH --job-name=CUI_A01_2008_grid_metrics
#SBATCH --chdir=/gws/nopw/j04/forecol/data/Paisagenslidar/processed/2008/CUI_A01_2008/slurm
#SBATCH --output=slurm_CUI_A01_2008_metrics-%j.out
#SBATCH --error=slurm_CUI_A01_2008_metrics-%j.err


# --- TEMPLATE AUTOGENERATED ---


source ~/.bashrc
conda activate r4_env

echo "$(which R)"
echo "$(R --version)"

echo "template generation time: 2021-06-24 13:00:32.757802"
echo "survey: CUI_A01_2008"
echo "path: /gws/nopw/j04/forecol/data/Paisagenslidar/processed/2008/CUI_A01_2008/processed"
echo "metrics: {'n_points': [1], 'n_pulses': [1], 'n_ground_points': [1], 'max': [10, 50], 'quantile_0.05': [10, 50], 'quantile_0.1': [10, 50], 'kurtosis': [10, 50], 'mean': [10, 50], 'interquartile_range': [10, 50], 'longo_biomass': [10, 50], 'canopy_height': [1]}"


# `Canopy height` 

-----------

## 2. EBA - Cluster normalisation

In [13]:
eba_meta_path = latest_matching_file(EBALIDAR_PATH / "*metadata")
eba_meta = gpd.read_file(eba_meta_path)

In [14]:
# Catalogue every raw EBA .laz file, keyed by file name, with its path, the
# file name up to the first ".", and its size in MB (rounded to 2 decimals).
eba_laz = {}
for laz_file in (EBALIDAR_PATH / "laz_EBA").glob("*.laz"):
    size_in_mb = round(laz_file.stat().st_size / 1024**2, 2)
    eba_laz[laz_file.name] = {
        "path": laz_file,
        "name": laz_file.name.split(".")[0],
        "size": size_in_mb,
    }

In [15]:
# One row per .laz file on disk; `reduced_filename` is the merge key used to
# join this listing with the EBA metadata.
df = (
    pd.DataFrame(eba_laz)
    .T
    .convert_dtypes()
    .rename(columns={"size": "size (MB)"})
    .assign(reduced_filename=lambda files: files["name"] + ".laz")
)

In [19]:
# Refresh the file-related columns of the EBA metadata from the on-disk listing.
# The outer join keeps metadata rows without a file and files without metadata.
eba_new_meta = pd.merge(
    eba_meta.drop(columns=["path", "size (MB)", "name"]),
    df,
    how="outer",
    on="reduced_filename",
)
eba_new_meta["size (MB)"] = eba_new_meta["size (MB)"].astype(float)

# Index by file name (column kept as well), label the index "survey", sort by it
eba_new_meta = (
    eba_new_meta
    .set_index("reduced_filename", drop=False)
    .rename_axis("survey")
    .sort_index()
)
eba_new_meta

Unnamed: 0_level_0,survey,transect,field_data,hyperspect,obs,campaign,criteria,datafile,elev_maxim,elev_mean,...,canopy_rel,elev_sqrt_,elev_curt_,profile_ar,random,reduced_filename,geometry,path,name,size (MB)
survey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
NP_T-0001.laz,NP_T-0001.laz,0001,,,,1.0,PRODES+TERRACLASS,NP_T-0001_dn_g_n_ch1_5.laz,38.12,13.4278,...,0.402270,14.9651,15.9663,50.9332,yes,NP_T-0001.laz,"POLYGON ((4113515.087 10124216.741, 4113517.77...",/gws/nopw/j04/forecol/data/EBA_lidar/laz_EBA/N...,NP_T-0001,233.47
NP_T-0002.laz,NP_T-0002.laz,0002,,,,1.0,PRODES+TERRACLASS,NP_T-0002_dn_g_n_ch1_5.laz,53.67,15.6308,...,0.357771,18.2027,19.9904,41.2544,yes,NP_T-0002.laz,"POLYGON ((4051446.715 10275374.329, 4051471.18...",/gws/nopw/j04/forecol/data/EBA_lidar/laz_EBA/N...,NP_T-0002,254.80
NP_T-0003.laz,NP_T-0003.laz,0003,,,,1.0,PRODES+TERRACLASS,NP_T-0003_dn_g_n_ch1_5.laz,72.00,16.3030,...,0.327655,18.6746,20.6112,37.3803,yes,NP_T-0003.laz,"POLYGON ((4030556.091 10289424.141, 4030585.79...",/gws/nopw/j04/forecol/data/EBA_lidar/laz_EBA/N...,NP_T-0003,204.94
NP_T-0004.laz,NP_T-0004.laz,0004,,,,1.0,PRODES+TERRACLASS,NP_T-0004_dn_g_n_ch1_5.laz,58.54,13.6670,...,0.297323,16.5975,18.8821,33.5121,yes,NP_T-0004.laz,"POLYGON ((4117057.589 10328223.327, 4117057.60...",/gws/nopw/j04/forecol/data/EBA_lidar/laz_EBA/N...,NP_T-0004,208.09
NP_T-0005.laz,NP_T-0005.laz,0005,,,,1.0,PRODES+TERRACLASS,NP_T-0005_dn_g_n_ch1_5.laz,62.72,15.2655,...,0.602625,17.4348,19.1750,38.3784,yes,NP_T-0005.laz,"POLYGON ((4073980.272 10342069.880, 4073985.18...",/gws/nopw/j04/forecol/data/EBA_lidar/laz_EBA/N...,NP_T-0005,234.38
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NP_T-2001.laz,NP_T-2001.laz,2001,,,,2.0,Directed,NP_T-2001_dn_g_n_ch1_5.laz,75.70,19.9900,...,0.544592,22.7049,24.3514,48.3164,no,NP_T-2001.laz,"POLYGON ((4687939.276 9807283.147, 4687940.579...",/gws/nopw/j04/forecol/data/EBA_lidar/laz_EBA/N...,NP_T-2001,225.71
NP_T-2002.laz,NP_T-2002.laz,2002,,,,2.0,Directed,NP_T-2002_dn_g_n_ch1_5.laz,65.02,21.2876,...,0.451701,23.6641,25.3670,47.6701,no,NP_T-2002.laz,"POLYGON ((4015043.537 8959928.665, 4015049.131...",/gws/nopw/j04/forecol/data/EBA_lidar/laz_EBA/N...,NP_T-2002,190.41
NP_T-2004.laz,NP_T-2004.laz,2004,,,,2.0,Directed,NP_T-2004_dn_g_n_ch1_5.laz,38.56,13.5921,...,0.445896,15.4306,16.6037,48.4385,no,NP_T-2004.laz,"POLYGON ((4965389.690 8676632.304, 4965375.149...",/gws/nopw/j04/forecol/data/EBA_lidar/laz_EBA/N...,NP_T-2004,185.83
NP_T-2005.laz,NP_T-2005.laz,2005,,,,2.0,Directed,NP_T-2005_dn_g_n_ch1_5.laz,38.47,13.9174,...,0.504889,15.5562,16.6058,49.7625,no,NP_T-2005.laz,"POLYGON ((4966156.783 8700836.900, 4966152.552...",/gws/nopw/j04/forecol/data/EBA_lidar/laz_EBA/N...,NP_T-2005,196.11


In [17]:
#eba_new_meta.to_file(eba_meta_path.parent / f"eba_file_metadata_v{datetime.date.today()}", driver="GPKG")

In [18]:
# Single-row frame holding the smallest .laz file (handy as a quick test case)
smallest_position = df["size (MB)"].values.argmin()
smallest = df.iloc[smallest_position:smallest_position + 1]
smallest

Unnamed: 0,path,name,size (MB),reduced_filename
NP_T-1041_03.laz,/gws/nopw/j04/forecol/data/EBA_lidar/laz_EBA/N...,NP_T-1041_03,53.26,NP_T-1041_03.laz


### Normalisation: Create slurm job

In [20]:
# Safety switch for this cell: while True, only the first pending job script is
# printed for inspection and nothing is sent to the scheduler. Set to False to
# submit a job for every EBA file that still needs processing.
DRY_RUN = True

for idx, survey in tqdm(eba_new_meta.iterrows(), total=len(eba_new_meta)):

    # Metadata rows without a matching file on disk have no path
    if survey["path"] is None or pd.isna(survey["path"]):
        continue

    out_dir = EBALIDAR_PATH / "laz_EBA_processed" / survey["name"]

    # Skip files that already have a non-empty "processed" folder
    if contains(out_dir, "processed") and not is_empty(out_dir / "processed"):
        continue

    job_file = lidR_processing_job(survey,
                                   out_dir=out_dir,
                                   queue="short-serial",
                                   max_time=datetime.time(23, 59, 0),
                                   chunk_size=1000,
                                   buffer=50,
                                   filter_quantile=0.95,
                                   filter_sensitivity=1.1,
                                   filter_gridsize=10,
                                   as_file=False,
                                   perform_check=False,
                                   compress_intermediates=True,
                                   save_intermediates=True,
                                   required_memory_per_node=16000,
                                  )

    if DRY_RUN:
        # Preview only: show the generated script and stop after the first file
        print(job_file)
        break

    # Submit job
    sbatch(job_file)
    print(f"Submitted job for {survey['name']}")

  0%|          | 0/930 [00:00<?, ?it/s]

### Metrics: Create slurm job

In [21]:
# Safety switch for this cell: while True, only the first job script is printed
# for inspection and nothing is sent to the scheduler. Set to False to submit a
# grid-metrics job for every EBA file.
DRY_RUN = True

for idx, survey in tqdm(eba_new_meta.iterrows(), total=len(eba_new_meta)):

    # Metadata rows without a matching file on disk have no path
    if survey["path"] is None or pd.isna(survey["path"]):
        continue

    out_dir = EBALIDAR_PATH / "laz_EBA_processed" / survey["name"]

    job_file = lidR_gridmetrics_job(# File params
                                    survey,
                                    out_dir,

                                    # Processing params
                                    # (metric name -> list of grid sizes to compute it at)
                                    metrics={"point_density": [1],
                                             "pulse_density": [1],
                                             "ground_point_density": [1],
                                             "n_points": [1],
                                             "n_pulses": [1],
                                             "n_ground_points": [1],
                                             "max": [10, 50],
                                             "quantile_0.05": [10, 50],
                                             "quantile_0.1": [10, 50],
                                             "kurtosis": [10, 50],
                                             "mean": [10, 50],
                                             "interquartile_range": [10, 50],
                                             "longo_biomass": [10, 50],
                                             "canopy_height": [1, 2],
                                            },
                                    buffer=10,
                                    chunk_size=0,
                                    as_file=False,

                                    # Queue params
                                    queue="short-serial",
                                    max_time=datetime.time(10, 0, 0),
                                    required_memory_per_node=8000,
                                   )

    if DRY_RUN:
        # Preview only: show the generated script and stop after the first file
        print(job_file)
        break

    # Submit job
    sbatch(job_file)
    print(f"Submitted job for {survey['name']}")

  0%|          | 0/930 [00:00<?, ?it/s]

#!/bin/bash
#SBATCH --partition=short-serial
#SBATCH --time=10:00:00  # Set a timeout for the job in HH:MM:SS
#SBATCH --mem=8000 # Set the amount of memory for per node in MB.
#SBATCH --job-name=NP_T-0001_grid_metrics
#SBATCH --chdir=/gws/nopw/j04/forecol/data/EBA_lidar/laz_EBA_processed/NP_T-0001/slurm
#SBATCH --output=slurm_NP_T-0001_metrics-%j.out
#SBATCH --error=slurm_NP_T-0001_metrics-%j.err


# --- TEMPLATE AUTOGENERATED ---


source ~/.bashrc
conda activate r4_env

echo "$(which R)"
echo "$(R --version)"

echo "template generation time: 2021-06-24 13:02:05.482052"
echo "survey: NP_T-0001"
echo "path: /gws/nopw/j04/forecol/data/EBA_lidar/laz_EBA_processed/NP_T-0001/processed"
echo "metrics: {'point_density': [1], 'pulse_density': [1], 'ground_point_density': [1], 'n_points': [1], 'n_pulses': [1], 'n_ground_points': [1], 'max': [10, 50], 'quantile_0.05': [10, 50], 'quantile_0.1': [10, 50], 'kurtosis': [10, 50], 'mean': [10, 50], 'interquartile_range': [10, 50], 'longo_biomass': [1

----------------

# To be deleted: Expedited processing for relevant files

In [119]:
# EBA files to fast-track through the grid-metrics step
relevant = [
    "NP_T-0408.laz",
    "NP_T-0859.laz",
    "NP_T-0145.laz",
    "NP_T-0751.laz",
    "NP_T-2002.laz",
    "NP_T-0416.laz",
    "NP_T-0669.laz",
]

# Safety switch for this cell: while True, only the first job is generated and
# printed, and nothing is sent to the scheduler. Set to False to submit all.
DRY_RUN = True

# NOTE(review): `create_lidar_gridmetrics_job` is only defined in the "Legacy"
# section further down this notebook, so on a fresh top-to-bottom run this cell
# raises NameError unless that cell is executed first. The other cells use the
# imported `lidR_gridmetrics_job` — confirm whether this cell should too.
for idx, survey in eba_new_meta.loc[relevant].iterrows():

    out_dir = EBALIDAR_PATH / "laz_EBA_processed" / survey["name"]

    job_file = create_lidar_gridmetrics_job(idx,
                                            survey,
                                            out_dir,
                                            # metric name -> list of grid sizes
                                            metrics={"point_density": [1],
                                                     "pulse_density": [1],
                                                     "ground_point_density": [1],
                                                     "n_points": [1],
                                                     "n_pulses": [1],
                                                     "n_ground_points": [1],
                                                     "max": [10, 50],
                                                     "quantile_0.05": [10, 50],
                                                     "quantile_0.1": [10, 50],
                                                     "kurtosis": [10, 50],
                                                     "mean": [10, 50],
                                                     "interquartile_range": [10, 50],
                                                     "longo_biomass": [10, 50],
                                                     "canopy_height": [1, 2],
                                                    },
                                            buffer=10,
                                            chunk_size=0,
                                            as_file=True,
                                            overwrite=False,
                                            queue="test",
                                           )

    if DRY_RUN:
        # Preview only: show the result for the first file and stop
        print(job_file)
        break

    # Submit job
    sbatch(job_file)
    print(f"Submitted job for {survey['name']}")

Submitted job for NP_T-0408
Submitted job for NP_T-0859
Submitted job for NP_T-0145
Submitted job for NP_T-0751
Submitted job for NP_T-2002
Submitted job for NP_T-0416
Submitted job for NP_T-0669


In [11]:
# Paisagens surveys (index labels of `paisagens_surveys`) to fast-track
paisagens_relevant = [
    "JAM_A03_2013_laz",
    "JAM_A02_2013_laz",
    "JAM_A02a_2014_laz",
    "JAM_A02c_2015_LiDAR",
    "SAN_A02_2014_laz",
]

# Safety switch for this cell: while True, only the first job script is printed
# for inspection and nothing is sent to the scheduler. Set to False to submit.
DRY_RUN = True

# The [4::] slice skips the first four selected rows, i.e. with the five labels
# above only the last one is handled. NOTE(review): presumably a leftover from
# resuming an interrupted run — confirm before reusing this cell.
for idx, survey in paisagens_surveys.loc[paisagens_relevant][4::].iterrows():

    out_dir = PAISAGENSLIDAR_PATH / "processed" / str(survey["year"]) / survey["name"]

    # Grid metrics need a non-empty "processed" folder as input
    if not contains(out_dir, "processed") or is_empty(out_dir / "processed"):
        print(survey["name"], "not available.")
        continue

    job_file = lidR_gridmetrics_job(survey,
                                    out_dir,
                                    # metric name -> list of grid sizes to compute it at
                                    metrics={"n_points": [1],
                                             "n_pulses": [1],
                                             "n_ground_points": [1],
                                             "max": [10, 50],
                                             "quantile_0.05": [10, 50],
                                             "quantile_0.1": [10, 50],
                                             "kurtosis": [10, 50],
                                             "mean": [10, 50],
                                             "interquartile_range": [10, 50],
                                             "longo_biomass": [10, 50],
                                             "canopy_height": [1],
                                            },
                                    buffer=10,
                                    chunk_size=0,
                                    as_file=False,
                                    overwrite=False,
                                    queue="test",
                                    required_memory_per_node=8000,
                                    max_time=datetime.time(10, 0, 0)
                                   )

    if DRY_RUN:
        # Preview only: show the generated script and stop after the first survey
        print(job_file)
        break

    # Submit job
    sbatch(job_file)
    print(f"Submitted job for {survey['name']}")

#!/bin/bash
#SBATCH --partition=test
#SBATCH --time=10:00:00  # Set a timeout for the job in HH:MM:SS
#SBATCH --mem=8000 # Set the amount of memory for per node in MB.
#SBATCH --job-name=SAN_A02_2014_grid_metrics
#SBATCH --chdir=/gws/nopw/j04/forecol/data/Paisagenslidar/processed/2014/SAN_A02_2014/slurm
#SBATCH --output=slurm_SAN_A02_2014_metrics-%j.out
#SBATCH --error=slurm_SAN_A02_2014_metrics-%j.err


# --- TEMPLATE AUTOGENERATED ---


source ~/.bashrc
conda activate r4_env

echo "$(which R)"
echo "$(R --version)"

echo "template generation time: 2021-06-24 18:16:43.778255"
echo "survey: SAN_A02_2014"
echo "path: /gws/nopw/j04/forecol/data/Paisagenslidar/processed/2014/SAN_A02_2014/processed"
echo "metrics: {'n_points': [1], 'n_pulses': [1], 'n_ground_points': [1], 'max': [10, 50], 'quantile_0.05': [10, 50], 'quantile_0.1': [10, 50], 'kurtosis': [10, 50], 'mean': [10, 50], 'interquartile_range': [10, 50], 'longo_biomass': [10, 50], 'canopy_height': [1]}"


# `Canopy height` at grid 

----

# Legacy

In [5]:
def create_lidar_processing_job(idx, 
                                survey, 
                                r_file,
                                out_dir,
                                buffer = 50,
                                chunk_size = 0,
                                filter_quantile = 0.95, 
                                filter_sensitivity = 1.1,
                                filter_gridsize = 10,
                                compress_intermediates = True,
                                save_intermediates = True,
                                perform_check = True,
                                as_file = True,
                                **kwargs
                               ):
    """Build a SLURM batch script that runs the lidar processing R script on one survey.

    Side effect: creates `<out_dir>/slurm` (also used as the job's working directory).

    Args:
        idx: Index label of the survey; only used as a prefix of the job name.
        survey: Mapping / row providing at least "path" (lidar input) and "name".
        r_file: Path of the R script to execute with `Rscript`.
        out_dir: `pathlib.Path` of the survey's output directory (passed as `--save_path`).
        buffer, chunk_size, filter_quantile, filter_sensitivity, filter_gridsize:
            Forwarded verbatim as command-line options of the R script.
        compress_intermediates, save_intermediates, perform_check:
            Booleans, forwarded to the R script as TRUE / FALSE.
        as_file: If True, write the script to `<out_dir>/slurm/slurm_<name>.sh`.
        **kwargs: Forwarded to `sbatch_header`.

    Returns:
        The path of the written job file if `as_file` is True, otherwise the
        script text (header + body) as a string.

    Raises:
        AssertionError: If the survey's lidar path does not exist.
    """
    # Path for lidar data
    lidar_path = pathlib.Path(survey["path"])
    assert lidar_path.exists(), f"Lidar path does not exist: {lidar_path}"
    
    # Set up save prefix
    save_prefix = survey["name"] + "_"

    # Create working directory path
    slurm_dir = out_dir / "slurm"
    slurm_dir.mkdir(parents=True, exist_ok=True)

    # Create sbatch header.
    # The f-string also accepts non-string index labels (e.g. integers), for
    # which `idx + "-->"` would raise TypeError; for strings it is identical.
    header = sbatch_header(
                  job_name = f"{idx}-->{survey['name']}",
                  working_directory=slurm_dir,
                  output_file_pattern=f"slurm_{survey['name']}-%j.out",
                  error_file_pattern=f"slurm_{survey['name']}-%j.err",
                  **kwargs)

    body = seperator("Activating conda environment")
    body += "source ~/.bashrc\n"
    body += "conda activate r4_env\n"
    body += seperator("Checking R version")
    body += echo("$(which R)")
    body += echo("$(R --version)")
    body += seperator("Analysing file")
    body += echo(f"template generation time: {datetime.datetime.now()}")
    body += echo(f"survey: {survey['name']}")
    body += echo(f"path: {lidar_path}")
    body += seperator("Executing R script")
    body += (f"Rscript --verbose --no-save {r_file}" 
             f" --lidar_path={lidar_path}" 
             f" --save_path={out_dir}"
             f" --buffer={buffer}"
             f" --chunk_size={chunk_size}"
             f" --filter_quantile={filter_quantile}"
             f" --filter_sensitivity={filter_sensitivity}"
             f" --filter_gridsize={filter_gridsize}"
             f" --save_prefix={save_prefix}"
             f" --perform_check={'TRUE' if perform_check else 'FALSE'}"
             f" --save_intermediates={'TRUE' if save_intermediates else 'FALSE'}"
             f" --compress_intermediates={'TRUE' if compress_intermediates else 'FALSE'}"
            )

    script = header + body
    if not as_file:
        return script

    job_file = slurm_dir / f"slurm_{survey['name']}.sh"
    job_file.write_text(script)
    return job_file

In [13]:
# Metric names accepted by `create_lidar_gridmetrics_job`; that function
# additionally accepts any name containing "quantile_" (e.g. "quantile_0.05").
ALLOWED_METRICS = ("point_density", "pulse_density", "ground_point_density",
                   "n_points", "n_pulses", "n_ground_points",
                   "max", "standard_dev", "mask", "mean", "kurtosis", 
                   "interquartile_range", "quantile", "longo_biomass")

# R scripts invoked by the generated job scripts.
# NOTE(review): absolute, user-specific paths — presumably these files live
# under the imported SRC_PATH; confirm and consider deriving them from it.
r_gridmetrics_file = "/home/users/svm/Code/gedi_biomass_mapping/src/processing/lidar_gridmetrics_script.R"
r_canopy_height_file = "/home/users/svm/Code/gedi_biomass_mapping/src/processing/lidar_canopy_height_script.R"

def create_lidar_gridmetrics_job(idx, 
                                survey, 
                                out_dir,
                                metrics,
                                buffer = 10,
                                chunk_size = 0,
                                save_prefix="",
                                check_index=True,
                                overwrite=False,
                                as_file = False,
                                **kwargs
                               ):
    """Build a SLURM batch script that computes grid metrics for one processed survey.

    The script contains one `Rscript` call per (metric, grid size) pair.
    "canopy_height" entries are run with `r_canopy_height_file`, all other
    metrics with `r_gridmetrics_file`.

    Side effect: creates `<out_dir>/slurm` and `<out_dir>/grid_metrics`.

    Args:
        idx: Index label of the survey (not used by this function).
        survey: Mapping / row providing at least "name".
        out_dir: `pathlib.Path` of the survey's output directory; the input
            point clouds are read from `<out_dir>/processed`.
        metrics: Dict mapping metric name -> iterable of grid sizes. Names must
            be in `ALLOWED_METRICS`, contain "quantile_", or be "canopy_height".
            The dict is not modified.
        buffer, chunk_size: Forwarded verbatim to the R scripts.
        save_prefix: Prefix forwarded as `--save_prefix`; when empty (default),
            "<survey name>_" is used.
        check_index, overwrite: Booleans, forwarded to the R scripts as TRUE / FALSE.
        as_file: If True, write the script to
            `<out_dir>/slurm/slurm_<name>_grid_metrics.sh`.
        **kwargs: Forwarded to `sbatch_header`.

    Returns:
        The path of the written job file if `as_file` is True, otherwise the
        script text (header + body) as a string.

    Raises:
        AssertionError: If `<out_dir>/processed` does not exist or a metric
            name is not allowed.
    """
    # Path for lidar data
    lidar_path = pathlib.Path(out_dir / "processed")
    assert lidar_path.exists(), f"Processed lidar path does not exist: {lidar_path}"
    
    # Set up save prefix: honour an explicitly passed prefix (it used to be
    # overwritten unconditionally); the default stays "<survey name>_".
    if not save_prefix:
        save_prefix = survey["name"] + "_"

    # Create working directory path
    slurm_dir = out_dir / "slurm"
    slurm_dir.mkdir(parents=True, exist_ok=True)
    grid_metrics_dir = out_dir / "grid_metrics"
    grid_metrics_dir.mkdir(parents=True, exist_ok=True)

    # Create sbatch header
    header = sbatch_header(
                  job_name = survey["name"] + "_grid_metrics",
                  working_directory=slurm_dir,
                  output_file_pattern=f"slurm_{survey['name']}_metrics-%j.out",
                  error_file_pattern=f"slurm_{survey['name']}_metrics-%j.err",
                  **kwargs)

    body = seperator("Activating conda environment")
    body += "source ~/.bashrc\n"
    body += "conda activate r4_env\n"
    body += seperator("Checking R version")
    body += echo("$(which R)")
    body += echo("$(R --version)")
    body += seperator("Analysing file")
    body += echo(f"template generation time: {datetime.datetime.now()}")
    body += echo(f"survey: {survey['name']}")
    body += echo(f"path: {lidar_path}")
    body += echo(f"metrics: {metrics}")
    body += seperator("Executing R script")
    
    # Work on a shallow copy so that removing "canopy_height" below does not
    # alter the dict owned by the caller.
    remaining_metrics = dict(metrics)

    # Canopy height is produced by its own R script
    for gridsize in remaining_metrics.pop("canopy_height", []):
        body += f"\n# `Canopy height` at grid size `{gridsize}m`\n"
        body += (f"Rscript --verbose --no-save {r_canopy_height_file}" 
                 f" --lidar_path={lidar_path}"
                 f" --gridsize={gridsize}" 
                 f" --save_path={out_dir}"
                 f" --buffer={buffer}"
                 f" --chunk_size={chunk_size}"
                 f" --save_prefix={save_prefix}"
                 f" --overwrite={'TRUE' if overwrite else 'FALSE'}"
                 f" --check_index={'TRUE' if check_index else 'FALSE'}"
                 "\n"
                )
    
    for metric, gridsizes in remaining_metrics.items():
        assert metric in ALLOWED_METRICS or "quantile_" in metric, f"{metric} not allowed."
        # "quantile_<q>" is passed as --metric=quantile --quantile=<q>.
        # NOTE(review): a bare "quantile" (allowed above) yields
        # "--quantile=quantile" — confirm whether that name should be accepted.
        for gridsize in gridsizes:
            body += f"\n# Metric `{metric}` at grid size `{gridsize}m`\n"
            body += (f"Rscript --verbose --no-save {r_gridmetrics_file}" 
                     f" --lidar_path={lidar_path}" 
                     f" --metric={metric if 'quantile' not in metric else 'quantile'}"
                     f" --gridsize={gridsize}" 
                     f" --save_path={out_dir}"
                     f" --buffer={buffer}"
                     f" --chunk_size={chunk_size}"
                     f" --save_prefix={save_prefix}"
                     f" --overwrite={'TRUE' if overwrite else 'FALSE'}"
                     f" --check_index={'TRUE' if check_index else 'FALSE'}"
                     f"{' --quantile='+str(metric.split('_')[-1]) if 'quantile' in metric else ''}"
                     "\n"
                    )

    script = header + body
    if not as_file:
        return script

    job_file = slurm_dir / f"slurm_{survey['name']}_grid_metrics.sh"
    job_file.write_text(script)
    return job_file