Instance type: `m5.8xlarge`

Container resources: 24 cores and 120 Gi of memory

In [1]:
import os
import subprocess
import glob

from IPython import get_ipython

# Handle to the active IPython shell; later cells call its magic API to run
# the rpy2 line magics from plain Python code.
ipython = get_ipython()

In [44]:
# --- Input data configuration ------------------------------------------------
# S3 bucket holding the inputs and the local directory they are copied into.
bucket_with_data = "hsi-kale"
input_dir_data = "/shared_volume/input_data"

# Occurrence layer: directory and layer name handed to rgdal::readOGR, and the
# R variable the layer is assigned to.
dir_specie = "Ponca_DV_loc"
file_specie = "poncadav2"
variable_specie = "poncaloc"

# Mask raster: directory, file name, and the R variable it is assigned to.
dir_mask_specie = "Ponca_DV"
file_mask_specie = "poncamask.tif"
mask_variable_specie = "ponca_mask"

# --- Pieces of the "%R" command that loads both objects ----------------------
string1 = "R " + variable_specie + " <- rgdal::readOGR("
string2 = os.path.join(input_dir_data, dir_specie)
string3 = mask_variable_specie + " <- raster::raster("
string4 = os.path.join(input_dir_data, dir_mask_specie, file_mask_specie)

# Full command: "R <occurrences> <- readOGR("<dir>","<layer>");<mask> <- raster("<file>")"
string_data_input = (
    string1 + '"' + string2 + '","' + file_specie + '");'
    + string3 + '"' + string4 + '")'
)


In [None]:
# Copy the whole input bucket into the local input directory so that the R
# command built in the configuration cell can read its files from disk.
# exist_ok=True makes the cell safe to re-run.
os.makedirs(input_dir_data, exist_ok=True)

cmd_subprocess = ["aws", "s3", "cp",
                  "s3://" + bucket_with_data,
                  input_dir_data,
                  "--recursive"]

# check=True: raise CalledProcessError if the AWS CLI exits non-zero, instead
# of letting a failed download surface later as a missing-file error in R.
subprocess.run(cmd_subprocess, check=True)

In [None]:
# Make the rpy2 line magics used below (%R, %Rget) available in this kernel.
# run_line_magic(name, line) is used instead of the deprecated
# InteractiveShell.magic("name line").
ipython.run_line_magic("load_ext", "rpy2.ipython")

# R packages used by the load command: rgdal (readOGR) and raster (raster).
string_libraries = """library(rgdal); library(raster)"""
ipython.run_line_magic("R", string_libraries)

# string_data_input is a complete "R <code>" magic command; split the magic
# name off at the first space, which is what magic() did internally.
magic_name, _, r_code = string_data_input.partition(" ")
ipython.run_line_magic(magic_name, r_code)

# Fetch the objects under the R names chosen in the configuration cell, so the
# names read here always match the ones string_data_input assigned.
poncaloc = ipython.run_line_magic("Rget", variable_specie)
ponca_mask = ipython.run_line_magic("Rget", mask_variable_specie)

In [3]:
# Reload the rpy2 magics and hand the occurrence object back to R.
# run_line_magic(name, line) is used instead of the deprecated
# InteractiveShell.magic("name line").
ipython.run_line_magic("load_ext", "rpy2.ipython")
print(poncaloc)
ipython.run_line_magic("Rpush", "poncaloc")

string_libraries = """library(rgdal)"""
ipython.run_line_magic("R", string_libraries)

# Reproject the occurrences with sp::spTransform to the Lambert Conformal
# Conic CRS given below (+proj=lcc, units in metres).
string_transform = """poncaloc_transf <- sp::spTransform(poncaloc,
                                                           CRSobj = "+proj=lcc +lat_1=17.5 +lat_2=29.5 +lat_0=12 +lon_0=-102 +x_0=2500000 +y_0=0 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0")
                   """
ipython.run_line_magic("R", string_transform)

# Bring the reprojected object back into Python for the next cell.
poncaloc_transf = ipython.run_line_magic("Rget", "poncaloc_transf")


In [4]:
# Reload the rpy2 magics and hand the reprojected occurrences back to R.
# run_line_magic(name, line) is used instead of the deprecated
# InteractiveShell.magic("name line").
ipython.run_line_magic("load_ext", "rpy2.ipython")
print(poncaloc_transf)
ipython.run_line_magic("Rpush", "poncaloc_transf")

string_libraries = """library(hsi)"""
ipython.run_line_magic("R", string_libraries)

# Build the temporal data object with hsi's sp_temporal_data: coordinates come
# from the "coords.x1"/"coords.x2" columns, the year from "Year", and the
# yearly layers are the *.tif files under layers_by_year_dir.
string_test = """test_sp <- sp_temporal_data(occs=poncaloc_transf,longitude = "coords.x1",
                                               latitude = "coords.x2",sp_year_var="Year",
                                               layers_by_year_dir ="/shared_volume/forest_jEquihua_mar/",
                                               layers_ext = "*.tif$",reclass_year_data = T)
              """
ipython.run_line_magic("R", string_test)

# Bring the result back into Python for the next cell.
test_sp = ipython.run_line_magic("Rget", "test_sp")


In [5]:
# Reload the rpy2 magics and the R packages this cell needs.
# run_line_magic(name, line) is used instead of the deprecated
# InteractiveShell.magic("name line").
ipython.run_line_magic("load_ext", "rpy2.ipython")

string_libraries = """library(hsi);library(raster)"""
ipython.run_line_magic("R", string_libraries)

# Hand the temporal data object and the mask back to R.
print(test_sp)
print(ponca_mask)
ipython.run_line_magic("Rpush", "test_sp")
ipython.run_line_magic("Rpush", "ponca_mask")

# Filter the localities that will be used by means of the mask.
string_filter = """test_sp_mask <- occs_filter_by_mask(test_sp,ponca_mask)
                """
ipython.run_line_magic("R", string_filter)

# Remove localities duplicated within a year, using the mask's first
# resolution value as the distance threshold.
# The mask-filtered occurrences (test_sp_mask) are passed here rather than the
# unfiltered test_sp; otherwise the filtering step above has no effect on
# anything downstream.
string_clean_test = """test_sp_clean <- clean_dup_by_year(this_species = test_sp_mask,
                                                            threshold = res(ponca_mask)[1])
                    """
ipython.run_line_magic("R", string_clean_test)

# Extract the layer values for the cleaned occurrences, selecting layers whose
# names match "_mar".
string_extract = """e_test <- extract_by_year(this_species=test_sp_clean,layers_pattern="_mar")
                 """
ipython.run_line_magic("R", string_extract)

# Bring the extraction result back into Python for the next cell.
e_test = ipython.run_line_magic("Rget", "e_test")


In [6]:
# Reload the rpy2 magics and hand the extracted data back to R.
# run_line_magic(name, line) is used instead of the deprecated
# InteractiveShell.magic("name line").
ipython.run_line_magic("load_ext", "rpy2.ipython")
print(e_test)
ipython.run_line_magic("Rpush", "e_test")

string_libraries = """library(hsi)"""
ipython.run_line_magic("R", string_libraries)

# Model selection with hsi's find_best_model, run in parallel.
# n_cores = 24 equals the core count noted for the container at the top of
# this notebook; adjust it if the container size changes.
string_bestmodel = """best_model_2004 <- find_best_model(this_species = e_test,
                                                           cor_threshold = 0.8,
                                                           ellipsoid_level = 0.975,
                                                           nvars_to_fit = 3,E = 0.05,
                                                           RandomPercent = 70,
                                                           NoOfIteration = 1000,
                                                           parallel = TRUE,
                                                           n_cores = 24,
                                                           plot3d = FALSE)
                   """
ipython.run_line_magic("R", string_bestmodel)

# Bring the selected model back into Python for the next cell.
best_model_2004 = ipython.run_line_magic("Rget", "best_model_2004")


In [7]:
# Reload the rpy2 magics and the R packages this cell needs.
# run_line_magic(name, line) is used instead of the deprecated
# InteractiveShell.magic("name line").
ipython.run_line_magic("load_ext", "rpy2.ipython")

string_libraries = """library(hsi);library(raster)"""
ipython.run_line_magic("R", string_libraries)

# Hand the selected model and the mask back to R.
print(best_model_2004)
print(ponca_mask)
ipython.run_line_magic("Rpush", "best_model_2004")
ipython.run_line_magic("Rpush", "ponca_mask")

# Output directory for the projections. It is defined once here and
# interpolated into the R call below, so the directory created from Python and
# the save_dir used by R cannot drift apart.
results = "/shared_volume/new_model_parallel/26_05_2021_2/"

string_temporal_proj = """temporal_projection(this_species = best_model_2004,
                                                save_dir = "%s",
                                                sp_mask = ponca_mask,
                                                crs_model = NULL,
                                                sp_name ="pan_onca",
                                                plot3d = FALSE)
                      """ % results

# exist_ok=True makes the cell safe to re-run.
os.makedirs(results, exist_ok=True)

ipython.run_line_magic("R", string_temporal_proj)

# NOTE(review): the R call above does not assign its result, so the R name
# `temporal_projection` fetched here is presumably the hsi function itself
# rather than a model output. Kept for compatibility; confirm it is needed.
temporal_projection = ipython.run_line_magic("Rget", "temporal_projection")


In [32]:
# Upload the model output to S3. Only the first entry found under `results`
# is uploaded, recursively.
matches = glob.glob(results + '*')
if not matches:
    # Fail with an explicit message instead of a bare IndexError on [0].
    raise FileNotFoundError(
        "No model outputs found under " + results + "; nothing to upload"
    )
dir_to_upload = matches[0]

# NOTE(review): the destination prefix is "26_05_2021" while `results` ends in
# "26_05_2021_2" -- confirm this is the intended target.
cmd_subprocess = ["aws", "s3", "cp",
                  dir_to_upload,
                  "s3://hsi-kale-results/26_05_2021",
                  "--recursive"]

# check=True: raise CalledProcessError if the AWS CLI exits non-zero, so a
# failed upload is not mistaken for success.
subprocess.run(cmd_subprocess, check=True)