Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow batch -F to set a more specific naming template #6934

Merged
merged 12 commits into from
Aug 3, 2022
25 changes: 22 additions & 3 deletions doc/rst/source/batch.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Synopsis
|-N|\ *prefix*
|-T|\ *njobs*\|\ *min*/*max*/*inc*\ [**+n**]\|\ *timefile*\ [**+p**\ *width*]\ [**+s**\ *first*]\ [**+w**\ [*str*]\|\ **W**]
[ |-D| ]
[ |-F|\ *template* ]
[ |-I|\ *includefile* ]
[ |-M|\ [*job*] ]
[ |-Q|\ [**s**] ]
Expand Down Expand Up @@ -86,8 +87,25 @@ Optional Arguments
.. _-D:

**-D**
The main script does not produce products that are named using the prefix **BATCH_NAME**, so we do not attempt
to move such files to the top directory. The main script will instead handle this in a different way.
Either the main script does not produce products named using the prefix **BATCH_NAME**, or it handles
the placement of any such product files directly; in both cases we do not attempt to move such files
to the top directory.

.. _-F:

**-F**\ *template*
Rather than build product file names from the **BATCH_NAME** prefix based on a single running number,
use this C-format *template* instead and create unique names by formatting the data columns given by
*timefile*. Some limitations apply: (1) If *timefile* has trailing text then it may be used via a
single %s code, which must be the last format statement in *template*. If no %s is found then the
trailing text is not used. (2) The *N* numerical format statements preceding it will be filled using
the first *N* data columns in *timefile*; there is no option to skip a column or to specify a specific
order of columns in the template. (3) A maximum of 5 numerical statements may be used (provided the
*timefile* has enough columns); there may be none only if %s is present, since the template must contain
at least one format statement. E.g., ``-Fmy_data_%5.2lf_%7.0lf_%s`` will use the first two columns in *timefile*
as well as the trailing text to create unique product prefix names. **Note**: Because the data set is
stored internally as double precision values, you must use floating point format statements even if some
or all of your data columns are integers. Finally, if your choice of format statements and trailing text
yields spaces or tabs in the final prefix we will automatically replace those with underscores.

.. _-I:

Expand Down Expand Up @@ -180,7 +198,8 @@ column in *timefile*. If *timefile* has trailing text then that text can be acc
**BATCH_TEXT**, and if word-splitting was explicitly requested by **+w** modifier to |-T| then the trailing
text is also split into individual word parameters **BATCH_WORD0**\ , **BATCH_WORD1**\ , etc. **Note**: Any
product(s) made by the processing scripts should be named using **BATCH_NAME** as their name prefix as these
will be automatically moved up to the starting directory upon completion.
will be automatically moved up to the starting directory upon completion (unless |-D| is in effect). However,
note that |-F| can be used to select more diverse product names based on the input parameters given via |-T|.

Data Files
----------
Expand Down
98 changes: 88 additions & 10 deletions src/batch.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ struct BATCH_CTRL {
struct BATCH_D { /* -D */
bool active;
} D;
struct BATCH_F { /* -F<template>: name product files from a C-format template instead of the running job number */
bool active; /* Checked (as Ctrl->F.active) before any -F specific processing is done */
char *template; /* Name template of product files via -T data; filled by gmt_get_required_string in parse and released in Free_Ctrl */
} F;
struct BATCH_I { /* -I<includefile> */
bool active;
char *file; /* Name of include script */
Expand Down Expand Up @@ -125,6 +129,7 @@ static void Free_Ctrl (struct GMT_CTRL *GMT, struct BATCH_CTRL *C) { /* Dealloca
gmt_M_unused (GMT);
if (!C) return;
gmt_M_str_free (C->In.file);
gmt_M_str_free (C->F.template);
gmt_M_str_free (C->I.file);
gmt_M_str_free (C->N.prefix);
gmt_M_str_free (C->S[BATCH_PREFLIGHT].file);
Expand Down Expand Up @@ -174,11 +179,40 @@ GMT_LOCAL int batch_delete_scripts (struct GMT_CTRL *GMT, struct BATCH_CTRL *Ctr
return (GMT_NOERROR);
}

GMT_LOCAL void batch_set_product_prefix (struct GMT_CTRL *GMT, struct GMT_DATASET *D, unsigned int row, unsigned int n_fmts, char *template, char *custom_name) {
	/* Create the custom product file prefix for one job.
	 *
	 * D           The -T data set; only its first table and first segment are consulted.
	 * row         Record in that segment whose values are formatted.
	 * n_fmts      Number of numerical format statements in template (0-5); that many
	 *             leading data columns of the record are passed to the formatter.
	 * template    User-supplied C-format string given via -F.
	 * custom_name Output buffer receiving the formatted prefix.
	 *
	 * When D->type is GMT_READ_DATA there is no trailing text and only the numerical
	 * columns are passed; otherwise the record's trailing text is appended as the last
	 * argument (it is simply ignored by the formatter if template has no %s).
	 * If n_fmts falls outside the handled range, custom_name is left untouched.
	 * NOTE(review): the 5-statement maximum is assumed to be enforced by the caller;
	 * the validation visible next to the -F checks only compares against D->n_columns.
	 *
	 * Output is bounded because template (and hence field widths) is user-controlled and
	 * an unbounded sprintf could overrun the caller's fixed-size buffer; an over-long
	 * result is truncated and always NUL-terminated. */
	enum { BATCH_PREFIX_LEN = 512 };	/* NOTE(review): must not exceed the caller's buffer, declared as char custom_name[GMT_LEN512] */
	struct GMT_DATASEGMENT *S = D->table[0]->segment[0];	/* Current (and only) segment */
	gmt_M_unused (GMT);
	if (D->type == GMT_READ_DATA) {	/* No trailing text used */
		switch (n_fmts) {	/* May use 1-5 numerical columns */
			case 1: snprintf (custom_name, BATCH_PREFIX_LEN, template, S->data[0][row]); break;
			case 2: snprintf (custom_name, BATCH_PREFIX_LEN, template, S->data[0][row], S->data[1][row]); break;
			case 3: snprintf (custom_name, BATCH_PREFIX_LEN, template, S->data[0][row], S->data[1][row], S->data[2][row]); break;
			case 4: snprintf (custom_name, BATCH_PREFIX_LEN, template, S->data[0][row], S->data[1][row], S->data[2][row], S->data[3][row]); break;
			case 5: snprintf (custom_name, BATCH_PREFIX_LEN, template, S->data[0][row], S->data[1][row], S->data[2][row], S->data[3][row], S->data[4][row]); break;
			default: break;	/* Nothing is written for any other count */
		}
	}
	else {	/* Use the trailing text as well */
		switch (n_fmts) {	/* May use 0-5 numerical columns since trailing text is also used */
			case 0: snprintf (custom_name, BATCH_PREFIX_LEN, template, S->text[row]); break;
			case 1: snprintf (custom_name, BATCH_PREFIX_LEN, template, S->data[0][row], S->text[row]); break;
			case 2: snprintf (custom_name, BATCH_PREFIX_LEN, template, S->data[0][row], S->data[1][row], S->text[row]); break;
			case 3: snprintf (custom_name, BATCH_PREFIX_LEN, template, S->data[0][row], S->data[1][row], S->data[2][row], S->text[row]); break;
			case 4: snprintf (custom_name, BATCH_PREFIX_LEN, template, S->data[0][row], S->data[1][row], S->data[2][row], S->data[3][row], S->text[row]); break;
			case 5: snprintf (custom_name, BATCH_PREFIX_LEN, template, S->data[0][row], S->data[1][row], S->data[2][row], S->data[3][row], S->data[4][row], S->text[row]); break;
			default: break;	/* Nothing is written for any other count */
		}
	}
	gmt_strrepc (custom_name, ' ', '_');	/* Replace any spaces with underscores to avoid template disasters */
	gmt_strrepc (custom_name, '\t', '_');	/* Replace any tabs with underscores to avoid template disasters */
}

static int usage (struct GMTAPI_CTRL *API, int level) {
const char *name = gmt_show_name_and_purpose (API, THIS_MODULE_LIB, THIS_MODULE_CLASSIC_NAME, THIS_MODULE_PURPOSE);
if (level == GMT_MODULE_PURPOSE) return (GMT_NOERROR);
GMT_Usage (API, 0, "usage: %s <mainscript> -N<prefix> -T<njobs>|<min>/<max>/<inc>[+n]|<timefile>[+p<width>][+s<first>][+w[<str>]|W] "
"[-D] [-I<includefile>] [-M[<job>]] [-Q[s]] [-Sb<postflight>] [-Sf<preflight>] "
"[-D] [-F<template>] [-I<includefile>] [-M[<job>]] [-Q[s]] [-Sb<postflight>] [-Sf<preflight>] "
"[%s] [-W[<dir>]] [-Z] [%s] [-x[[-]<n>]] [%s]\n", name, GMT_V_OPT, GMT_f_OPT, GMT_PAR_OPT);

if (level == GMT_SYNOPSIS) return (GMT_MODULE_SYNOPSIS);
Expand All @@ -200,6 +234,9 @@ static int usage (struct GMTAPI_CTRL *API, int level) {
GMT_Message (API, GMT_TIME_NONE, "\n OPTIONAL ARGUMENTS:\n");
GMT_Usage (API, 1, "\n-D");
GMT_Usage (API, -2, "No product files to move to the main directory [Default assumes there are named products].");
GMT_Usage (API, 1, "\n-F<template>");
GMT_Usage (API, -2, "Create unique BATCH_NAME file prefixes using the <template> and the columns in <timefile> [use running job numbers]. "
"Requires -T with at least the same number of columns as format statements in <template>.");
GMT_Usage (API, 1, "\n-I<includefile>");
GMT_Usage (API, -2, "Specify a script file to be inserted into the batch_init.sh script [none]. "
"Used to add constant variables needed by all batch scripts.");
Expand Down Expand Up @@ -253,6 +290,11 @@ static int parse (struct GMT_CTRL *GMT, struct BATCH_CTRL *Ctrl, struct GMT_OPTI
n_errors += gmt_M_repeated_module_option (API, Ctrl->D.active);
break;

case 'F': /* Product name template */
n_errors += gmt_M_repeated_module_option (API, Ctrl->F.active);
n_errors += gmt_get_required_string (GMT, opt->arg, opt->option, 0, &Ctrl->F.template);
break;

case 'I': /* Include file with settings used by all scripts */
n_errors += gmt_M_repeated_module_option (API, Ctrl->I.active);
n_errors += gmt_get_required_file (GMT, opt->arg, opt->option, 0, GMT_IS_DATASET, GMT_IN, GMT_FILE_REMOTE, &(Ctrl->I.file));
Expand All @@ -264,7 +306,7 @@ static int parse (struct GMT_CTRL *GMT, struct BATCH_CTRL *Ctrl, struct GMT_OPTI
Ctrl->M.job = atoi (opt->arg);
break;

case 'N': /* Movie prefix and directory name */
case 'N': /* Batch job prefix and directory name */
n_errors += gmt_M_repeated_module_option (API, Ctrl->N.active);
n_errors += gmt_get_required_string (GMT, opt->arg, opt->option, 0, &Ctrl->N.prefix);
break;
Expand Down Expand Up @@ -415,10 +457,11 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
int error = 0, precision;
int (*run_script)(const char *); /* pointer to system function or a dummy */

unsigned int n_values = 0, n_jobs = 0, job, i_job, col, k, n_cores_unused, n_to_run;
unsigned int n_values = 0, n_jobs = 0, job, i_job, col, k, n_cores_unused, n_to_run, n_fmts = 0;
unsigned int n_jobs_not_started = 0, n_jobs_completed = 0, first_i_job = 0, data_job;

bool done = false, n_written = false, has_text = false, is_classic = false, has_conf = false, issue_col0_par = false;
bool done = false, n_written = false, has_text = false, is_classic = false, has_conf = false;
bool got_time_file = false, issue_col0_par = false;

static char *extension[3] = {"sh", "csh", "bat"}, *load[3] = {"source", "source", "call"}, var_token[4] = "$$%";
static char *rmdir[3] = {"rm -rf", "rm -rf", "rd /s /q"}, *export[3] = {"export ", "setenv ", ""};
Expand Down Expand Up @@ -488,6 +531,7 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
batch_close_files (Ctrl);
Return (GMT_RUNTIME_ERROR);
}
got_time_file = true;
}

if (Ctrl->W.active) { /* Do all work in a temp directory */
Expand Down Expand Up @@ -649,6 +693,7 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
n_jobs = (unsigned int)D->n_records; /* Number of records means number of jobs */
n_values = (unsigned int)D->n_columns; /* The number of per-job parameters we need to place into the per-job parameter files */
has_text = (D && D->table[0]->segment[0]->text); /* Trailing text present */
got_time_file = true;
}
else if (gmt_count_char (GMT, Ctrl->T.file, '/') == 2) { /* Give a vector specification -Tmin/max/inc, call gmtmath to build the array */
char output[GMT_VF_LEN] = {""}, cmd[GMT_LEN128] = {""};
Expand Down Expand Up @@ -685,6 +730,32 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
fclose (Ctrl->In.fp);
Return (GMT_RUNTIME_ERROR);
}
if (Ctrl->F.active) {
if (!got_time_file) {
GMT_Report (API, GMT_MSG_ERROR, "Option -F: Requires a file via the -T option - exiting.\n");
fclose (Ctrl->In.fp);
Return (GMT_RUNTIME_ERROR);
}
n_fmts = gmt_count_char (GMT, Ctrl->F.template, '%');
if (strstr (Ctrl->F.template, "%s")) {
n_fmts--; /* One less since trailing text will be used */
if (D->type == GMT_READ_DATA) {
GMT_Report (API, GMT_MSG_ERROR, "Option -F: A string is expected by the template but your -T file has no trailing text.\n");
fclose (Ctrl->In.fp);
Return (GMT_RUNTIME_ERROR);
}
}
else if (n_fmts == 0) {
GMT_Report (API, GMT_MSG_ERROR, "Option -F: Your template has no format statements so cannot be used.\n");
fclose (Ctrl->In.fp);
Return (GMT_RUNTIME_ERROR);
}
if (n_fmts > D->n_columns) {
GMT_Report (API, GMT_MSG_ERROR, "Option -F: Not enough input columns in your -T file to satisfy template.\n");
fclose (Ctrl->In.fp);
Return (GMT_RUNTIME_ERROR);
}
}

if (!n_written) { /* Rewrite the init file to place the BATCH_NJOBS there */
GMT_Report (API, GMT_MSG_INFORMATION, "Recreate parameter initiation script given njobs has been set %s\n", init_file);
Expand Down Expand Up @@ -779,7 +850,14 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
sprintf (state_prefix, "Parameter file for job %s", state_tag);
gmt_set_comment (fp, Ctrl->In.mode, state_prefix);
sprintf (state_prefix, "%s_%s", Ctrl->N.prefix, state_tag);
gmt_set_tvalue (fp, Ctrl->In.mode, false, "BATCH_NAME", state_prefix); /* Current job name prefix (e.g., my_job_0003) */
gmt_set_tvalue (fp, Ctrl->In.mode, false, "BATCH_DIR", state_prefix); /* Current directory name for the job (e.g., my_job_0003) */
if (Ctrl->F.active) { /* Build product name from input table and template */
char custom_name[GMT_LEN512] = {""};
batch_set_product_prefix (GMT, D, data_job, n_fmts, Ctrl->F.template, custom_name);
gmt_set_tvalue (fp, Ctrl->In.mode, false, "BATCH_NAME", custom_name); /* Custom product name prefix via template */
}
else /* Default current product name prefix (e.g., my_job_0003) */
gmt_set_tvalue (fp, Ctrl->In.mode, false, "BATCH_NAME", state_prefix);
gmt_set_ivalue (fp, Ctrl->In.mode, false, "BATCH_JOB", data_job); /* Current job number (e.g., 3) */
gmt_set_tvalue (fp, Ctrl->In.mode, false, "BATCH_ITEM", state_tag); /* Current job tag (formatted job number, e.g, 0003) */
for (col = 0; col < n_values; col++) { /* Derive job variables from this row in <timefile> and copy to each parameter file as script variables */
Expand Down Expand Up @@ -824,8 +902,8 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
gmt_set_comment (fp, Ctrl->In.mode, "Include static and job-specific parameters");
fprintf (fp, "%s %s\n", load[Ctrl->In.mode], init_file); /* Include the initialization parameters */
fprintf (fp, "%s batch_params_%c1.%s\n", load[Ctrl->In.mode], var_token[Ctrl->In.mode], extension[Ctrl->In.mode]); /* Include the job parameters */
fprintf (fp, "mkdir %s\n", gmt_place_var (Ctrl->In.mode, "BATCH_NAME")); /* Make a temp directory for this job */
fprintf (fp, "cd %s\n", gmt_place_var (Ctrl->In.mode, "BATCH_NAME")); /* cd to the temp directory */
fprintf (fp, "mkdir %s\n", gmt_place_var (Ctrl->In.mode, "BATCH_DIR")); /* Make a temp directory for this job */
fprintf (fp, "cd %s\n", gmt_place_var (Ctrl->In.mode, "BATCH_DIR")); /* cd to the temp directory */
while (gmt_fgets (GMT, line, PATH_MAX, Ctrl->In.fp)) { /* Read the main script and copy to loop script, with some exceptions */
if (gmt_is_gmtmodule (line, "begin")) { /* Must insert DIR_DATA setting */
fprintf (fp, "%s", line);
Expand All @@ -839,13 +917,13 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
}
fclose (Ctrl->In.fp); /* Done reading the main script */
if (!Ctrl->D.active) /* Move job products up to main directory */
fprintf (fp, "%s %s.* %s\n", mvfile[Ctrl->In.mode], gmt_place_var (Ctrl->In.mode, "BATCH_NAME"), topdir);
fprintf (fp, "%s %s* %s\n", mvfile[Ctrl->In.mode], gmt_place_var (Ctrl->In.mode, "BATCH_NAME"), topdir);
fprintf (fp, "cd ..\n"); /* cd up to parent dir */
/* Create completion file so batch knows this job is done */
fprintf (fp, "%s %s.___\n", createfile[Ctrl->In.mode], gmt_place_var (Ctrl->In.mode, "BATCH_NAME"));
fprintf (fp, "%s %s.___\n", createfile[Ctrl->In.mode], gmt_place_var (Ctrl->In.mode, "BATCH_DIR"));
if (!Ctrl->Q.active) { /* Delete evidence; otherwise we want to leave debug evidence when doing a single job only */
gmt_set_comment (fp, Ctrl->In.mode, "Remove job directory and job parameter file");
fprintf (fp, "%s %s\n", rmdir[Ctrl->In.mode], gmt_place_var (Ctrl->In.mode, "BATCH_NAME")); /* Remove the work dir and any files in it */
fprintf (fp, "%s %s\n", rmdir[Ctrl->In.mode], gmt_place_var (Ctrl->In.mode, "BATCH_DIR")); /* Remove the work dir and any files in it */
fprintf (fp, "%s batch_params_%c1.%s\n", rmfile[Ctrl->In.mode], var_token[Ctrl->In.mode], extension[Ctrl->In.mode]); /* Remove the parameter file for this job */
}
if (Ctrl->In.mode == GMT_DOS_MODE) /* This is crucial to the "start /B ..." statement below to ensure the DOS process terminates */
Expand Down