Merge pull request #611 from binpash/future
PaSh version 0.9
angelhof committed Jul 18, 2022
2 parents 3b32506 + 01ec61f commit ea962cc
Showing 29 changed files with 470 additions and 118 deletions.
2 changes: 1 addition & 1 deletion annotations/pr.json
@@ -10,7 +10,7 @@
[
{
"operator": "exists",
"operands": ["-T, --omit-pagination"],
"operands": ["-T, --omit-pagination"]
},
{
"operator": "!exists",
12 changes: 12 additions & 0 deletions annotations/rev.json
@@ -0,0 +1,12 @@
{
"command": "rev",
"cases":
[
{
"predicate": "default",
"class": "stateless",
"inputs": ["stdin"],
"outputs": ["stdout"]
}
]
}
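
For context, a minimal bash illustration (not part of this commit) of what the annotation states: rev reads stdin, writes stdout, and handles each line independently, which is what makes it stateless and safe to parallelize.

printf 'abc\ndef\n' | rev    # prints: cba, then fed -- each line is reversed independently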
31 changes: 20 additions & 11 deletions compiler/ast_to_ir.py
@@ -863,15 +863,15 @@ def preprocess_node_case(ast_node, irFileGen, config, last_object=False):
## If we need to disable parallel pipelines, e.g., if we are in the context of an if,
## or if we are at the end of a script, then we set a variable.
def replace_df_region(asts, irFileGen, config, disable_parallel_pipelines=False, ast_text=None):
_, ir_filename = ptempfile()
ir_filename = ptempfile()

## Serialize the node in a file
with open(ir_filename, "wb") as ir_file:
pickle.dump(asts, ir_file)

## Serialize the candidate df_region asts back to shell
## so that the sequential script can be run in parallel to the compilation.
_, sequential_script_file_name = ptempfile()
sequential_script_file_name = ptempfile()
## If we don't have the original ast text, we need to unparse the ast
if (ast_text is None):
kv_asts = [ast_node_to_untyped_deep(ast) for ast in asts]
@@ -911,12 +911,12 @@ def make_call_to_runtime(ir_filename, sequential_script_file_name,

## Save the input arguments
## ```
## pash_input_args="$@"
## source $PASH_TOP/runtime/save_args.sh "${@}"
## ```
assignments = [["pash_input_args",
[make_quoted_variable("@")]]]
input_args_command = make_command([],
assignments=assignments)
arguments = [string_to_argument("source"),
string_to_argument(config.SAVE_ARGS_EXECUTABLE),
[make_quoted_variable("@")]]
input_args_command = make_command(arguments)

## Disable parallel pipelines if we are in the last command of the script.
## ```
@@ -942,15 +942,24 @@

## Restore the arguments to propagate internal changes, e.g., from `shift` outside.
## ```
## set -- $pash_input_args
## eval "set -- \"\${pash_input_args[@]}\""
## ```
##
## Alternative Solution: (TODO if we need extra performance -- avoiding eval)
## Implement an AST node that accepts and returns a literal string
## bypassing unparsing. This would make this simpler and also more
## efficient (avoiding eval).
## However, it would require some work because we would need to implement
## support for this node in various places of PaSh and the unparser.
##
##
## TODO: Maybe we need to only do this if there is a change.
set_arguments = [string_to_argument("set"),
string_to_argument("--"),
[standard_var_ast("pash_input_args")]]
##
set_arguments = [string_to_argument("eval"),
[['Q', string_to_argument('set -- \\"\\${pash_input_args[@]}\\"')]]]
set_args_node = make_command(set_arguments)


## Restore the exit code (since now we have executed `set` last)
## ```
## ( exit "$pash_runtime_final_status")
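
For context, a minimal bash sketch (not part of this commit) of why the array-based save/restore described in the comments above is used: keeping the arguments in a bash array and restoring them through eval preserves argument boundaries that the old string-based pash_input_args="$@" / set -- $pash_input_args pair would lose. The effect attributed to runtime/save_args.sh below is an assumption.

set -- "one arg" "two"
pash_input_args=("$@")                      # assumed effect of: source runtime/save_args.sh "${@}"
eval "set -- \"\${pash_input_args[@]}\""    # the restore emitted by make_call_to_runtime
echo "$#"                                   # prints 2; "one arg" remains a single argument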
95 changes: 91 additions & 4 deletions compiler/config.py
@@ -10,7 +10,7 @@
from util import *

## Global
__version__ = "0.8" # FIXME add libdash version
__version__ = "0.9" # FIXME add libdash version
GIT_TOP_CMD = [ 'git', 'rev-parse', '--show-toplevel', '--show-superproject-working-tree']
if 'PASH_TOP' in os.environ:
PASH_TOP = os.environ['PASH_TOP']
@@ -20,6 +20,7 @@
PYTHON_VERSION = "python3"
PLANNER_EXECUTABLE = os.path.join(PASH_TOP, "compiler/pash_runtime.py")
RUNTIME_EXECUTABLE = os.path.join(PASH_TOP, "compiler/pash_runtime.sh")
SAVE_ARGS_EXECUTABLE = os.path.join(PASH_TOP, "runtime/save_args.sh")

## Ensure that PASH_TMP_PREFIX is set by pa.sh
assert(not os.getenv('PASH_TMP_PREFIX') is None)
@@ -297,17 +298,97 @@ def reset_variable_cache():

variable_cache = {}

def is_array_variable(token):
return ('a' in token)

## Based on the following:
## https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html#ANSI_002dC-Quoting
def ansi_c_expand(string):
return bytes(string, "utf-8").decode("unicode_escape")

## This finds the end of this variable/function
def find_next_delimiter(tokens, i):
if (tokens[i] == "declare"):
return i + 3
else:
## TODO: When is this case actually useful?
j = i + 1
while j < len(tokens) and (tokens[j] != "declare"):
j += 1
return j

def parse_array_variable(tokens, i):
## The `declare` keyword
_declare = tokens[i]
## The type
declare_type = tokens[i+1]
assert(is_array_variable(declare_type))

## The variable name and first argument
## TODO: Test with empty array and single value array
name_and_start=tokens[i+2]
first_equal_index = name_and_start.find('=')

## If it doesn't contain any = then it is empty
if first_equal_index == -1:
## Then the name is the whole token,
## the type is None (TODO)
## and the value is empty
return name_and_start, None, "", i+3

var_name = name_and_start[:first_equal_index]
array_start = name_and_start[first_equal_index+1:]

var_values = []
if array_start == "()":
next_i = i+3
else:
## Remove the opening parenthesis
array_item = array_start[1:]

## Set the index that points to array items
curr_i = i+2

done = False
while not done:
## TODO: Is this check adequate? Or could it miss the end
## (or be misled into an earlier end by the item value?)
if array_item.endswith(")"):
done = True
array_item = array_item[:-1]

first_equal_index = array_item.find('=')
## Find the index and value of the array item
item_index_raw = array_item[:first_equal_index]
item_value = array_item[first_equal_index+1:]

## Sometimes the value starts with a dollar mark, see Bash ANSI-C quoting:
## https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html#ANSI_002dC-Quoting
if item_value.startswith("$"):
## TODO: Figure out if this is adequate
item_value = ansi_c_expand(item_value[1:])

item_index = int(item_index_raw[1:-1])

## Add None values if the index is larger than the next item (see Bash sparse arrays)
## TODO: Keep bash array values as maps to avoid sparse costs
var_values += [None] * (item_index - len(var_values))
## Set the next item
var_values.append(item_value)



## Get next array_item
curr_i += 1
array_item = tokens[curr_i]

next_i = curr_i

## TODO: Michael?
var_type = None

return var_name, var_type, var_values, next_i

##
## Read a shell variables file
##
@@ -337,7 +418,7 @@ def read_vars_file(var_file_path):
tokens = shlex.split(data)
variable_tokenizing_end_time = datetime.now()
print_time_delta("Variable Tokenizing", variable_tokenizing_start_time, variable_tokenizing_end_time)
# log(tokens)
# log("Tokens:", tokens)

# MMG 2021-03-09 definitively breaking on newlines (e.g., IFS) and function outputs (i.e., `declare -f`)
# KK 2021-10-26 no longer breaking on newlines (probably)
@@ -346,11 +427,17 @@
token_i = 0
while token_i < len(tokens):
# FIXME is this assignment needed?
_export_or_typeset = tokens[token_i]
export_or_typeset = tokens[token_i]

## Array variables require special parsing treatment
if (export_or_typeset == "declare" and is_array_variable(tokens[token_i+1])):
var_name, var_type, var_value, new_token_i = parse_array_variable(tokens, token_i)
vars_dict[var_name] = (var_type, var_value)
token_i = new_token_i
continue

new_token_i = find_next_delimiter(tokens, token_i)
rest = " ".join(tokens[(token_i+1):new_token_i])
# log("Rest:", rest)
token_i = new_token_i

space_index = rest.find(' ')
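
For context, a short bash illustration (not part of this commit) of the declare -a form that parse_array_variable consumes, including the sparse-index case that the None padding above accounts for; the variable name xs is made up.

xs=([0]="a b" [3]="c")
declare -p xs          # prints: declare -a xs=([0]="a b" [3]="c")
echo "${!xs[@]}"       # prints: 0 3 -- indices 1 and 2 are unset, i.e., the array is sparse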
4 changes: 2 additions & 2 deletions compiler/dspash/ir_helper.py
@@ -47,7 +47,7 @@ def save_configs(graph:IR, dfs_configs_paths: Dict[HDFSFileConfig, str]):
resource : DFSSplitResource = edge.get_resource()
config: HDFSFileConfig = resource.config
if config not in dfs_configs_paths:
_, config_path = ptempfile()
config_path = ptempfile()
with open(config_path, "w") as f:
f.write(config)
dfs_configs_paths[config] = config_path
@@ -57,7 +57,7 @@ def save_configs(graph:IR, dfs_configs_paths: Dict[HDFSFileConfig, str]):
resource.set_config_path(config_path)

def to_shell_file(graph: IR, args) -> str:
_, filename = ptempfile()
filename = ptempfile()

dirs = set()
for edge in graph.all_fids():
2 changes: 1 addition & 1 deletion compiler/dspash/worker.sh
@@ -20,7 +20,7 @@ source "$PASH_TOP/compiler/pash_init_setup.sh" "$@" --distributed_exec

export PASH_TMP_PREFIX="$(mktemp -d /tmp/pash_XXXXXXX)/"

function cleanup() {
cleanup() {
kill "$FILEREADER_PID" "$DISCOVERY_PID"
wait "$FILEREADER_PID" "$DISCOVERY_PID" 2>/dev/null
rm -rf "$PASH_TMP_PREFIX"
25 changes: 18 additions & 7 deletions compiler/expand.py
@@ -255,20 +255,21 @@ def lookup_variable(var, _lookup_config):
return None, var_value

if(var == '@'):
expanded_var = lookup_variable_inner('pash_input_args')
argument_values = lookup_variable_inner_core('pash_input_args')
expanded_var = " ".join(argument_values)
elif(var == '?'):
expanded_var = lookup_variable_inner('pash_previous_exit_status')
elif(var == '-'):
expanded_var = lookup_variable_inner('pash_previous_set_status')
elif(var == '#'):
input_args = lookup_variable_inner('pash_input_args')
expanded_var = str(len(input_args.split()))
argument_values = lookup_variable_inner_core('pash_input_args')
expanded_var = str(len(argument_values))
elif(var.isnumeric() and int(var) >= 1):
input_args = lookup_variable_inner('pash_input_args')
split_args = input_args.split()
input_args = lookup_variable_inner_core('pash_input_args')
# split_args = input_args.split()
index = int(var) - 1
try:
expanded_var = split_args[index]
expanded_var = input_args[index]
except:
## If there are not enough arguments and -u is set, we need to raise
if is_u_set():
@@ -287,8 +288,18 @@

return None, expanded_var

## Looks up the variable and if it is unset it raises an error
## Looks up a variable and flattens it if it is an array
def lookup_variable_inner(varname):
value = lookup_variable_inner_core(varname)
if value is not None and not isinstance(value, str):
## TODO: This is not handled at the moment (and it is unclear if it should be).
##
## This is only returned when we are in an array
raise Unimplemented("Expanded value is not None or a string", (varname, value))
return value

## Looks up the variable and if it is unset it raises an error
def lookup_variable_inner_core(varname):
value = lookup_variable_inner_unsafe(varname)
if value is None and is_u_set():
raise StuckExpansion("-u is set and variable was unset", varname)
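
For context, a small bash illustration (not part of this commit) of the behaviour the expander now mirrors: once the script's arguments are kept as an array, "$#" counts arguments rather than words, and positional parameters index into that array.

set -- "one arg" "two"
echo "$#"    # prints 2; the old string-based lookup would have split "one arg" and reported 3
echo "$2"    # prints: two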
7 changes: 6 additions & 1 deletion compiler/ir_utils.py
@@ -162,7 +162,8 @@ def string_to_arguments(string):
return [string_to_argument(word) for word in string.split(" ")]

def string_to_argument(string):
return [char_to_arg_char(char) for char in string]
ret = [char_to_arg_char(char) for char in string]
return ret

## FIXME: This is certainly not complete. It is used to generate the
## AST for the call to the distributed planner. It only handles simple
@@ -193,6 +194,10 @@ def make_background(body, redirections=[]):
node = make_kv("Background", [lineno, body, redirections])
return node

def make_backquote(node):
node = make_kv("B", node)
return node

def make_subshell(body, redirections=[]):
lineno = 0
node = make_kv("Subshell", [lineno, body, redirections])
24 changes: 12 additions & 12 deletions compiler/parser/ceda/ast2a.py
@@ -163,25 +163,25 @@ def mk_file (ty, n):
return ["File", [ty, n.nfile.fd, arg]];


def mk_dup (ty, n):
ndup = n.ndup;
vname = ndup.vname;

tgt = [];
def mk_dup(ty, n):
ndup = n.ndup
vname = ndup.vname
tgt = []

if (not vname):
dupfd = ndup.dupfd;
dupfd = ndup.dupfd
if (dupfd == -1):
tgt.append (["C", ORD_MINUS]);
tgt.append(["C", ORD_MINUS])
else:
dupfd_str = str (dupfd);
dupfd_str = str(dupfd)

for i in range (len (dupfd_str)):
tgt.append (["C", ord (dupfd_str [i])]);
for i in range(len(dupfd_str)):
tgt.append(["C", ord(dupfd_str[i])])
else:
tgt = to_arg (vname.narg);
tgt = to_arg(vname.contents.narg)

return (["Dup", [ty, ndup.fd, tgt]]);
return (["Dup", [ty, ndup.fd, tgt]])


def mk_here (ty, n):
10 changes: 6 additions & 4 deletions compiler/parser/ceda/ast2shell.py
@@ -332,9 +332,9 @@ def string_of_arg_char (c, is_quoted=False):

## MMG 2021-09-20 It might be safe to move everything except for " in the second list, but no need to do it if the tests pass
## Chars to escape unconditionally
chars_to_escape = ["'", '"', '`', '(', ')', '{', '}', '$', '!', '&', '|', ';']
chars_to_escape = ["'", '"', '`', '(', ')', '{', '}', '$', '&', '|', ';']
## Chars to escape only when not quoted
chars_to_escape_when_no_quotes = ['*', '?', '[', ']', '#', '<', '>', '~', ' ']
chars_to_escape_when_no_quotes = ['*', '?', '[', ']', '#', '<', '>', '~', '!', ' ']
if char in chars_to_escape:
return '\\' + char
elif char in chars_to_escape_when_no_quotes and not is_quoted:
@@ -393,7 +393,7 @@ def string_of_arg_char (c, is_quoted=False):
# | [] -> ""
# | c :: a -> string_of_arg_char c ^ string_of_arg a
def string_of_arg (args, is_quoted=False):
# print (args);
# print ("Unparsing:", args, " -- is quoted:", is_quoted)

i = 0
text = []
@@ -414,7 +414,9 @@

text = "".join(text)

return (text);
# print("To text:", text)

return (text)


# and string_of_assign (v,a) = v ^ "=" ^ string_of_arg a
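
For context, a short bash illustration (not part of this commit) of why '!' moved from the unconditional escape list to the escape-only-when-unquoted list: inside double quotes bash keeps a backslash before '!' literally, so unconditional escaping would have produced a spurious backslash in quoted arguments.

echo "\!"    # prints: \!  -- inside double quotes the backslash is kept literally
echo \!      # prints: !   -- outside quotes the escape is harmless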
