diff --git a/psyneulink/core/components/functions/nonstateful/optimizationfunctions.py b/psyneulink/core/components/functions/nonstateful/optimizationfunctions.py
index df8d182577c..6f4ce9f8588 100644
--- a/psyneulink/core/components/functions/nonstateful/optimizationfunctions.py
+++ b/psyneulink/core/components/functions/nonstateful/optimizationfunctions.py
@@ -769,18 +769,22 @@ def _is_static(it:SampleIterator):
                 return False

             assert all(_is_static(sample_iterator) for sample_iterator in self.search_space)
+            assert ocm is ocm.agent_rep.controller

-            # Compiled evaluate expects the same variable as mech function
-            variable = [input_port.parameters.value.get(context) for input_port in ocm.input_ports]
+
+            # Compiled evaluate expects the same variable as composition
+            state_features = ocm.parameters.state_feature_values._get(context)
+            inputs, num_inputs_sets = ocm.agent_rep._parse_run_inputs(state_features, context)
+
             num_evals = np.prod([d.num for d in self.search_space])

             # Map allocations to values
             comp_exec = pnlvm.execution.CompExecution(ocm.agent_rep, [context.execution_id])
             execution_mode = ocm.parameters.comp_execution_mode._get(context)
             if execution_mode == "PTX":
-                outcomes = comp_exec.cuda_evaluate(variable, num_evals)
+                outcomes = comp_exec.cuda_evaluate(inputs, num_inputs_sets, num_evals)
             elif execution_mode == "LLVM":
-                outcomes = comp_exec.thread_evaluate(variable, num_evals)
+                outcomes = comp_exec.thread_evaluate(inputs, num_inputs_sets, num_evals)
             else:
                 assert False, f"Unknown execution mode for {ocm.name}: {execution_mode}."
@@ -1744,14 +1748,46 @@ def _gen_llvm_select_min_function(self, *, ctx:pnlvm.LLVMBuilderContext, tags:fr
         return builder.function

     def _gen_llvm_function_body(self, ctx, builder, params, state_features, arg_in, arg_out, *, tags:frozenset):
-        ocm = self._get_optimized_controller()
-        if ocm is not None:
-            assert ocm.function is self
-            obj_func = ctx.import_llvm_function(ocm, tags=tags.union({"evaluate"}))
+        controller = self._get_optimized_controller()
+        if controller is not None:
+            assert controller.function is self
+            obj_func = ctx.import_llvm_function(controller, tags=tags.union({"evaluate"}))
             comp_args = builder.function.args[-3:]
             obj_param_ptr = comp_args[0]
             obj_state_ptr = comp_args[1]
-            extra_args = [arg_in, comp_args[2]]
+
+            # Construct input
+            comp_input = builder.alloca(obj_func.args[4].type.pointee, name="sim_input")
+
+            input_initialized = [False] * len(comp_input.type.pointee)
+            for src_idx, ip in enumerate(controller.input_ports):
+                if ip.shadow_inputs is None:
+                    continue
+
+                # shadow inputs point to an input port of a node.
+                # If that node takes direct input, it will have an associated
+                # (input_port, output_port) in the input_CIM.
+                # Take the former as an index to composition input variable.
+                cim_in_port = controller.agent_rep.input_CIM_ports[ip.shadow_inputs][0]
+                dst_idx = controller.agent_rep.input_CIM.input_ports.index(cim_in_port)
+
+                # Check that all inputs are unique
+                assert not input_initialized[dst_idx], "Double initialization of input {}".format(dst_idx)
+                input_initialized[dst_idx] = True
+
+                src = builder.gep(arg_in, [ctx.int32_ty(0), ctx.int32_ty(src_idx)])
+                # Destination is a struct of 2d arrays
+                dst = builder.gep(comp_input, [ctx.int32_ty(0),
+                                               ctx.int32_ty(dst_idx),
+                                               ctx.int32_ty(0)])
+                builder.store(builder.load(src), dst)
+
+            # Assert that we have populated all inputs
+            assert all(input_initialized), \
+                "Not all inputs to the simulated composition are initialized: {}".format(input_initialized)
+
+            # Extra args: input and data
+            extra_args = [comp_input, comp_args[2]]
         else:
             obj_func = ctx.import_llvm_function(self.objective_function)
             obj_state_ptr = pnlvm.helpers.get_state_ptr(builder, self, state_features,
diff --git a/psyneulink/core/components/functions/userdefinedfunction.py b/psyneulink/core/components/functions/userdefinedfunction.py
index 0cb5db217f3..cecd68425ae 100644
--- a/psyneulink/core/components/functions/userdefinedfunction.py
+++ b/psyneulink/core/components/functions/userdefinedfunction.py
@@ -9,6 +9,7 @@
 #
 # ***************************************** USER-DEFINED FUNCTION ****************************************************

+import builtins
 import numpy as np
 import typecheck as tc
 from inspect import signature, _empty, getsourcelines, getsourcefile, getclosurevars
@@ -34,7 +35,7 @@ def __init__(self, *args, **kwargs):
         self.functions = set()

     def visit_Name(self, node):
-        if node.id not in __builtins__:
+        if node.id not in dir(builtins):
             self.vars.add(node.id)

     def visit_Call(self, node):
@@ -44,7 +45,7 @@ def visit_Call(self, node):
         except AttributeError:
             func_id = node.func.id

-        if func_id not in __builtins__:
+        if func_id not in dir(builtins):
             self.functions.add(func_id)

         for c in ast.iter_child_nodes(node):
diff --git a/psyneulink/core/components/mechanisms/modulatory/control/optimizationcontrolmechanism.py b/psyneulink/core/components/mechanisms/modulatory/control/optimizationcontrolmechanism.py
index 67b665ce8cf..8abba09428e 100644
--- a/psyneulink/core/components/mechanisms/modulatory/control/optimizationcontrolmechanism.py
+++ b/psyneulink/core/components/mechanisms/modulatory/control/optimizationcontrolmechanism.py
@@ -3196,10 +3196,6 @@ def evaluate_agent_rep(self, control_allocation, context=None, return_results=Fa
             context=context
         )

-    def _get_evaluate_input_struct_type(self, ctx):
-        # We construct input from optimization function input
-        return ctx.get_input_struct_type(self.function)
-
     def _get_evaluate_output_struct_type(self, ctx):
         # Returns a scalar that is the predicted net_outcome
         return ctx.float_ty
@@ -3326,7 +3322,7 @@ def _gen_llvm_evaluate_function(self, *, ctx:pnlvm.LLVMBuilderContext, tags=froz
                 ctx.get_state_struct_type(self.agent_rep).as_pointer(),
                 self._get_evaluate_alloc_struct_type(ctx).as_pointer(),
                 self._get_evaluate_output_struct_type(ctx).as_pointer(),
-                self._get_evaluate_input_struct_type(ctx).as_pointer(),
+                ctx.get_input_struct_type(self.agent_rep).as_pointer(),
                 ctx.get_data_struct_type(self.agent_rep).as_pointer()]

         builder = ctx.create_llvm_function(args, self, str(self) + "_evaluate")
@@ -3334,7 +3330,7 @@ def _gen_llvm_evaluate_function(self, *, ctx:pnlvm.LLVMBuilderContext, tags=froz
         for p in llvm_func.args:
             p.attributes.add('nonnull')

-        comp_params, base_comp_state, allocation_sample, arg_out, arg_in, base_comp_data = llvm_func.args
+        comp_params, base_comp_state, allocation_sample, arg_out, comp_input, base_comp_data = llvm_func.args

         if "const_params" in debug_env:
             comp_params = builder.alloca(comp_params.type.pointee, name="const_params_loc")
@@ -3390,37 +3386,8 @@ def _gen_llvm_evaluate_function(self, *, ctx:pnlvm.LLVMBuilderContext, tags=froz
                                                      ctx.int32_ty(0)])
         builder.store(builder.load(sample_ptr), sample_dst)

-        # Construct input
-        comp_input = builder.alloca(sim_f.args[3].type.pointee, name="sim_input")
-
-        input_initialized = [False] * len(comp_input.type.pointee)
-        for src_idx, ip in enumerate(self.input_ports):
-            if ip.shadow_inputs is None:
-                continue
-
-            # shadow inputs point to an input port of of a node.
-            # If that node takes direct input, it will have an associated
-            # (input_port, output_port) in the input_CIM.
-            # Take the former as an index to composition input variable.
-            cim_in_port = self.agent_rep.input_CIM_ports[ip.shadow_inputs][0]
-            dst_idx = self.agent_rep.input_CIM.input_ports.index(cim_in_port)
-
-            # Check that all inputs are unique
-            assert not input_initialized[dst_idx], "Double initialization of input {}".format(dst_idx)
-            input_initialized[dst_idx] = True
-
-            src = builder.gep(arg_in, [ctx.int32_ty(0), ctx.int32_ty(src_idx)])
-            # Destination is a struct of 2d arrays
-            dst = builder.gep(comp_input, [ctx.int32_ty(0),
-                                           ctx.int32_ty(dst_idx),
-                                           ctx.int32_ty(0)])
-            builder.store(builder.load(src), dst)
-
-        # Assert that we have populated all inputs
-        assert all(input_initialized), \
-            "Not all inputs to the simulated composition are initialized: {}".format(input_initialized)
-
         if "const_input" in debug_env:
+            comp_input = builder.alloca(sim_f.args[3].type.pointee, name="sim_input")
             if not debug_env["const_input"]:
                 input_init = [[os.defaults.variable.tolist()] for os in self.agent_rep.input_CIM.input_ports]
                 print("Setting default input: ", input_init)
diff --git a/psyneulink/core/llvm/execution.py b/psyneulink/core/llvm/execution.py
index ab96adfafd4..2500d160997 100644
--- a/psyneulink/core/llvm/execution.py
+++ b/psyneulink/core/llvm/execution.py
@@ -149,7 +149,7 @@ def upload_ctype(self, data, name='other'):
             # 0-sized structures fail to upload
             # provide a small device buffer instead
             return jit_engine.pycuda.driver.mem_alloc(4)
-        return jit_engine.pycuda.driver.to_device(bytearray(data))
+        return jit_engine.pycuda.driver.to_device(bytes(data))

     def download_ctype(self, source, ty, name='other'):
         self._downloaded_bytes[name] += ctypes.sizeof(ty)
@@ -563,8 +563,11 @@ def cuda_execute(self, inputs):

     # Methods used to accelerate "Run"

-    def _get_run_input_struct(self, inputs, num_input_sets):
-        input_type = self._bin_run_func.byref_arg_types[3]
+    def _get_run_input_struct(self, inputs, num_input_sets, arg=3):
+        # Callers that override the input arg should ensure that _bin_func is not None
+        bin_f = self._bin_run_func if arg == 3 else self._bin_func
+
+        input_type = bin_f.byref_arg_types[arg]
         c_input = (input_type * num_input_sets) * len(self._execution_contexts)
         if len(self._execution_contexts) == 1:
             inputs = [inputs]
@@ -676,7 +679,7 @@ def cuda_run(self, inputs, runs, num_input_sets):
         assert runs_np[0] <= runs, "Composition ran more times than allowed!"
         return _convert_ctype_to_python(ct_out)[0:runs_np[0]]

-    def _prepare_evaluate(self, variable, num_evaluations):
+    def _prepare_evaluate(self, inputs, num_input_sets, num_evaluations):
         ocm = self._composition.controller
         assert len(self._execution_contexts) == 1

@@ -694,26 +697,24 @@ def _prepare_evaluate(self, variable, num_evaluations):
         ct_comp_state = self._get_compilation_param('_eval_state', '_get_state_initializer', 1)
         ct_comp_data = self._get_compilation_param('_eval_data', '_get_data_initializer', 6)

-        # Construct input variable
-        var_dty = _element_dtype(bin_func.byref_arg_types[5])
-        converted_variable = np.concatenate(variable, dtype=var_dty)
+        # Construct input variable, the 5th parameter of the evaluate function
+        ct_inputs = self._get_run_input_struct(inputs, num_input_sets, 5)

         # Output ctype
         out_ty = bin_func.byref_arg_types[4] * num_evaluations

         # return variable as numpy array. pycuda can use it directly
-        return ct_comp_param, ct_comp_state, ct_comp_data, converted_variable, out_ty
+        return ct_comp_param, ct_comp_state, ct_comp_data, ct_inputs, out_ty

-    def cuda_evaluate(self, variable, num_evaluations):
-        ct_comp_param, ct_comp_state, ct_comp_data, converted_variable, out_ty = \
-            self._prepare_evaluate(variable, num_evaluations)
-        self._uploaded_bytes['input'] += converted_variable.nbytes
+    def cuda_evaluate(self, inputs, num_input_sets, num_evaluations):
+        ct_comp_param, ct_comp_state, ct_comp_data, ct_inputs, out_ty = \
+            self._prepare_evaluate(inputs, num_input_sets, num_evaluations)

         # Output is allocated on device, but we need the ctype (out_ty).
         cuda_args = (self.upload_ctype(ct_comp_param, 'params'),
                      self.upload_ctype(ct_comp_state, 'state'),
                      jit_engine.pycuda.driver.mem_alloc(ctypes.sizeof(out_ty)),
-                     jit_engine.pycuda.driver.In(converted_variable),
+                     self.upload_ctype(ct_inputs, 'input'),
                      self.upload_ctype(ct_comp_data, 'data'),
                     )

@@ -722,12 +723,11 @@ def cuda_evaluate(self, variable, num_evaluations):

         return ct_results

-    def thread_evaluate(self, variable, num_evaluations):
-        ct_param, ct_state, ct_data, converted_variale, out_ty = \
-            self._prepare_evaluate(variable, num_evaluations)
+    def thread_evaluate(self, inputs, num_input_sets, num_evaluations):
+        ct_param, ct_state, ct_data, ct_inputs, out_ty = \
+            self._prepare_evaluate(inputs, num_input_sets, num_evaluations)

         ct_results = out_ty()
-        ct_variable = converted_variale.ctypes.data_as(self.__bin_func.c_func.argtypes[5])
         jobs = min(os.cpu_count(), num_evaluations)
         evals_per_job = (num_evaluations + jobs - 1) // jobs

@@ -738,7 +738,9 @@ def thread_evaluate(self, variable, num_evaluations):
             results = [ex.submit(self.__bin_func, ct_param, ct_state,
                                  int(i * evals_per_job),
                                  min((i + 1) * evals_per_job, num_evaluations),
-                                 ct_results, ct_variable, ct_data)
+                                 ct_results,
+                                 ctypes.cast(ctypes.byref(ct_inputs), self.__bin_func.c_func.argtypes[5]),
+                                 ct_data)
                        for i in range(jobs)]

         parallel_stop = time.time()
diff --git a/psyneulink/core/llvm/helpers.py b/psyneulink/core/llvm/helpers.py
index bdb887e7ddc..7c3b0414d00 100644
--- a/psyneulink/core/llvm/helpers.py
+++ b/psyneulink/core/llvm/helpers.py
@@ -443,7 +443,7 @@ def printf_float_array(builder, array, prefix="", suffix="\n", override_debug=Fa
     printf(builder, prefix, override_debug=override_debug)

     with array_ptr_loop(builder, array, "print_array_loop") as (b1, i):
-        printf(b1, "%lf ", b1.load(b1.gep(array, [ir.IntType(32)(0), i])), override_debug=override_debug)
+        printf(b1, "%lf ", b1.load(b1.gep(array, [i.type(0), i])), override_debug=override_debug)
     printf(builder, suffix, override_debug=override_debug)


@@ -451,7 +451,7 @@ def printf_float_array(builder, array, prefix="", suffix="\n", override_debug=Fa
 def printf_float_matrix(builder, matrix, prefix="", suffix="\n", override_debug=False):
     printf(builder, prefix, override_debug=override_debug)
     with array_ptr_loop(builder, matrix, "print_row_loop") as (b1, i):
-        row = b1.gep(matrix, [ir.IntType(32)(0), i])
+        row = b1.gep(matrix, [i.type(0), i])
         printf_float_array(b1, row, suffix="\n", override_debug=override_debug)
     printf(builder, suffix, override_debug=override_debug)
diff --git a/requirements.txt b/requirements.txt
index 1b67c662835..c113611feea 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,9 +6,8 @@ graphviz<0.21.0
 grpcio<1.43.0
 grpcio-tools<1.43.0
 llvmlite<0.40
-matplotlib<3.5.3
-modeci_mdf>=0.3.4, <0.4.2
-modelspec<0.2.7
+matplotlib<3.5.4
+modeci_mdf<0.5, >=0.3.4
 networkx<2.9
 numpy<1.21.7, >=1.17.0
 pillow<9.3.0
diff --git a/tests/functions/test_transfer.py b/tests/functions/test_transfer.py
index 5f71ee55d35..e0c0066295e 100644
--- a/tests/functions/test_transfer.py
+++ b/tests/functions/test_transfer.py
@@ -76,15 +76,15 @@ def test_execute(func, variable, params, expected, benchmark, func_mode):
     benchmark(ex, variable)


-relu_derivative_helper = lambda x : RAND1 if x > 0 else RAND1 * RAND3
 logistic_helper = RAND4 / (1 + np.exp(-(RAND1 * (test_var - RAND2)) + RAND3))
 tanh_derivative_helper = (RAND1 * (test_var + RAND2) + RAND3)
 tanh_derivative_helper = (1 - np.tanh(tanh_derivative_helper)**2) * RAND4 * RAND1
+
 derivative_test_data = [
     (Functions.Linear, test_var, {'slope':RAND1, 'intercept':RAND2}, RAND1),
     (Functions.Exponential, test_var, {'scale':RAND1, 'rate':RAND2}, RAND1 * RAND2 * np.exp(RAND2 * test_var)),
     (Functions.Logistic, test_var, {'gain':RAND1, 'x_0':RAND2, 'offset':RAND3, 'scale':RAND4}, RAND1 * RAND4 * logistic_helper * (1 - logistic_helper)),
-    (Functions.ReLU, test_var, {'gain':RAND1, 'bias':RAND2, 'leak':RAND3}, list(map(relu_derivative_helper, test_var))),
+    (Functions.ReLU, test_var, {'gain':RAND1, 'bias':RAND2, 'leak':RAND3}, np.where(test_var > 0, RAND1, RAND1 * RAND3)),
     (Functions.Tanh, test_var, {'gain':RAND1, 'bias':RAND2, 'offset':RAND3, 'scale':RAND4}, tanh_derivative_helper),
 ]
diff --git a/tests/llvm/test_multiple_executions.py b/tests/llvm/test_multiple_executions.py
index ebb40d038b7..a843deb7614 100644
--- a/tests/llvm/test_multiple_executions.py
+++ b/tests/llvm/test_multiple_executions.py
@@ -23,20 +23,19 @@
 @pytest.mark.function
 @pytest.mark.distance_function
 @pytest.mark.benchmark
-@pytest.mark.parametrize("executions", [1,10,100])
+@pytest.mark.parametrize("executions", [1, 10, 100])
 def test_function(benchmark, executions, func_mode):
     f = Functions.Distance(default_variable=test_var, metric=kw.EUCLIDEAN)
     benchmark.group = "DistanceFunction multirun {}".format(executions)
     var = [test_var for _ in range(executions)] if executions > 1 else test_var
     if func_mode == 'Python':
-        e = lambda x : [f.function(x[i]) for i in range(executions)]
-        res = benchmark(e if executions > 1 else f.function, var)
+        e = f.function if executions == 1 else lambda x: [f.function(xi) for xi in x]
     elif func_mode == 'LLVM':
-        e = pnlvm.execution.FuncExecution(f, [None for _ in range(executions)])
-        res = benchmark(e.execute, var)
+        e = pnlvm.execution.FuncExecution(f, [None for _ in range(executions)]).execute
     elif func_mode == 'PTX':
-        e = pnlvm.execution.FuncExecution(f, [None for _ in range(executions)])
-        res = benchmark(e.cuda_execute, var)
+        e = pnlvm.execution.FuncExecution(f, [None for _ in range(executions)]).cuda_execute
+
+    res = benchmark(e, var)
     assert np.allclose(res, [expected for _ in range(executions)])
     assert executions == 1 or len(res) == executions

@@ -44,7 +43,7 @@
 @pytest.mark.mechanism
 @pytest.mark.transfer_mechanism
 @pytest.mark.benchmark
-@pytest.mark.parametrize("executions", [1,10,100])
+@pytest.mark.parametrize("executions", [1, 10, 100])
 def test_mechanism(benchmark, executions, mech_mode):
     benchmark.group = "TransferMechanism multirun {}".format(executions)
     variable = [0 for _ in range(SIZE)]
@@ -58,17 +57,16 @@ def test_mechanism(benchmark, executions, mech_mode):
     var = [[10.0 for _ in range(SIZE)] for _ in range(executions)]
     expected = [[8.0 for i in range(SIZE)]]
     if mech_mode == 'Python':
-        f = lambda x : [T.execute(x[i]) for i in range(executions)]
-        res = benchmark(f if executions > 1 else T.execute, var)
+        e = T.execute if executions == 1 else lambda x : [T.execute(x[i]) for i in range(executions)]
     elif mech_mode == 'LLVM':
-        e = pnlvm.execution.MechExecution(T, [None for _ in range(executions)])
-        res = benchmark(e.execute, var)
+        e = pnlvm.execution.MechExecution(T, [None for _ in range(executions)]).execute
     elif mech_mode == 'PTX':
-        e = pnlvm.execution.MechExecution(T, [None for _ in range(executions)])
-        res = benchmark(e.cuda_execute, var)
+        e = pnlvm.execution.MechExecution(T, [None for _ in range(executions)]).cuda_execute
+
     if executions > 1:
         expected = [expected for _ in range(executions)]

+    res = benchmark(e, var)
     assert np.allclose(res, expected)
     assert len(res) == executions

@@ -77,7 +75,7 @@
 @pytest.mark.nested
 @pytest.mark.composition
 @pytest.mark.benchmark
-@pytest.mark.parametrize("executions", [1,10,100])
+@pytest.mark.parametrize("executions", [1, 10, 100])
 @pytest.mark.parametrize("mode", ['Python',
                                   pytest.param('LLVM', marks=pytest.mark.llvm),
                                   pytest.param('PTX', marks=[pytest.mark.llvm, pytest.mark.cuda])])
@@ -106,10 +104,11 @@ def test_nested_composition_execution(benchmark, executions, mode):
     expected = [[0.52497918747894]]
     if executions > 1:
         var = [var for _ in range(executions)]
+
     if mode == 'Python':
-        f = lambda x : [outer_comp.execute(x[i], context=i) for i in range(executions)]
-        res = f(var) if executions > 1 else outer_comp.execute(var)
-        benchmark(f if executions > 1 else outer_comp.execute, var)
+        e = outer_comp.execute if executions == 1 else lambda x : [outer_comp.execute(x[i], context=i) for i in range(executions)]
+        res = e(var)
+        benchmark(e, var)
     elif mode == 'LLVM':
         e = pnlvm.execution.CompExecution(outer_comp, [None for _ in range(executions)])
         e.execute(var)
@@ -129,7 +128,7 @@
 @pytest.mark.nested
 @pytest.mark.composition
 @pytest.mark.benchmark
-@pytest.mark.parametrize("executions", [1,10,100])
+@pytest.mark.parametrize("executions", [1, 10, 100])
 @pytest.mark.parametrize("mode", ['Python',
                                   pytest.param('LLVM', marks=pytest.mark.llvm),
                                   pytest.param('PTX', marks=[pytest.mark.llvm, pytest.mark.cuda])])
@@ -159,9 +158,9 @@ def test_nested_composition_run(benchmark, executions, mode):
     if executions > 1:
         var = [var for _ in range(executions)]
     if mode == 'Python':
-        f = lambda x : [outer_comp.run(x[i], context=i) for i in range(executions)]
-        res = f(var) if executions > 1 else outer_comp.run(var)
-        benchmark(f if executions > 1 else outer_comp.run, var)
+        e = outer_comp.run if executions == 1 else lambda x : [outer_comp.run(x[i], context=i) for i in range(executions)]
+        res = e(var)
+        benchmark(e, var)
     elif mode == 'LLVM':
         e = pnlvm.execution.CompExecution(outer_comp, [None for _ in range(executions)])
         res = e.run(var, 1, 1)
@@ -178,7 +177,7 @@
 @pytest.mark.nested
 @pytest.mark.composition
 @pytest.mark.benchmark
-@pytest.mark.parametrize("executions", [1,10,100])
+@pytest.mark.parametrize("executions", [1, 10, 100])
 @pytest.mark.parametrize("mode", ['Python',
                                   pytest.param('LLVM', marks=pytest.mark.llvm),
                                   pytest.param('PTX', marks=[pytest.mark.llvm, pytest.mark.cuda])])
diff --git a/tutorial_requirements.txt b/tutorial_requirements.txt
index 6d141f739cd..6c0b32c13fd 100644
--- a/tutorial_requirements.txt
+++ b/tutorial_requirements.txt
@@ -1,3 +1,3 @@
 graphviz<0.21.0
 jupyter<=1.0.0
-matplotlib<3.5.3
+matplotlib<3.5.4
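
Note on the __builtins__ -> dir(builtins) change in userdefinedfunction.py: __builtins__ is only guaranteed to be the builtins module inside __main__; in an imported module CPython may bind it to a plain dict, so "name not in __builtins__" can behave inconsistently. A minimal, self-contained sketch of the membership test the visitor now uses (the class name and the sample source string are hypothetical, not part of the patch):

import ast
import builtins

# Illustrative only: collect names that are not Python builtins,
# using dir(builtins) as the membership test.
class NonBuiltinNameCollector(ast.NodeVisitor):
    def __init__(self):
        self.vars = set()

    def visit_Name(self, node):
        # dir(builtins) lists builtin names and works the same in any module,
        # unlike __builtins__, which may be bound to a module or a dict.
        if node.id not in dir(builtins):
            self.vars.add(node.id)
        self.generic_visit(node)

collector = NonBuiltinNameCollector()
collector.visit(ast.parse("y = len(x) + z"))
assert collector.vars == {"y", "x", "z"}   # len is a builtin and is skipped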