From 1d3148a8532b9319265fbe4107cdde81b554b3a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 22 Apr 2010 10:07:18 +0200 Subject: [PATCH] Optimize selective receives in the presence of a large message queue If a gen_server process has many messages in its message queue and calls another gen_server process, the selective receive in gen_server:call() will have to go through the entire message queue. Have the compiler generate the new mark_recv/1 and mark_recv/1 instructions that can avoid going through the entire message queue. --- lib/compiler/src/Makefile | 1 + lib/compiler/src/beam_receive.erl | 388 ++++++++++++++++++++++++++++++ lib/compiler/src/compile.erl | 6 + lib/compiler/src/compiler.app.src | 1 + 4 files changed, 396 insertions(+) create mode 100644 lib/compiler/src/beam_receive.erl diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile index 70ddd5414520..0f6d2f6193f9 100644 --- a/lib/compiler/src/Makefile +++ b/lib/compiler/src/Makefile @@ -58,6 +58,7 @@ MODULES = \ beam_listing \ beam_opcodes \ beam_peep \ + beam_receive \ beam_trim \ beam_type \ beam_utils \ diff --git a/lib/compiler/src/beam_receive.erl b/lib/compiler/src/beam_receive.erl new file mode 100644 index 000000000000..9ed44ad5d7cc --- /dev/null +++ b/lib/compiler/src/beam_receive.erl @@ -0,0 +1,388 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2010. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(beam_receive). +-export([module/2]). +-import(lists, [foldl/3,reverse/1,reverse/2]). + +%%% +%%% In code such as: +%%% +%%% Ref = make_ref(), %Or erlang:monitor(process, Pid) +%%% . +%%% . +%%% . +%%% receive +%%% {Ref,Reply} -> Reply +%%% end. +%%% +%%% we know that none of the messages that exist in the message queue +%%% before the call to make_ref/0 can be matched out in the receive +%%% statement. Therefore we can avoid going through the entire message +%%% queue if we introduce two new instructions (here written as +%%% BIFs in pseudo-Erlang): +%%% +%%% recv_mark(SomeUniqInteger), +%%% Ref = make_ref(), +%%% . +%%% . +%%% . +%%% recv_set(SomeUniqInteger), +%%% receive +%%% {Ref,Reply} -> Reply +%%% end. +%%% +%%% The recv_mark/1 instruction will save the current position and +%%% SomeUniqInteger in the process context. The recv_set +%%% instruction will verify that SomeUniqInteger is still stored +%%% in the process context. If it is, it will set the current pointer +%%% for the message queue (the next message to be read out) to the +%%% position that was saved by recv_mark/1. +%%% +%%% The remove_message instruction must be modified to invalidate +%%% the information stored by the previous recv_mark/1, in case there +%%% is another receive executed between the calls to recv_mark/1 and +%%% recv_set/1. +%%% +%%% We use a reference to a label (i.e. a position in the loaded code) +%%% as the SomeUniqInteger. +%%% + +module({Mod,Exp,Attr,Fs0,Lc}, _Opts) -> + Fs = [function(F) || F <- Fs0], + Code = {Mod,Exp,Attr,Fs,Lc}, + {ok,Code}. + +%%% +%%% Local functions. +%%% + +function({function,Name,Arity,Entry,Is}) -> + try + D = beam_utils:index_labels(Is), + {function,Name,Arity,Entry,opt(Is, D, [])} + catch + Class:Error -> + Stack = erlang:get_stacktrace(), + io:fwrite("Function: ~w/~w\n", [Name,Arity]), + erlang:raise(Class, Error, Stack) + end. + +opt([{call_ext,Arity,{extfunc,erlang,Name,Arity}}=I|Is0], D, Acc) -> + case creates_new_ref(Name, Arity) of + true -> + %% The call creates a brand new reference. Now + %% search for a receive statement in the same + %% function that will match against the reference. + case opt_recv(Is0, D) of + no -> + opt(Is0, D, [I|Acc]); + {yes,Is,Lbl} -> + opt(Is, D, [I,{recv_mark,{f,Lbl}}|Acc]) + end; + false -> + opt(Is0, D, [I|Acc]) + end; +opt([I|Is], D, Acc) -> + opt(Is, D, [I|Acc]); +opt([], _, Acc) -> + reverse(Acc). + +%% creates_new_ref(Name, Arity) -> true|false. +%% Return 'true' if the BIF Name/Arity will create a new reference. +creates_new_ref(monitor, 2) -> true; +creates_new_ref(make_ref, 0) -> true; +creates_new_ref(_, _) -> false. + +%% opt_recv([Instruction], LabelIndex) -> no|{yes,[Instruction]} +%% Search for a receive statement that will only retrieve messages +%% that contain the newly created reference (which is currently in {x,0}). +opt_recv(Is, D) -> + R = regs_init_x0(), + L = gb_sets:empty(), + opt_recv(Is, D, R, L, []). + +opt_recv([{label,L}=Lbl,{loop_rec,{f,Fail},_}=Loop|Is], D, R0, _, Acc) -> + R = regs_kill_not_live(0, R0), + case regs_to_list(R) of + [{y,_}=RefReg] -> + %% We now have the new reference in the Y register RefReg + %% and the current instruction is the beginning of a + %% receive statement. We must now verify that only messages + %% that contain the reference will be matched. + case opt_ref_used(Is, RefReg, Fail, D) of + false -> + no; + true -> + RecvSet = {recv_set,{f,L}}, + {yes,reverse(Acc, [RecvSet,Lbl,Loop|Is]),L} + end; + [] -> + no + end; +opt_recv([I|Is], D, R0, L0, Acc) -> + {R,L} = opt_update_regs(I, R0, L0), + case regs_empty(R) of + true -> + %% The reference is no longer alive. There is no + %% point in continuing the search. + no; + false -> + opt_recv(Is, D, R, L, [I|Acc]) + end. + +opt_update_regs({block,Bl}, R, L) -> + {opt_update_regs_bl(Bl, R),L}; +opt_update_regs({call,_,_}, R, L) -> + {regs_kill_not_live(0, R),L}; +opt_update_regs({call_ext,_,_}, R, L) -> + {regs_kill_not_live(0, R),L}; +opt_update_regs({call_fun,_}, R, L) -> + {regs_kill_not_live(0, R),L}; +opt_update_regs({kill,Y}, R, L) -> + {regs_kill([Y], R),L}; +opt_update_regs(send, R, L) -> + {regs_kill_not_live(0, R),L}; +opt_update_regs({'catch',_,{f,Lbl}}, R, L) -> + {R,gb_sets:add(Lbl, L)}; +opt_update_regs({catch_end,_}, R, L) -> + {R,L}; +opt_update_regs({label,Lbl}, R, L) -> + case gb_sets:is_member(Lbl, L) of + false -> + %% We can't allow arbitrary labels (since the receive + %% could be entered without first creating the reference). + {regs_init(),L}; + true -> + %% A catch label for a previously seen catch instruction is OK. + {R,L} + end; +opt_update_regs({try_end,_}, R, L) -> + {R,L}; +opt_update_regs(_I, _R, L) -> + %% Unrecognized instruction. Abort the search. + {regs_init(),L}. + +opt_update_regs_bl([{set,Ds,_,{alloc,Live,_}}|Is], Regs0) -> + Regs1 = regs_kill_not_live(Live, Regs0), + Regs = regs_kill(Ds, Regs1), + opt_update_regs_bl(Is, Regs); +opt_update_regs_bl([{set,[Dst]=Ds,[Src],move}|Is], Regs0) -> + Regs1 = regs_kill(Ds, Regs0), + Regs = case regs_is_member(Src, Regs1) of + false -> Regs1; + true -> regs_add(Dst, Regs1) + end, + opt_update_regs_bl(Is, Regs); +opt_update_regs_bl([{set,Ds,_,_}|Is], Regs0) -> + Regs = regs_kill(Ds, Regs0), + opt_update_regs_bl(Is, Regs); +opt_update_regs_bl([], Regs) -> Regs. + +%% opt_ref_used([Instruction], RefRegister, FailLabel, LabelIndex) -> true|false +%% Return 'true' if it is certain that only messages that contain the same +%% reference as in RefRegister can be matched out. Otherwise return 'false'. +%% +%% Basically, we follow all possible paths through the receive statement. +%% If all paths are safe, we return 'true'. +%% +%% A branch to FailLabel is safe, because it exits the receive statement +%% and no further message may be matched out. +%% +%% If a path hits an comparision between RefRegister and part of the message, +%% that path is safe (any messages that may be matched further down the +%% path is guaranteed to contain the reference). +%% +%% Otherwise, if we hit a 'remove_message' instruction, we give up +%% and return 'false' (the optimization is definitely unsafe). If +%% we hit an unrecognized instruction, we also give up and return +%% 'false' (the optimization may be unsafe). + +opt_ref_used(Is, RefReg, Fail, D) -> + Done = gb_sets:singleton(Fail), + Regs = regs_init_x0(), + try + opt_ref_used_1(Is, RefReg, D, Done, Regs), + true + catch + throw:not_used -> + false + end. + +%% This functions only returns if all paths through the receive +%% statement are safe, and throws an 'not_used' term otherwise. +opt_ref_used_1([{block,Bl}|Is], RefReg, D, Done, Regs0) -> + Regs = opt_ref_used_bl(Bl, Regs0), + opt_ref_used_1(Is, RefReg, D, Done, Regs); +opt_ref_used_1([{test,is_eq_exact,{f,Fail},Args}|Is], RefReg, D, Done0, Regs) -> + Done = opt_ref_used_at(Fail, RefReg, D, Done0, Regs), + case is_ref_msg_comparison(Args, RefReg, Regs) of + false -> + opt_ref_used_1(Is, RefReg, D, Done, Regs); + true -> + %% The instructions that follow (Is) can only be executed + %% if the message contains the same reference as in RefReg. + Done + end; +opt_ref_used_1([{test,is_ne_exact,{f,Fail},Args}|Is], RefReg, D, Done0, Regs) -> + Done = opt_ref_used_1(Is, RefReg, D, Done0, Regs), + case is_ref_msg_comparison(Args, RefReg, Regs) of + false -> + opt_ref_used_at(Fail, RefReg, D, Done, Regs); + true -> + Done + end; +opt_ref_used_1([{test,_,{f,Fail},_}|Is], RefReg, D, Done0, Regs) -> + Done = opt_ref_used_at(Fail, RefReg, D, Done0, Regs), + opt_ref_used_1(Is, RefReg, D, Done, Regs); +opt_ref_used_1([{select_tuple_arity,_,{f,Fail},{list,List}}|_], RefReg, D, Done, Regs) -> + Lbls = [F || {f,F} <- List] ++ [Fail], + opt_ref_used_in_all(Lbls, RefReg, D, Done, Regs); +opt_ref_used_1([{select_val,_,{f,Fail},{list,List}}|_], RefReg, D, Done, Regs) -> + Lbls = [F || {f,F} <- List] ++ [Fail], + opt_ref_used_in_all(Lbls, RefReg, D, Done, Regs); +opt_ref_used_1([{label,Lbl}|Is], RefReg, D, Done, Regs) -> + case gb_sets:is_member(Lbl, Done) of + true -> Done; + false -> opt_ref_used_1(Is, RefReg, D, Done, Regs) + end; +opt_ref_used_1([{loop_rec_end,_}|_], _, _, Done, _) -> + Done; +opt_ref_used_1([_I|_], _RefReg, _D, _Done, _Regs) -> + %% The optimization may be unsafe. + throw(not_used). + +%% is_ref_msg_comparison(Args, RefReg, RegisterSet) -> true|false. +%% Return 'true' if Args denotes a comparison between the +%% reference and message or part of the message. +is_ref_msg_comparison([R,RefReg], RefReg, Regs) -> + regs_is_member(R, Regs); +is_ref_msg_comparison([RefReg,R], RefReg, Regs) -> + regs_is_member(R, Regs); +is_ref_msg_comparison([_,_], _, _) -> false. + +opt_ref_used_in_all([L|Ls], RefReg, D, Done0, Regs) -> + Done = opt_ref_used_at(L, RefReg, D, Done0, Regs), + opt_ref_used_in_all(Ls, RefReg, D, Done, Regs); +opt_ref_used_in_all([], _, _, Done, _) -> Done. + +opt_ref_used_at(Fail, RefReg, D, Done0, Regs) -> + case gb_sets:is_member(Fail, Done0) of + true -> + Done0; + false -> + Is = beam_utils:code_at(Fail, D), + Done = opt_ref_used_1(Is, RefReg, D, Done0, Regs), + gb_sets:add(Fail, Done) + end. + +opt_ref_used_bl([{set,[],[],remove_message}|_], _) -> + %% We have proved that a message that does not depend on the + %% reference can be matched out. + throw(not_used); +opt_ref_used_bl([{set,Ds,Ss,_}|Is], Regs0) -> + case regs_all_members(Ss, Regs0) of + false -> + %% The destination registers may be assigned values that + %% are not dependent on the message being matched. + Regs = regs_kill(Ds, Regs0), + opt_ref_used_bl(Is, Regs); + true -> + %% All the sources depend on the message directly or + %% indirectly. + Regs = regs_add_list(Ds, Regs0), + opt_ref_used_bl(Is, Regs) + end; +opt_ref_used_bl([], Regs) -> Regs. + +%%% +%%% Functions for keeping track of a set of registers. +%%% + +%% regs_init() -> RegisterSet +%% Return an empty set of registers. + +regs_init() -> + {0,0}. + +%% regs_init_x0() -> RegisterSet +%% Return a set that only contains the {x,0} register. + +regs_init_x0() -> + {1 bsl 0,0}. + +%% regs_empty(Register) -> true|false +%% Test whether the register set is empty. + +regs_empty(R) -> + R =:= {0,0}. + +%% regs_kill_not_live(Live, RegisterSet) -> RegisterSet' +%% Kill all registers indicated not live by Live. + +regs_kill_not_live(Live, {Xregs,Yregs}) -> + {Xregs band ((1 bsl Live)-1),Yregs}. + +%% regs_kill([Register], RegisterSet) -> RegisterSet' +%% Kill all registers mentioned in the list of registers. + +regs_kill([{x,N}|Rs], {Xregs,Yregs}) -> + regs_kill(Rs, {Xregs band (bnot (1 bsl N)),Yregs}); +regs_kill([{y,N}|Rs], {Xregs,Yregs}) -> + regs_kill(Rs, {Xregs,Yregs band (bnot (1 bsl N))}); +regs_kill([{fr,_}|Rs], Regs) -> + regs_kill(Rs, Regs); +regs_kill([], Regs) -> Regs. + +regs_add_list(List, Regs) -> + foldl(fun(R, A) -> regs_add(R, A) end, Regs, List). + +%% regs_add(Register, RegisterSet) -> RegisterSet' +%% Add a new register to the set of registers. + +regs_add({x,N}, {Xregs,Yregs}) -> + {Xregs bor (1 bsl N),Yregs}; +regs_add({y,N}, {Xregs,Yregs}) -> + {Xregs,Yregs bor (1 bsl N)}. + +%% regs_all_members([Register], RegisterSet) -> true|false +%% Test whether all of the registers are part of the register set. + +regs_all_members([R|Rs], Regs) -> + regs_is_member(R, Regs) andalso regs_all_members(Rs, Regs); +regs_all_members([], _) -> true. + +%% regs_is_member(Register, RegisterSet) -> true|false +%% Test whether Register is part of the register set. + +regs_is_member({x,N}, {Regs,_}) -> Regs band (1 bsl N) =/= 0; +regs_is_member({y,N}, {_,Regs}) -> Regs band (1 bsl N) =/= 0; +regs_is_member(_, _) -> false. + +%% regs_to_list(RegisterSet) -> [Register] +%% Convert the register set to an explicit list of registers. +regs_to_list({Xregs,Yregs}) -> + regs_to_list_1(Xregs, 0, x, regs_to_list_1(Yregs, 0, y, [])). + +regs_to_list_1(0, _, _, Acc) -> + Acc; +regs_to_list_1(Regs, N, Tag, Acc) when (Regs band 1) =:= 1 -> + regs_to_list_1(Regs bsr 1, N+1, Tag, [{Tag,N}|Acc]); +regs_to_list_1(Regs, N, Tag, Acc) -> + regs_to_list_1(Regs bsr 1, N+1, Tag, Acc). diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl index 5017fd2c238f..3f250a6d5ac9 100644 --- a/lib/compiler/src/compile.erl +++ b/lib/compiler/src/compile.erl @@ -162,6 +162,10 @@ expand_opt(report, Os) -> [report_errors,report_warnings|Os]; expand_opt(return, Os) -> [return_errors,return_warnings|Os]; +expand_opt(r12, Os) -> + [no_recv_opt|Os]; +expand_opt(r13, Os) -> + [no_recv_opt|Os]; expand_opt({debug_info_key,_}=O, Os) -> [encrypt_debug_info,O|Os]; expand_opt(no_float_opt, Os) -> @@ -621,6 +625,8 @@ asm_passes() -> {iff,dclean,{listing,"clean"}}, {unless,no_bsm_opt,{pass,beam_bsm}}, {iff,dbsm,{listing,"bsm"}}, + {unless,no_recv_opt,{pass,beam_receive}}, + {iff,drecv,{listing,"recv"}}, {unless,no_stack_trimming,{pass,beam_trim}}, {iff,dtrim,{listing,"trim"}}, {pass,beam_flatten}]}, diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src index b0311365c472..1bb62f41cf6f 100644 --- a/lib/compiler/src/compiler.app.src +++ b/lib/compiler/src/compiler.app.src @@ -33,6 +33,7 @@ beam_listing, beam_opcodes, beam_peep, + beam_receive, beam_trim, beam_type, beam_utils,