-
Notifications
You must be signed in to change notification settings - Fork 271
/
bap_core_theory.mli
4720 lines (3319 loc) · 153 KB
/
bap_core_theory.mli
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
(** BAP Semantics Representation.
{1 A Gentle Introduction to the Core Theory}
The Core Theory is an intermediate language that is designed to
express the semantics of computer programs. It focuses on programs
that are represented in binary machine code and is capable of an
accurate representation of the architectural and micro-architectural
details of the program behavior.
The language is extensible. It is possible to add new language
features, without affecting existing analyses and even
without recompilation. Adding new analyses also doesn't require
any changes to the existing code, which also could be reused
without recompilation. Thus the language doesn't suffer from the
expression problem.
The language is rigidly typed with types expressed as OCaml
types. A type of a Core Theory term is inferred (and checked) by
the OCaml type system, which can statically ascertain that the
term is not ill-formed and no analysis will get stuck.
The language is adaptable. Analysts can select a designated
subset of the language which is relevant to their tasks without
getting bogged down by the irrelevant architectural details.
The language can express the semantics of the floating-point
operations including operations, but not limiting to, specified in
the IEEE754 standard.
The language facilitates developing custom intermediate
representation and languages, which could be seamlessly introduced
in the analysis pipeline without breaking existing components. The
new language is compatible with BIL, BIR, and older variants of
BIL. It is potentially compatible with any other intermediate
representations.
{2 The Language Hierarchy}
The Core Theory is not really a language but a family of
languages. If we will order the languages in this family by
subsumption, then we will get the following Hasse diagram:
{v
o Core
|
Trans o--------+--------o Float
| |
o Basic o FBasic
|
o Minimal
|
+-------+--------+--------+-------+
| | | | |
o o o o o
Init Bool Bitv Memory Effect
v}
The Core language subsumes all other sub-languages and includes
modular arithmetic and other operations on bitvectos, operations
with memories, registers, floating-points including
transcendental functions.
The reason to have so many languages is purely pragmatic: to
enable specialized implementations of analyses and lifters. This
structure is not really mandated, the languages are defined
structurally, not nominally, so it is possible to combine
languages in arbitrary ways, as well as define new languages.
The Core language, despite being at the top of our hierarchy, is
still very low-level and basic. It is intended to reflect
operations carried by classical computers with Harvard or
Princeton architectures. Therefore we chose the name "Core" to
reflect our vision of the Core language as the base for
higher-level hierarchies of languages.
{2 Hierarchy of Terms}
Terms and operations of the Core Theory languages are typed to
prevent the creation of ill-formed programs. We use the word sort
to denote a set of terms that share the same properties. The Core
Theory comes with a collection of predefined sorts, which are used
to specify the Core language, but it is possible to define new
sorts, to keep the theory extensible.
The terms of the Core Theory are divided in two classes - values
and effects. A value term denotes the semantics of programs that
produce values. Roughly, values correspond to the syntactic class of
language expressions.
Effects is a class of terms that do not produce values, but side
effects or just effects, e.g., changing a value of a register,
loading or storing memory location, performing I/O operation on a
port or issuing a synchronization barrier, etc. Roughly, effects
correspond to the syntactic class of language statements.
Both values and effects are knowledge classes. Each class is
further subdivided into an infinite set of sorts. The class of
values is inhabited with terms of Bool, Bitv, Mem, Float, and
Rmode. Some sorts are indexed, so they represent an (infinite)
family of sorts. For example, Bitv[s] is a family of bitvector sorts
indexed by their widths, e.g, Bitv[8], Bitv[32], etc.
The class of effects is subdivided into two sorts of effect, those
that affect the control flow and those that affect the data flow.
{v
Term
o
|
+------+------+
| |
Value o o Effect
| |
Bool o--+ +---+---+
| | |
Bitv[s] o--+ o o
| Ctrl[i] Data[i]
Mem[k,s] o--+
|
Float[f,s] o--+
|
Rmode o--+
v}
{2:vars Variables}
Variables are ubiquitous in programming languages and could be
used to reference memory locations, CPU registers, or just
be bound to expressions. Sometimes variables are typed, sometimes
they are just identifiers with not associated type.
In the Core Theory all variables are sorted, i.e., they have an
associated value sort. Variables are also having scope (lexical
visibility), and extent (lifetime) Finally, variables could be
mutable or immutable.
A physical variable is a global mutable variable with the infinite
scope and extent. They are used to refer predefined (micro)
architectural locations of a modeled system, e.g., registers,
memory banks, caches, register files, etc. Global variables has
identifiers that are the same as names, e.g., `RAX`, `R0`, `mem`,
etc. The important thing, is that a global variable usually has
some physical representation.
Virtual variables are dual to physical variables and are further
subdivided into mutable and immutable.
A mutable virtual variable represents an unspecified scratch
location that holds data of the specified sort. They could be used
to abstract an actual physical location in a modeled system (when
it is not relevant or just not known) or just to simplify the
analysis. The mutable virtual variables have identifier of the
form [#<number>], e.g, [#1], [#2048], etc.
Finally, an immutable virtual variable is a local variable that
holds a value of an expression. It has a limited scope and its
immutability is ensured by the type system since the scope of a
local binding can contain only pure terms, i.e., no
side-effects. These variables have identifiers of the form
[$<number>], e.g., [$1], [$2], etc, and since their scope is
limited, those identifiers are reused in different scopes.
{2 Theories and Semantics}
Languages of the Core Theory, including the Core itself are
represented as module signatures, i.e., they are pure abstractions
or interfaces that do not define any data types, functions, or values.
This approach is called tagless-final style [1], pioneered by
Carette and Kiselyov and later rediscovered under the name "Object
algebras" by Oliviera and Cook [2]. We encourage to read those
papers and accompanying literature, but it is not strictly needed,
especially since the underlying idea is pretty simple.
In the final style, an embedded language is not represented by an
abstract syntax tree or some intermediate representation data
structure, but by denotations in a semantic algebra. Where the
semantic algebra is an implementation (structure) of the language
signature. Or, in other words, it is its denotational semantics.
The structure may choose, basically, any denotations, as long as
it fits the signature. For example, the language could be denoted
with its textual representation, BIL code, LLVM IR, BIR, sets of
reachable addresses, and so on. In other words, an implementation
of the Core signature could be seen as an analysis that computes the
property of a term.
Unlike a classical final approach described in [1] the Core Theory
signatures do not include any abstract types, all types mentioned
in the theories are defined on the scope of the Core Theory. This
constraint basically turns a structure, which implements the
Theory, into a simple array of functions, a first class value with
no caveats (like types escaping the scope, etc).
To enable each semantic algebra to have its own denotation we
employ ['a Knowledge.Value.t]. Thus both Core Theory values and
effects are instances of the Knowledge Value, parametrized by
corresponding class indices. The Knowledge Value is an instance of
Domain, that makes it ideal for representing the denotational
semantics of programs. The Knowledge value is an extensible
record, where each field corresponds to a particular denotation,
it is possible to store several denotations in one Knowledge
value.
Denotational semantics is composable, i.e., a denotation of
a term is composed from denotations of its constituent terms.
However, some denotations are context dependent. To enable
this, we made a term denotation an instance of ['a knowledge],
i.e., a knowledge dependent computation.
To summarize, a denotation is a structure that implements methods
of the corresponding structure. Each method corresponds to a
language form, e.g, [val add : 'a bitv -> 'a bitv -> 'a bitv],
corresponds to an addition of two bitvectors. The implementation,
builds the denotation of the term from the denotations of its
inputs, e.g.,
{[
let add x y =
x >>-> fun x ->
y >>-> fun y ->
match x, y with
| Some x, Some y -> const (Bitvec.(x + y))
| _ -> nonconst
]}
Where [>>->] extracts the module specific denotation and [const],
[noncost] put them back (assuming that the denotation is the
classical constant folding lattice).
The final style makes it easy to write a fold-style analysis, such
as constant folding, taint analysis, etc. Since all terms are
knowledge dependent computations, i.e., wrapped into the knowledge
monad, which turns any computation into a fixed-point computation,
it is also possible to write data-flow analysis and other forms of
abstract interpretation. In fact, it was shown, that any
optimization or analysis could be written in the final style in a
modular and composable way [3]. However, the classical approach,
that uses tagged AST and pattern matching is not denied at all.
Since the denotation could be anything (that is an instance of
domain), it is quite natural to use BIL, and BIR, or any other
concrete syntax tree as a possible denotation. Therefore, it is
possible to extract those denotations and write your analysis
using the save haven of pattern matching.
{2 Writing a new denotation}
Any denotation must be an instance of the Core signature. However,
it is not always required to implement all methods, as they could
be inherited from other instance or filled in with the Empty
Theory. Once analysis is written it should be declared, so that it
could be later run, e.g., let's extend a hypothetical
["constant-tracker"] analysis:
{[
let () =
Theory.declare "my-constant-tracker" @@
Theory.instance ~require:["bap.std:constant-tracker"] >>=
Theory.require >>|
fun (module Base) : Theory.core -> (module struct
include Base
let add x y =
printf "add is called!\n%!";
add x y
end
]}
The real analysis should store it results either in the knowledge
base or directly in denotations of the terms (or in both places).
{2 Instantiating a theory}
To use a theory we need to instantiate it. In the previous section
we instantiated a theory using the [Theory.require] function, that
returns a previously declared theory. But what if we need to use
several denotations, e.g., when we want to have both a
constant-tracker and BIL for our analysis.
The final style implementations, in Scala, OCaml, and Haskell,
usually employ functors or type classes, which both require a user
to select an instance of a type class, which should be used in
the given context. Some languages allow only one instance of a class
per type, others allow multiple, but still needs a declaration of
some instances as canonical.
The Core Theory addresses this issue by leveraging the structure
of the Knowledge universal values and instantiating all theory
instances simultaneously, so that for each language term the sum
of all denotations is provided. To exclude the overhead of
evaluating denotations that might be unused, it is possible to
limit the set of instantiated theories by specifying a concrete
list of required theories. The requirements are specified in the
form of semantic tags instead of concrete theory names, to prevent
explicit dependencies on implementations. However, it is still
possible to explicitly request a particular theory.
It is also possible to define the context of the theory, to enable
those theories that are not generic and are applicable only to the
specified context. For example,
{[
Theory.instance ()
~context:["arm"; "arm-gnueabi"]
~requires:[
"variable-recovery";
"stack-frame-analysis";
"structural-analysis";
"floating-point";
"bap.std:bil-semantics"
] >>=
Theory.require >>= fun (module Theory) ->
]}
In the example above, theories that are specific to ARM
architecture, in particular to the arm-gnueabi ABI, will be
instantiated (in addition to other general theories). The
[requires] parameter specifies a few semantic tags, describing
what kind of semantic information is needed, as well as one theory
explicitly, the [bap.std:bil-semantics], to ensure that each term
has a BIL denotation.
References:
- [1]: http://okmij.org/ftp/tagless-final/JFP.pdf
- [2]: http://www.cs.utexas.edu/~wcook/Drafts/2012/ecoop2012.pdf
- [3]: http://okmij.org/ftp/tagless-final/course/optimizations.html
{2 Parsing binary code}
After a theory is instantiated it could be used to build terms,
which will trigger analyses associated with each instantiated
theory.
However, a program is usually represented as a binary machine
code, which should be parsed into the Core Theory terms. This
process is called {i lifting} and program components that do
lifting are called {i lifters}.
Lifting is a notoriously hard task, since the machine code is an
untyped representation and Core Theory terms are rigidly typed. To
alleviate this problem, the Core Theory library provides a helper
module [Parser] which could be used to lift an untyped
representation into the typed Core Theory term.
It is also possible to reuse lifters which translate the machine
code in some IL, but writing a parser form that IL. The [Parser]
module is especially useful here, since it was specifically
designed for such use-cases.
*)
open Core_kernel[@@warning "-D"]
open Caml.Format
open Bap_knowledge
module KB = Knowledge
(** The Core Theory. *)
module Theory : sig
(** The class index for all Core Theories. *)
type cls
(** A theory instance.
To create a new theory instance use the {!instance} function.
To manifest a theory into an OCaml module, use the {!require}
function. *)
type theory = cls KB.obj
(** Theory.t is theory. *)
type t = theory
(** The denotation of expression.
Values are used to express the semantics of terms that evaluate
to a value, aka expressions. Values are sorted and value sorts
hold static information about the value representation, like the
number of bits in a bitvector or the representation format in a
floating-point value.
All values belong to the same Knowledge class and thus share the
same set of properties, with each property being a specific
denotation provided by one or more theories. For example, the
[bap.std:exp] slot holds the denotation of a value in terms of
BIL expressions.
*)
module Value : sig
(** a type for the value sort *)
type +'a sort
(** the class of the values. *)
type cls
(** the value type is an instance of Knowledge.value *)
type 'a t = (cls,'a sort) KB.cls KB.value
type 'a value = 'a t
(** the class of all values. *)
val cls : (cls,unit) KB.cls
(** [empty s] creates an empty value of sort [s].
The empty value doesn't hold any denotations and represents an
absence of information about the value.
*)
val empty : 'a sort -> 'a t
(** [sort v] is the value sort.
The value sort holds static information about values of that
sort. *)
val sort : 'a t -> 'a sort
(** [resort refine x] applies [refine] to the sort of [x].
Returns the value [x] with the refined sort, if applicable,
otherwise returns the original value.
@since 2.3.0
*)
val resort : ('a sort -> 'b sort option) -> 'a t -> 'b t option
(** [forget v] erases the type index of the value.
The returned value has the monomorphized [Top.t] type and can
be stored in containers, serialized, etc.
To restore the type index use the {!refine} function.
@since 2.3.0
Note: this is a convenient function that just does
[Knowledge.Value.refine v @@ Sort.forget @@ sort v]
*)
val forget : 'a t -> unit t
(** A value with an erased sort type index.
The monomorphized value could be stored in a container,
serialized and deserialized and otherwise treated as a
regular value. To erase the type index, use the
[Value.forget] function.
The type index could be restored using [Value.refine] or
[Value.Sort.refine] functions.
@since 2.3.0
*)
module Top : sig
type t = (cls,unit sort) KB.cls KB.value
val cls : (cls, unit sort) KB.cls
include KB.Value.S with type t := t
end
(** A eDSL for dispatching on multiple types.
The syntax involves only two operators, [can] that
applys a sort refinining function, and [let|]
glues several cases together. Let's start with a simple
example,
{[
let f x = Match.(begin
let| x = can Bool.refine x @@ fun x ->
(* here x has type [Bool.t value] *)
`Bool x in
let| x = can Bitv.refine x @@ fun x ->
(* and here x is ['a Bitv.t value] *)
`Bitv x in
let| x = can Mem.refine x @@ fun x ->
(* and now x is [('a,'b) Mem.t value] *)
`Mem x in
`Other x
end)
]}
In general, the syntax is
{v
let| x = can s1 x @@ fun (x : t1) ->
body1 in
...
let| x = can sN x @@ fun (x : tN) ->
bodyN in
default
v}
where [s1],...,[sN] a refiners to types [t1],...,[tN],
respectively.
{3 Semantics}
If in [can s1 x body] the sort of [x] can be refined to [t1] using
the refiner [s1] then [body] is applied to the value [x] with
the refined sort (and freshly generated type index if
needed) and the result of the whole expression is [body x]
and the nested below expressions are never
evaluated. Otherwise, if there is no refinement, the
expression [can s1 x body] is evaluated to [()]
and the next case is tried until the [default] case is hit.
@since 2.3.0
*)
module Match : sig
type 'a t
type 'a refiner = unit sort -> 'a sort option
val (let|) : 'b t -> (unit -> 'b) -> 'b
(** [let| () = can s x f in can't] refines [x] to [s].
If the sort of [x] could be refined with [s] then [f]
is called with the refined value [x'] and the whole
expression is evaluated to [f x']. Otherwise, the control is
passed to [can't].
*)
val can : 'a refiner -> unit value -> ('a value -> 'b) -> 'b t
(** [let| () = both sx x sy y f in no] applies two refiners in parallel.
If both [x] and [y] could be refined with [sx] and [sy]
respectively then [f x' y'] is called with the refined
values and becomes the value of the expression. Otherwise,
[no] is evaluated and becomes the value of the whole
expression.
*)
val both :
'a refiner -> unit value ->
'b refiner -> unit value ->
('a value -> 'b value -> 'c) -> 'c t
end
(** Value Sorts.
A concrete and extensible representation of a value sort. The
sort usually holds the static information about the value
representation, like the width of a bitvector, the
representation format of a floating-point number, and so on.
This module is mostly needed when a new sort is defined. The
Core Theory provides a predefined collection of sorts, here is
the list:
- {!Bitv} - bitvectors, e.g., [BitVec(i)]
- {!Mem} - memories, e.g., [Mem(BitVec(i), BitVec(j)]
- {!Float} - floating-points, e.g., [Float(IEEE754(2, 8, 23), BitVec(32)];
- {!Rmode} - rounding mode, e.g., [Rmode].
This module defines a simple DSL for specifying sorts, the DSL
grammar is made only from three rules:
{v
sort = sym | int | sort(sort)
v}
The DSL is embedded into the host language with an infix
operator [@->] for application, e.g., OCaml grammar for sorts is:
[v
sort = sym exp | int exp | sort "@->" sort
exp = ?a valid OCaml expression?
v]
Both symbols and numbers are indexed with a type index, which
serves as a witness of the sort value, e.g.,
{[
type int8
let int8 : int8 num sort = Sort.int 8
]}
Type indices enable explicit reflection of the target language
type system in the host type system, while still keeping the
typing rules under designer's control.
As a working example, let's develop a sort for binary
fixed-point numbers. We need to encode the type of the
underlying bitvector as well as the scaling factor. Suppose,
we chose to encode the scaling factor by an integer position
of the point, e.g., 8 means scaling factor 2^8, i.e., a point
fixed on 8th bit from the left.
The syntax of our sort will be [Fixpoint(<num>,BitVec(<num>))],
but we will keep it private to enable further extensions. The
structure of the sort is explicitly captured in its type, in
our case, it will be ['p num -> 's Bitv.t -> fixpoint sym],
but since we want to keep it is hidden by our type [('p,'s) t].
The same as with the built-in [Bitv] and [Mem] sorts.
We declare a [fixpoint] constructor and keep it private, to
ensure that only we can construct (and refine) fixpoint
sorts. Since the sort type is abstract, we also need to
provide functions that access arguments of our sort.
Finally, we need to provide the [refine] function, that will
cast an untyped sort to its type representation, essentially
proving that the sort is a valid fixpoint sort.
{[
module Fixpoint : sig
type ('p, 's) t
val define : int -> 's Bitv.t sort -> ('p,'s) t sort
val refine : unit sort -> ('p,'s) t sort option
val bits : ('p,'s) t sort -> 's Bitv.t sort
val logscale : ('p,'s) t sort -> int
end = struct
type fixpoint
type ('m,'s) t =
'm Value.Sort.num ->
's Bitv.t ->
fixpoint Value.Sort.sym
let fixpoint = Value.Sort.Name.declare "FixPoint"
let define p s = Value.Sort.(int p @-> s @-> sym fixpoint)
let refine s = Value.Sort.refine fixpoint s
let bits s = Value.Sort.(hd (tl s))
let logscale s = Value.Sort.(hd s)
end
(* Example of usage: *)
type ('m,'s) fixpoint = Fixpoint.t Value.sort
type u32 (* type index for 32 bit ints *)
type p8 (* type index for points at 8th bit *)
(* a sort of 32-bit bitvectors, usually provided by the CPU model *)
let u32 : u32 Bitv.t Value.sort = Bitv.define 32
(* a sort of 8.32 fixed-point numbers. *)
let fp8_32 : (p8,u32) fixpoint = Fixpoint.define 8 u32
]}
*)
module Sort : sig
type +'a t = 'a sort
type +'a sym
type +'a num
type name
(** [sym name] constructs a sort with the given name.
A symbolic sort could represent an abstract data type with
no further information available, e.g., some machine status
word of unknown size or representation; it may also be used
to denote data with obvious representation, e.g., the [Bool]
sort; finally, a symbolic sort could be used as a
constructor name for an indexed sort, e.g., (BitVec(width)).
See the Example in the module description for more
information.
*)
val sym : name -> 'a sym sort
(** [int x] a numeric sort.
While it is possible to create a standalone numeric sort, it
wouldn't be possible to refine it, since only symbolic sorts
re refinable.
Numeric sorts are used mostly as parameters. See the Example
section of the module documentation for more information.
*)
val int : int -> 'a num sort
(** [app s1 s2] constructs a sort of sort [s1] and [s2].
An application could be seen as a tuple building operators,
thus this operation defines a sort that is described by two
other sorts.
Basically, the [app] operator builds a heterogenous list,
with elements which should be other sorts. The list could be
then traversed using the [Sort.hd] and [Sort.tl] operators,
and individual elements could be read with the [value] and
[name] operators. Since the structure of the sort is fully
encoded in this type, those operations are total.
*)
val app : 'a sort -> 'b sort -> ('a -> 'b) sort
(** [s1 @-> s2] is [app s1 s2] *)
val (@->) : 'a sort -> 'b sort -> ('a -> 'b) sort
(** [value s] returns the number associated with the numeric sort. *)
val value : 'a num sort -> int
(** [name s] returns the symbol associated with a symbolic sort *)
val name : 'a sym sort -> name
(** [hd s] the first argument of sort [s] *)
val hd : ('a -> 'b) sort -> 'a sort
(** [tl] the list of arguments of sort [s] excluding the first one*)
val tl : ('a -> 'b) sort -> 'b sort
(** [refine witness s] restores the type of the sort.
The sort type is an index type which could be lost, e.g.,
when the [forget] function is applied or when the sort is
stored and read from its textual representation.
The [refine] function will re-instantiate the type index if
the constructor name of the sort [s] is the [name].
This function gives a mandate for the refine function to
index the sort [s] with any type, which will breach the sort
type safety, therefore this function should be used with
care and be hidden behind the abstraction barrier and have a
concrete type.
See the Example section in the module documentation for the
demonstration of how refine should be used.
*)
val refine : name -> unit sort -> 'a t option
(** [forget s] forgets the type index associated with the sort [s].
This is effectively an upcasting function, that could be
used when the typing information is not necessary
anymore or is not representable. The type index could be
later restored with the [refine] function.
*)
val forget : 'a t -> unit t
(** [same x y] is true if [x] and [y] are of the same structure. *)
val same : 'a t -> 'b t -> bool
(** prints the sort. *)
val pp : formatter -> 'a t -> unit
(** Sorts with erased type indices.
This module enables construction of complex data structures
on sorts, like maps, sets, etc, e.g.,
[let sorts = Set.empty (module Value.Sort.Top)]
Since such structures are required to be monomorphic, the
sort type index should be removed using the [forget] function,
before a sort could be stored in it.
Note, that the type index is only removed from the meta
language (OCaml) type, but is preserved in the value term,
so it could be reconstructed (refined) later.
*)
module Top : sig
type t = unit sort [@@deriving bin_io, compare, sexp]
include Base.Comparable.S with type t := t
end
(** The name registry.
Names of symbols must be unique as the name is used as a
witness of authenticity of the sort. Once obtained, the name
should be kept secret beyond the module signature.
See the Example section of the [Sort] module documentation
for more information.
*)
module Name : sig
type t
(** [declare ?package name] declares a new [name].
The declared name must be unique to the [package]. If such
name is already declared in the [package], then the
declaration fails.
*)
val declare : ?package:string -> string -> name
include Base.Comparable.S with type t := t
end
end
end
(** The denotation of statements.
An effect is a Knowledge value that is used to give a denotation
to the language forms that do not evaluate to values but change
the state of the system, i.e., to what is usually called
"statement".
All effects belong to the same Knowledge class and share the
same set of properties, with each property being a specific
denotation provided by on or more theories. For example,
[bap.std:bil] slot holds the denotation of a value in terms of
the BIL statements.
*)
module Effect : sig
(** a type for the effect sort *)
type +'a sort
(** the class of effects. *)
type cls
(** the effect type is an instance of the Knowledge.value *)
type 'a t = (cls,'a sort) KB.cls KB.value
(** the class of all effects. *)
val cls : (cls,unit) KB.cls
(** [empty s] creates an empty effect value.
The empty effect denotes an absence of any specific knowledge
about the effects produced by a term.
*)
val empty : 'a sort -> 'a t
(** [sort eff] returns the sort of the effect [eff]. *)
val sort : 'a t -> 'a sort
(** Effect sorts.
The sort of an effect holds static information that is common
to all effects of that sort.
We distinguish two kinds of effects - [ctrl] effects that affect
which instructions will be executed next and [data] effects that
affect only the values in the computer storage.
The [unit] effect represents an effect that is a mixture of
[ctrl] and [data] effects.
*)
module Sort : sig
type +'a t = 'a sort
type data = private Data
type ctrl = private Ctrl
(** [data kind] defines a data effect of the [kind]. *)
val data : string -> data t
(** [ctrl kind] defines a ctrl effect of the [kind]. *)
val ctrl : string -> ctrl t
(** [top] is a set of all possible effects.
This sort indicates that the statement can have any effect.
*)
val top : unit t
(** [bot] is an empty set of effects.
This sort indicates that the statement doesn't have any
observable effects, thus it could be coerced to any other
sort.
*)
val bot : 'a t
(** [both s1 s2] an effect of both [s1] and [s2] *)
val both : 'a t -> 'a t -> 'a t
(** [s1 && s2] is [both s1 s2]. *)
val (&&) : 'a t -> 'a t -> 'a t
(** [union [s1;...;sN] is [s1 && ... && sN]. *)
val union : 'a t list -> 'a t
(** [join xs ys] is [union [union xs; union ys ]]. *)
val join : 'a t list -> 'b t list -> unit t
(** [order xs ys] orders effects by the order of inclusion.
[xs] is before [ys] if [ys] includes all effects of [xs],
otherwise.
*)
val order : 'a t -> 'b t -> KB.Order.partial
(** the register read effect. *)
val rreg : data t
(** the register write effect. *)
val wreg : data t
(** the memory read effect. *)
val rmem : data t
(** is the memory write effect. *)
val wmem : data t
(** the memory barrier effect *)
val barr : data t
(** the normal control flow effect *)
val fall : ctrl t
(** the jump effect. *)
val jump : ctrl t
(** the conditional branching effect *)
val cjmp : ctrl t
end
end
type 'a value = 'a Value.t
type 'a effect = 'a Effect.t
(** The sort for boolean values.
Booleans are one bit values.
*)
module Bool : sig
type t
(** the Bool sort. *)
val t : t Value.sort
(** [refine s] if [s] is [Bool] then returns [Some t]. *)
val refine : unit Value.sort -> t Value.sort option
end
(** Sorts of bitvectors *)
module Bitv : sig
type 'a t
(** [define size] defines a sort of bitvectors of the given [size]. *)
val define : int -> 'a t Value.sort
(** [refine s] if [s] is a bitvector sort, then restores its type. *)
val refine : unit Value.sort -> 'a t Value.sort option
(** [size s] the [size] argument of [s]. *)
val size : 'a t Value.sort -> int
end
(** Sorts of memories.
A memory is an associative container of bitvectors indexed with
bitvector keys.
*)
module Mem : sig
type ('a,'b) t
(** [define ks vs] a sort of memories with keys of type [ks] and
data of type [vs]. *)
val define : 'a Bitv.t Value.sort -> 'b Bitv.t Value.sort -> ('a,'b) t Value.sort