Skip to content

Commit 5cbab07

Browse files
committed
DataFlowSanitizer; compiler-rt changes.
DataFlowSanitizer is a generalised dynamic data flow analysis. Unlike other Sanitizer tools, this tool is not designed to detect a specific class of bugs on its own. Instead, it provides a generic dynamic data flow analysis framework to be used by clients to help detect application-specific issues within their own code. Differential Revision: http://llvm-reviews.chandlerc.com/D967 llvm-svn: 187924
1 parent e5d5b0c commit 5cbab07

File tree

14 files changed

+541
-1
lines changed

14 files changed

+541
-1
lines changed

compiler-rt/include/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
set(SANITIZER_HEADERS
22
sanitizer/asan_interface.h
33
sanitizer/common_interface_defs.h
4+
sanitizer/dfsan_interface.h
45
sanitizer/linux_syscall_hooks.h
56
sanitizer/lsan_interface.h
67
sanitizer/msan_interface.h)
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
//===-- dfsan_interface.h -------------------------------------------------===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// This file is a part of DataFlowSanitizer.
11+
//
12+
// Public interface header.
13+
//===----------------------------------------------------------------------===//
14+
#ifndef DFSAN_INTERFACE_H
15+
#define DFSAN_INTERFACE_H
16+
17+
#include <stddef.h>
18+
#include <stdint.h>
19+
#include <sanitizer/common_interface_defs.h>
20+
21+
#ifdef __cplusplus
22+
extern "C" {
23+
#endif
24+
25+
typedef uint16_t dfsan_label;
26+
27+
/// Stores information associated with a specific label identifier. A label
28+
/// may be a base label created using dfsan_create_label, with associated
29+
/// text description and user data, or an automatically created union label,
30+
/// which represents the union of two label identifiers (which may themselves
31+
/// be base or union labels).
32+
struct dfsan_label_info {
33+
// Fields for union labels, set to 0 for base labels.
34+
dfsan_label l1;
35+
dfsan_label l2;
36+
37+
// Fields for base labels.
38+
const char *desc;
39+
void *userdata;
40+
};
41+
42+
/// Creates and returns a base label with the given description and user data.
43+
dfsan_label dfsan_create_label(const char *desc, void *userdata);
44+
45+
/// Sets the label for each address in [addr,addr+size) to \c label.
46+
void dfsan_set_label(dfsan_label label, void *addr, size_t size);
47+
48+
/// Sets the label for each address in [addr,addr+size) to the union of the
49+
/// current label for that address and \c label.
50+
void dfsan_add_label(dfsan_label label, void *addr, size_t size);
51+
52+
/// Retrieves the label associated with the given data.
53+
///
54+
/// The type of 'data' is arbitrary. The function accepts a value of any type,
55+
/// which can be truncated or extended (implicitly or explicitly) as necessary.
56+
/// The truncation/extension operations will preserve the label of the original
57+
/// value.
58+
dfsan_label dfsan_get_label(long data);
59+
60+
/// Retrieves a pointer to the dfsan_label_info struct for the given label.
61+
const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label);
62+
63+
/// Returns whether the given label \c label contains the label \c elem.
64+
int dfsan_has_label(dfsan_label label, dfsan_label elem);
65+
66+
/// If the given label label contains a label with the description desc, returns
67+
/// that label, else returns 0.
68+
dfsan_label dfsan_has_label_with_desc(dfsan_label label, const char *desc);
69+
70+
#ifdef __cplusplus
71+
} // extern "C"
72+
73+
template <typename T>
74+
void dfsan_set_label(dfsan_label label, T &data) {
75+
dfsan_set_label(label, (void *)&data, sizeof(T));
76+
}
77+
78+
#endif
79+
80+
#endif // DFSAN_INTERFACE_H

compiler-rt/lib/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux" AND NOT ANDROID)
1717
add_subdirectory(tsan)
1818
add_subdirectory(msan)
1919
add_subdirectory(msandr)
20+
add_subdirectory(dfsan)
2021
endif()
2122

2223
# The top-level lib directory contains a large amount of C code which provides

compiler-rt/lib/Makefile.mk

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ SubDirs += tsan
2222
SubDirs += msan
2323
SubDirs += ubsan
2424
SubDirs += lsan
25+
SubDirs += dfsan
2526

2627
# Define the variables for this specific directory.
2728
Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))

compiler-rt/lib/dfsan/CMakeLists.txt

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
include_directories(..)
2+
3+
# Runtime library sources and build flags.
4+
set(DFSAN_RTL_SOURCES
5+
dfsan.cc
6+
)
7+
set(DFSAN_RTL_CFLAGS
8+
${SANITIZER_COMMON_CFLAGS}
9+
# Prevent clang from generating libc calls.
10+
-ffreestanding)
11+
12+
# Static runtime library.
13+
set(DFSAN_RUNTIME_LIBRARIES)
14+
set(arch "x86_64")
15+
if(CAN_TARGET_${arch})
16+
add_compiler_rt_static_runtime(clang_rt.dfsan-${arch} ${arch}
17+
SOURCES ${DFSAN_RTL_SOURCES}
18+
$<TARGET_OBJECTS:RTInterception.${arch}>
19+
$<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
20+
$<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>
21+
CFLAGS ${DFSAN_RTL_CFLAGS} -fPIE)
22+
add_compiler_rt_static_runtime(clang_rt.dfsan-libc-${arch} ${arch}
23+
SOURCES ${DFSAN_RTL_SOURCES}
24+
$<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
25+
CFLAGS ${DFSAN_RTL_CFLAGS} -fPIC -DDFSAN_NOLIBC)
26+
list(APPEND DFSAN_RUNTIME_LIBRARIES clang_rt.dfsan-${arch})
27+
endif()
28+
29+
add_subdirectory(lit_tests)

compiler-rt/lib/dfsan/Makefile.mk

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#===- lib/dfsan/Makefile.mk --------------------------------*- Makefile -*--===#
2+
#
3+
# The LLVM Compiler Infrastructure
4+
#
5+
# This file is distributed under the University of Illinois Open Source
6+
# License. See LICENSE.TXT for details.
7+
#
8+
#===------------------------------------------------------------------------===#
9+
10+
ModuleName := dfsan
11+
SubDirs :=
12+
13+
Sources := $(foreach file,$(wildcard $(Dir)/*.cc),$(notdir $(file)))
14+
ObjNames := $(Sources:%.cc=%.o)
15+
16+
Implementation := Generic
17+
18+
# FIXME: use automatic dependencies?
19+
Dependencies := $(wildcard $(Dir)/*.h)
20+
Dependencies += $(wildcard $(Dir)/../sanitizer_common/*.h)
21+
22+
# Define a convenience variable for all the dfsan functions.
23+
DfsanFunctions := $(Sources:%.cc=%)

compiler-rt/lib/dfsan/dfsan.cc

Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
//===-- dfsan.cc ----------------------------------------------------------===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// This file is a part of DataFlowSanitizer.
11+
//
12+
// DataFlowSanitizer runtime. This file defines the public interface to
13+
// DataFlowSanitizer as well as the definition of certain runtime functions
14+
// called automatically by the compiler (specifically the instrumentation pass
15+
// in llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp).
16+
//
17+
// The public interface is defined in include/sanitizer/dfsan_interface.h whose
18+
// functions are prefixed dfsan_ while the compiler interface functions are
19+
// prefixed __dfsan_.
20+
//===----------------------------------------------------------------------===//
21+
22+
#include "sanitizer/dfsan_interface.h"
23+
#include "sanitizer_common/sanitizer_atomic.h"
24+
#include "sanitizer_common/sanitizer_common.h"
25+
#include "sanitizer_common/sanitizer_libc.h"
26+
27+
typedef atomic_uint16_t atomic_dfsan_label;
28+
static const dfsan_label kInitializingLabel = -1;
29+
30+
static const uptr kNumLabels = 1 << (sizeof(dfsan_label) * 8);
31+
32+
static atomic_dfsan_label __dfsan_last_label;
33+
static dfsan_label_info __dfsan_label_info[kNumLabels];
34+
35+
SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL dfsan_label __dfsan_retval_tls;
36+
SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL dfsan_label __dfsan_arg_tls[64];
37+
38+
// On Linux/x86_64, memory is laid out as follows:
39+
//
40+
// +--------------------+ 0x800000000000 (top of memory)
41+
// | application memory |
42+
// +--------------------+ 0x700000008000 (kAppAddr)
43+
// | |
44+
// | unused |
45+
// | |
46+
// +--------------------+ 0x200200000000 (kUnusedAddr)
47+
// | union table |
48+
// +--------------------+ 0x200000000000 (kUnionTableAddr)
49+
// | shadow memory |
50+
// +--------------------+ 0x000000010000 (kShadowAddr)
51+
// | reserved by kernel |
52+
// +--------------------+ 0x000000000000
53+
//
54+
// To derive a shadow memory address from an application memory address,
55+
// bits 44-46 are cleared to bring the address into the range
56+
// [0x000000008000,0x100000000000). Then the address is shifted left by 1 to
57+
// account for the double byte representation of shadow labels and move the
58+
// address into the shadow memory range. See the function shadow_for below.
59+
60+
typedef atomic_dfsan_label dfsan_union_table_t[kNumLabels][kNumLabels];
61+
62+
static const uptr kShadowAddr = 0x10000;
63+
static const uptr kUnionTableAddr = 0x200000000000;
64+
static const uptr kUnusedAddr = kUnionTableAddr + sizeof(dfsan_union_table_t);
65+
static const uptr kAppAddr = 0x700000008000;
66+
67+
// Returns a pointer to the union table entry for the label pair (l1, l2).
// The table is not a C++ object; it is addressed directly at the fixed
// location kUnionTableAddr.
static atomic_dfsan_label *union_table(dfsan_label l1, dfsan_label l2) {
  dfsan_union_table_t *table = (dfsan_union_table_t *) kUnionTableAddr;
  return &(*table)[l1][l2];
}
70+
71+
// Maps an application address to the address of its shadow label: the bits
// selected by the mask 0x700000000000 are cleared, and the result is shifted
// left by one because each application byte has a two-byte label.
static dfsan_label *shadow_for(void *ptr) {
  uintptr_t app_addr = (uintptr_t) ptr;
  uintptr_t masked = app_addr & ~0x700000000000;
  return (dfsan_label *) (masked << 1);
}
74+
75+
// Resolves the union of two unequal labels. Nonequality is a precondition for
76+
// this function (the instrumentation pass inlines the equality test).
77+
extern "C" SANITIZER_INTERFACE_ATTRIBUTE
dfsan_label __dfsan_union(dfsan_label l1, dfsan_label l2) {
  DCHECK_NE(l1, l2);

  // Label 0 is the identity: the union with it is the other label.
  if (l1 == 0)
    return l2;
  if (l2 == 0)
    return l1;

  // Normalise the pair so that l1 < l2; only that table entry is ever used.
  if (l1 > l2)
    Swap(l1, l2);

  atomic_dfsan_label *entry = union_table(l1, l2);

  // Two threads may concurrently request the union of the same pair. An
  // entry of 0 is uninitialized; try to claim it by swapping in
  // kInitializingLabel (i.e. -1). On failure, 'result' receives the value
  // that was found in the entry.
  dfsan_label result = 0;
  const bool claimed = atomic_compare_exchange_strong(
      entry, &result, kInitializingLabel, memory_order_acquire);

  if (!claimed) {
    // Either the entry already holds the final label, or another thread is
    // still initializing it; in the latter case wait until it is finished.
    while (result == kInitializingLabel) {
      internal_sched_yield();
      result = atomic_load(entry, memory_order_acquire);
    }
    return result;
  }

  // This thread owns the initialization of the entry.
  //
  // Check whether l2 subsumes l1. There is no need to check whether l1
  // subsumes l2: l1 < l2 holds here, and (at least in the cases we are
  // interested in) a label may only subsume labels created earlier (i.e.
  // with a lower numerical value).
  const dfsan_label_info &l2_info = __dfsan_label_info[l2];
  if (l2_info.l1 == l1 || l2_info.l2 == l1) {
    result = l2;
  } else {
    // Allocate a fresh label and record its two components.
    result =
        atomic_fetch_add(&__dfsan_last_label, 1, memory_order_relaxed) + 1;
    CHECK_NE(result, kInitializingLabel);
    __dfsan_label_info[result].l1 = l1;
    __dfsan_label_info[result].l2 = l2;
  }

  // Publish the final label, replacing the kInitializingLabel marker.
  atomic_store(entry, result, memory_order_release);
  return result;
}
121+
122+
// Returns the union of the n labels stored consecutively starting at ls.
//
// The running label is only passed to __dfsan_union when it differs from the
// next label, because inequality is a precondition of __dfsan_union.
//
// An empty range (n == 0) yields label 0. Without this guard the loop below,
// which starts at index 1, would never meet its end condition for n == 0 and
// would read far beyond the range.
extern "C" SANITIZER_INTERFACE_ATTRIBUTE
dfsan_label __dfsan_union_load(dfsan_label *ls, size_t n) {
  if (n == 0)
    return 0;

  dfsan_label label = ls[0];
  for (size_t i = 1; i < n; ++i) {
    dfsan_label next_label = ls[i];
    if (label != next_label)
      label = __dfsan_union(label, next_label);
  }
  return label;
}
132+
133+
// Copies n bytes from src to dest together with their shadow labels.
// Returns the result of the application-memory copy.
extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void *__dfsan_memcpy(void *dest, const void *src, size_t n) {
  dfsan_label *dest_shadow = shadow_for(dest);
  dfsan_label *src_shadow = shadow_for((void *)src);

  // One label per application byte, so the shadow copy covers n labels.
  const size_t shadow_bytes = n * sizeof(dfsan_label);
  internal_memcpy((void *)dest_shadow, (void *)src_shadow, shadow_bytes);

  return internal_memcpy(dest, src, n);
}
139+
140+
// Allocates the next label identifier and records it as a base label with
// the given description and user data.
SANITIZER_INTERFACE_ATTRIBUTE
dfsan_label dfsan_create_label(const char *desc, void *userdata) {
  const dfsan_label previous =
      atomic_fetch_add(&__dfsan_last_label, 1, memory_order_relaxed);
  dfsan_label label = previous + 1;
  // The value of kInitializingLabel is reserved as the union table's
  // "being initialized" marker and must never be handed out as a label.
  CHECK_NE(label, kInitializingLabel);

  dfsan_label_info &info = __dfsan_label_info[label];
  // A base label has no union components.
  info.l1 = 0;
  info.l2 = 0;
  info.desc = desc;
  info.userdata = userdata;

  __dfsan_retval_tls = 0;  // Ensures return value is unlabelled in the caller.
  return label;
}
151+
152+
// Overwrites the shadow label of each of the size bytes starting at addr
// with label.
SANITIZER_INTERFACE_ATTRIBUTE
void dfsan_set_label(dfsan_label label, void *addr, size_t size) {
  dfsan_label *shadow = shadow_for(addr);
  for (size_t i = 0; i < size; ++i)
    shadow[i] = label;
}
157+
158+
// Replaces the shadow label of each of the size bytes starting at addr with
// the union of its current label and label.
SANITIZER_INTERFACE_ATTRIBUTE
void dfsan_add_label(dfsan_label label, void *addr, size_t size) {
  dfsan_label *shadow = shadow_for(addr);
  for (size_t i = 0; i < size; ++i) {
    const dfsan_label current = shadow[i];
    // __dfsan_union requires its operands to differ; equal labels need no
    // update anyway.
    if (current != label)
      shadow[i] = __dfsan_union(current, label);
  }
}
164+
165+
SANITIZER_INTERFACE_ATTRIBUTE dfsan_label dfsan_get_label(long data) {
  // The instrumentation pass implicitly passes the label for 'data' in the
  // first element of __dfsan_arg_tls, so the argument value itself is not
  // inspected.
  const dfsan_label data_label = __dfsan_arg_tls[0];
  __dfsan_retval_tls = 0;  // Ensures return value is unlabelled in the caller.
  return data_label;
}
171+
172+
// Returns a pointer to the __dfsan_label_info slot for label.
SANITIZER_INTERFACE_ATTRIBUTE
const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label) {
  __dfsan_retval_tls = 0;  // Ensures return value is unlabelled in the caller.
  const struct dfsan_label_info *info = __dfsan_label_info + label;
  return info;
}
177+
178+
int dfsan_has_label(dfsan_label label, dfsan_label elem) {
179+
__dfsan_retval_tls = 0; // Ensures return value is unlabelled in the caller.
180+
if (label == elem)
181+
return true;
182+
const dfsan_label_info *info = dfsan_get_label_info(label);
183+
if (info->l1 != 0) {
184+
return dfsan_has_label(info->l1, elem) || dfsan_has_label(info->l2, elem);
185+
} else {
186+
return false;
187+
}
188+
}
189+
190+
dfsan_label dfsan_has_label_with_desc(dfsan_label label, const char *desc) {
191+
__dfsan_retval_tls = 0; // Ensures return value is unlabelled in the caller.
192+
const dfsan_label_info *info = dfsan_get_label_info(label);
193+
if (info->l1 != 0) {
194+
return dfsan_has_label_with_desc(info->l1, desc) ||
195+
dfsan_has_label_with_desc(info->l2, desc);
196+
} else {
197+
return internal_strcmp(desc, info->desc) == 0;
198+
}
199+
}
200+
201+
#ifdef DFSAN_NOLIBC
202+
extern "C" void dfsan_init() {
203+
#else
204+
static void dfsan_init(int argc, char **argv, char **envp) {
205+
#endif
206+
MmapFixedNoReserve(kShadowAddr, kUnusedAddr - kShadowAddr);
207+
208+
// Protect the region of memory we don't use, to preserve the one-to-one
209+
// mapping from application to shadow memory. But if ASLR is disabled, Linux
210+
// will load our executable in the middle of our unused region. This mostly
211+
// works so long as the program doesn't use too much memory. We support this
212+
// case by disabling memory protection when ASLR is disabled.
213+
uptr init_addr = (uptr)&dfsan_init;
214+
if (!(init_addr >= kUnusedAddr && init_addr < kAppAddr))
215+
Mprotect(kUnusedAddr, kAppAddr - kUnusedAddr);
216+
}
217+
218+
#ifndef DFSAN_NOLIBC
219+
__attribute__((section(".preinit_array"), used))
220+
static void (*dfsan_init_ptr)(int, char **, char **) = dfsan_init;
221+
#endif

0 commit comments

Comments
 (0)