From 5ffd154cf61390c1ed32a1b0eb134929f78c0fbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= Date: Thu, 16 May 2024 12:10:56 +0200 Subject: [PATCH] [analyzer] Clean up list of taint propagation functions (#91635) This commit refactors GenericTaintChecker and performs various improvements in the list of taint propagation functions: 1. The matching mode (usually `CDM::CLibrary` or `CDM::CLibraryMaybeHardened`) was specified to avoid matching e.g. C++ methods or functions from a user-defined namespace that happen to share the name of a well-known library function. 2. With these matching modes, a `CallDescription` can automatically match builtin variants of the functions, so entries that explicitly specified a builtin function were removed. This eliminated inconsistencies where the "normal" and the builtin variant of the same function was handled differently (e.g. `__builtin_strlcat` was covered, while plain `strlcat` wasn't; while `__builtin_memcpy` and `memcpy` were both on the list with different propagation rules). 3. The modeling of the functions `strlcat` and `strncat` was updated to propagate taint from the first argument (index 0), because a tainted string should remain tainted even if we append something else to it. Note that this was already applied to `strcat` and `wcsncat` by commit 6ceb1c0ef9f544be0eed65e46cc7d99941a001bf. 4. Some functions were updated to propagate taint from a size/length argument to the result: e.g. `memcmp(p, q, get_tainted_int())` will now return a tainted value (because the attacker can manipulate it). This principle was already present in some propagation rules (e.g. `__builtin_memcpy` was handled this way), and even after this commit there are still some functions where it isn't applied. (I only aimed for consistency within the same function family.) 5. Functions that have hardened `__FOO_chk()` variants are matched in `CDM:CLibraryMaybeHardened` to ensure consistent handling of the "normal" and the hardened variant. I added special handling for the hardened variants of "sprintf" and "snprintf" because there the extra parameters are inserted into the middle of the parameter list. 6. Modeling of `sscanf_s` was added, to complete the group of `fscanf`, `fscanf_s` and `sscanf`. 7. The `Source()` specifications for `gets`, `gets_s` and `wgetch` were ill-formed: they were specifying variadic arguments starting at argument index `ReturnValueIndex`. (That is, in addition to the return value they were propagating taint to all arguments.) 8. Functions that were related to each other were grouped together. (I know that this makes the diff harder to read, but I felt that the full list is unreadable without some reorganization.) 9. I spotted and removed some redundant curly braces. Perhaps would be good to switch to a cleaner layout with less nested braces... 10. I updated some obsolete comments and added two TODOs for issues that should be fixed in followup commits. --- .../Checkers/GenericTaintChecker.cpp | 370 ++++++++++-------- 1 file changed, 204 insertions(+), 166 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp index a0190c30bfd28..eb40a4cd3d902 100644 --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -400,17 +400,14 @@ class GenericTaintChecker : public Checker { void taintUnsafeSocketProtocol(const CallEvent &Call, CheckerContext &C) const; - /// Default taint rules are initalized with the help of a CheckerContext to - /// access the names of built-in functions like memcpy. + /// The taint rules are initalized with the help of a CheckerContext to + /// access user-provided configuration. void initTaintRules(CheckerContext &C) const; - /// CallDescription currently cannot restrict matches to the global namespace - /// only, which is why multiple CallDescriptionMaps are used, as we want to - /// disambiguate global C functions from functions inside user-defined - /// namespaces. - // TODO: Remove separation to simplify matching logic once CallDescriptions - // are more expressive. - + // TODO: The two separate `CallDescriptionMap`s were introduced when + // `CallDescription` was unable to restrict matches to the global namespace + // only. This limitation no longer exists, so the following two maps should + // be unified. mutable std::optional StaticTaintRules; mutable std::optional DynamicTaintRules; }; @@ -506,7 +503,8 @@ void GenericTaintRuleParser::consumeRulesFromConfig(const Config &C, GenericTaintRule &&Rule, RulesContTy &Rules) { NamePartsTy NameParts = parseNameParts(C); - Rules.emplace_back(CallDescription(NameParts), std::move(Rule)); + Rules.emplace_back(CallDescription(CDM::Unspecified, NameParts), + std::move(Rule)); } void GenericTaintRuleParser::parseConfig(const std::string &Option, @@ -572,196 +570,236 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { std::vector>; using TR = GenericTaintRule; - const Builtin::Context &BI = C.getASTContext().BuiltinInfo; - RulesConstructionTy GlobalCRules{ // Sources - {{{"fdopen"}}, TR::Source({{ReturnValueIndex}})}, - {{{"fopen"}}, TR::Source({{ReturnValueIndex}})}, - {{{"freopen"}}, TR::Source({{ReturnValueIndex}})}, - {{{"getch"}}, TR::Source({{ReturnValueIndex}})}, - {{{"getchar"}}, TR::Source({{ReturnValueIndex}})}, - {{{"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})}, - {{{"gets"}}, TR::Source({{0}, ReturnValueIndex})}, - {{{"gets_s"}}, TR::Source({{0}, ReturnValueIndex})}, - {{{"scanf"}}, TR::Source({{}, 1})}, - {{{"scanf_s"}}, TR::Source({{}, {1}})}, - {{{"wgetch"}}, TR::Source({{}, ReturnValueIndex})}, + {{CDM::CLibrary, {"fdopen"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"fopen"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"freopen"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getch"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getchar"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"gets"}}, TR::Source({{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {"gets_s"}}, TR::Source({{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {"scanf"}}, TR::Source({{}, 1})}, + {{CDM::CLibrary, {"scanf_s"}}, TR::Source({{}, 1})}, + {{CDM::CLibrary, {"wgetch"}}, TR::Source({{ReturnValueIndex}})}, // Sometimes the line between taint sources and propagators is blurry. // _IO_getc is choosen to be a source, but could also be a propagator. // This way it is simpler, as modeling it as a propagator would require // to model the possible sources of _IO_FILE * values, which the _IO_getc // function takes as parameters. - {{{"_IO_getc"}}, TR::Source({{ReturnValueIndex}})}, - {{{"getcwd"}}, TR::Source({{0, ReturnValueIndex}})}, - {{{"getwd"}}, TR::Source({{0, ReturnValueIndex}})}, - {{{"readlink"}}, TR::Source({{1, ReturnValueIndex}})}, - {{{"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})}, - {{{"get_current_dir_name"}}, TR::Source({{ReturnValueIndex}})}, - {{{"gethostname"}}, TR::Source({{0}})}, - {{{"getnameinfo"}}, TR::Source({{2, 4}})}, - {{{"getseuserbyname"}}, TR::Source({{1, 2}})}, - {{{"getgroups"}}, TR::Source({{1, ReturnValueIndex}})}, - {{{"getlogin"}}, TR::Source({{ReturnValueIndex}})}, - {{{"getlogin_r"}}, TR::Source({{0}})}, + {{CDM::CLibrary, {"_IO_getc"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getcwd"}}, TR::Source({{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {"getwd"}}, TR::Source({{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {"readlink"}}, TR::Source({{1, ReturnValueIndex}})}, + {{CDM::CLibrary, {"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})}, + {{CDM::CLibrary, {"get_current_dir_name"}}, + TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"gethostname"}}, TR::Source({{0}})}, + {{CDM::CLibrary, {"getnameinfo"}}, TR::Source({{2, 4}})}, + {{CDM::CLibrary, {"getseuserbyname"}}, TR::Source({{1, 2}})}, + {{CDM::CLibrary, {"getgroups"}}, TR::Source({{1, ReturnValueIndex}})}, + {{CDM::CLibrary, {"getlogin"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getlogin_r"}}, TR::Source({{0}})}, // Props - {{{"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"fgets"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})}, - {{{"fgetws"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})}, - {{{"fscanf"}}, TR::Prop({{0}}, {{}, 2})}, - {{{"fscanf_s"}}, TR::Prop({{0}}, {{}, {2}})}, - {{{"sscanf"}}, TR::Prop({{0}}, {{}, 2})}, - - {{{"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"getc_unlocked"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"getdelim"}}, TR::Prop({{3}}, {{0}})}, - {{{"getline"}}, TR::Prop({{2}}, {{0}})}, - {{{"getw"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"pread"}}, TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})}, - {{{"read"}}, TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})}, - {{{"strchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"strrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"tolower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"toupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"fread"}}, TR::Prop({{3}}, {{0, ReturnValueIndex}})}, - {{{"recv"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, - {{{"recvfrom"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, - - {{{"ttyname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"ttyname_r"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, - - {{{"basename"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"dirname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"fnmatch"}}, TR::Prop({{1}}, {{ReturnValueIndex}})}, - {{{"memchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"memrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"rawmemchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - - {{{"mbtowc"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, - {{{"wctomb"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, - {{{"wcwidth"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - - {{{"memcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, - {{{"memcpy"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, - {{{"memmove"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, - // If memmem was called with a tainted needle and the search was - // successful, that would mean that the value pointed by the return value - // has the same content as the needle. If we choose to go by the policy of - // content equivalence implies taintedness equivalence, that would mean - // haystack should be considered a propagation source argument. - {{{"memmem"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - - // The comment for memmem above also applies to strstr. - {{{"strstr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"strcasestr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - - {{{"strchrnul"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - - {{{"index"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"rindex"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"fgets"}}, + TR::Prop({{2}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"fgetws"}}, + TR::Prop({{2}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {"fscanf"}}, TR::Prop({{0}}, {{}, 2})}, + {{CDM::CLibrary, {"fscanf_s"}}, TR::Prop({{0}}, {{}, 2})}, + {{CDM::CLibrary, {"sscanf"}}, TR::Prop({{0}}, {{}, 2})}, + {{CDM::CLibrary, {"sscanf_s"}}, TR::Prop({{0}}, {{}, 2})}, + + {{CDM::CLibrary, {"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getc_unlocked"}}, + TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getdelim"}}, TR::Prop({{3}}, {{0}})}, + // TODO: this intends to match the C function `getline()`, but the call + // description also matches the C++ function `std::getline()`; it should + // be ruled out by some additional logic. + {{CDM::CLibrary, {"getline"}}, TR::Prop({{2}}, {{0}})}, + {{CDM::CLibrary, {"getw"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"pread"}}, + TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"read"}}, + TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"fread"}}, + TR::Prop({{3}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"recv"}}, + TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"recvfrom"}}, + TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + + {{CDM::CLibrary, {"ttyname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"ttyname_r"}}, + TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + + {{CDM::CLibrary, {"basename"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"dirname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"fnmatch"}}, TR::Prop({{1}}, {{ReturnValueIndex}})}, + + {{CDM::CLibrary, {"mbtowc"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {"wctomb"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {"wcwidth"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + {{CDM::CLibrary, {"memcmp"}}, + TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"memcpy"}}, + TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"memmove"}}, + TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"bcopy"}}, TR::Prop({{0, 2}}, {{1}})}, + + // Note: "memmem" and its variants search for a byte sequence ("needle") + // in a larger area ("haystack"). Currently we only propagate taint from + // the haystack to the result, but in theory tampering with the needle + // could also produce incorrect results. + {{CDM::CLibrary, {"memmem"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strstr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strcasestr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + // Analogously, the following functions search for a byte within a buffer + // and we only propagate taint from the buffer to the result. + {{CDM::CLibraryMaybeHardened, {"memchr"}}, + TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"memrchr"}}, + TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"rawmemchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"strchr"}}, + TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"strrchr"}}, + TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"strchrnul"}}, + TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"index"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"rindex"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, // FIXME: In case of arrays, only the first element of the array gets // tainted. - {{{"qsort"}}, TR::Prop({{0}}, {{0}})}, - {{{"qsort_r"}}, TR::Prop({{0}}, {{0}})}, - - {{{"strcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, - {{{"strcasecmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, - {{{"strncmp"}}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})}, - {{{"strncasecmp"}}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})}, - {{{"strspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, - {{{"strcspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, - {{{"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"strndup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"strndupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"qsort"}}, TR::Prop({{0}}, {{0}})}, + {{CDM::CLibrary, {"qsort_r"}}, TR::Prop({{0}}, {{0}})}, + + {{CDM::CLibrary, {"strcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strcasecmp"}}, + TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strncmp"}}, + TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strncasecmp"}}, + TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strcspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + {{CDM::CLibrary, {"strndup"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strndupa"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strdupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"wcsdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, // strlen, wcslen, strnlen and alike intentionally don't propagate taint. // See the details here: https://github.com/llvm/llvm-project/pull/66086 - {{{"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, - {{{"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, - {{{"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, - {{{"strtoull"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, - - {{{"isalnum"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"isalpha"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"isascii"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"isblank"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"iscntrl"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"isdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"isgraph"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"islower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"isprint"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"ispunct"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"isspace"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"isupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"isxdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - - {{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BIstrncat)}}, - TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BIstrlcpy)}}, - TR::Prop({{1, 2}}, {{0}})}, - {{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BIstrlcat)}}, - TR::Prop({{1, 2}}, {{0}})}, - {{CDM::CLibraryMaybeHardened, {{"snprintf"}}}, - TR::Prop({{1}, 3}, {{0, ReturnValueIndex}})}, - {{CDM::CLibraryMaybeHardened, {{"sprintf"}}}, - TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})}, - {{CDM::CLibraryMaybeHardened, {{"strcpy"}}}, + {{CDM::CLibrary, {"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + {{CDM::CLibrary, {"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + {{CDM::CLibrary, {"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + {{CDM::CLibrary, {"strtoull"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + + {{CDM::CLibrary, {"tolower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"toupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + {{CDM::CLibrary, {"isalnum"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"isalpha"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"isascii"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"isblank"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"iscntrl"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"isdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"isgraph"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"islower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"isprint"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"ispunct"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"isspace"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"isupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"isxdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + {{CDM::CLibraryMaybeHardened, {"strcpy"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibraryMaybeHardened, {{"stpcpy"}}}, + {{CDM::CLibraryMaybeHardened, {"stpcpy"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibraryMaybeHardened, {{"strcat"}}}, + {{CDM::CLibraryMaybeHardened, {"strcat"}}, TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibraryMaybeHardened, {{"wcsncat"}}}, + {{CDM::CLibraryMaybeHardened, {"wcsncat"}}, TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibrary, {{"strdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{CDM::CLibrary, {{"strdupa"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{CDM::CLibrary, {{"wcsdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{CDM::CLibrary, BI.getName(Builtin::BImemcpy)}, - TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibrary, {BI.getName(Builtin::BImemmove)}}, + {{CDM::CLibraryMaybeHardened, {"strncpy"}}, TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibrary, {BI.getName(Builtin::BIstrncpy)}}, - TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibrary, {BI.getName(Builtin::BIstrndup)}}, - TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, - {{CDM::CLibrary, {"bcopy"}}, TR::Prop({{0, 2}}, {{1}})}, + {{CDM::CLibraryMaybeHardened, {"strncat"}}, + TR::Prop({{0, 1, 2}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"strlcpy"}}, TR::Prop({{1, 2}}, {{0}})}, + {{CDM::CLibraryMaybeHardened, {"strlcat"}}, TR::Prop({{0, 1, 2}}, {{0}})}, + + // Usually the matching mode `CDM::CLibraryMaybeHardened` is sufficient + // for unified handling of a function `FOO()` and its hardened variant + // `__FOO_chk()`, but in the "sprintf" family the extra parameters of the + // hardened variants are inserted into the middle of the parameter list, + // so that would not work in their case. + // int snprintf(char * str, size_t maxlen, const char * format, ...); + {{CDM::CLibrary, {"snprintf"}}, + TR::Prop({{1, 2}, 3}, {{0, ReturnValueIndex}})}, + // int sprintf(char * str, const char * format, ...); + {{CDM::CLibrary, {"sprintf"}}, + TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})}, + // int __snprintf_chk(char * str, size_t maxlen, int flag, size_t strlen, + // const char * format, ...); + {{CDM::CLibrary, {"__snprintf_chk"}}, + TR::Prop({{1, 4}, 5}, {{0, ReturnValueIndex}})}, + // int __sprintf_chk(char * str, int flag, size_t strlen, const char * + // format, ...); + {{CDM::CLibrary, {"__sprintf_chk"}}, + TR::Prop({{3}, 4}, {{0, ReturnValueIndex}})}, // Sinks - {{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, - {{{"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, - {{{"execl"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)}, - {{{"execle"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)}, - {{{"execlp"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)}, - {{{"execv"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)}, - {{{"execve"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)}, - {{{"fexecve"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)}, - {{{"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)}, - {{{"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)}, - {{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"execl"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"execle"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"execlp"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"execv"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"execve"}}, + TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"fexecve"}}, + TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"execvpe"}}, + TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, // malloc, calloc, alloca, realloc, memccpy // are intentionally not marked as taint sinks because unconditional // reporting for these functions generates many false positives. // These taint sinks should be implemented in other checkers with more // sophisticated sanitation heuristics. - {{{{"setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}, - {{{{"setproctitle_fast"}}}, + + {{CDM::CLibrary, {"setproctitle"}}, + TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}, + {{CDM::CLibrary, {"setproctitle_fast"}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}}; - // `getenv` returns taint only in untrusted environments. if (TR::UntrustedEnv(C)) { // void setproctitle_init(int argc, char *argv[], char *envp[]) + // TODO: replace `MsgCustomSink` with a message that fits this situation. + GlobalCRules.push_back({{CDM::CLibrary, {"setproctitle_init"}}, + TR::Sink({{1, 2}}, MsgCustomSink)}); + + // `getenv` returns taint only in untrusted environments. GlobalCRules.push_back( - {{{"setproctitle_init"}}, TR::Sink({{1, 2}}, MsgCustomSink)}); - GlobalCRules.push_back({{{"getenv"}}, TR::Source({{ReturnValueIndex}})}); + {{CDM::CLibrary, {"getenv"}}, TR::Source({{ReturnValueIndex}})}); } StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()),