-
Notifications
You must be signed in to change notification settings - Fork 415
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add IAST propagation for string's split, rsplit and splitlines (#…
…9113) ## Description Add propagation for the split/rsplit/splitlines methods. ## Checklist - [X] Change(s) are motivated and described in the PR description - [X] Testing strategy is described if automated tests are not included in the PR - [X] Risks are described (performance impact, potential for breakage, maintainability) - [X] Change is maintainable (easy to change, telemetry, documentation) - [X] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [X] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [X] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [X] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. ## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- Signed-off-by: Juanjo Alvarez <juanjo.alvarezmartinez@datadoghq.com> Co-authored-by: Alberto Vara <alberto.vara@datadoghq.com>
- Loading branch information
Showing
11 changed files
with
477 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
74 changes: 74 additions & 0 deletions
74
ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectSplit.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
#include "AspectSplit.h" | ||
#include "Initializer/Initializer.h" | ||
|
||
// Taint-aware wrapper around the Python-level `split` method of `text`. | ||
// | ||
// Calls text.split(separator, maxsplit) and, when `text` carries taint ranges, | ||
// hands them to set_ranges_on_splitted together with the resulting pieces. | ||
// Throws py::value_error(MSG_ERROR_TAINT_MAP) before splitting if the | ||
// initializer has no tainting map. | ||
// NOTE(review): the trailing `false` passed to set_ranges_on_splitted is | ||
// presumably the "separator kept in the pieces" flag (splitlines passes | ||
// `keepends` in that position) - confirm against the helper's definition. | ||
template<class StrType> | ||
py::list | ||
api_split_text(const StrType& text, const optional<StrType>& separator, const optional<int> maxsplit) | ||
{ | ||
    TaintRangeMapType* taint_map = initializer->get_tainting_map(); | ||
    if (taint_map == nullptr) { | ||
        throw py::value_error(MSG_ERROR_TAINT_MAP); | ||
    } | ||
 |
||
    // Delegate the actual splitting to the object's own split(). | ||
    auto pieces = text.attr("split")(separator, maxsplit); | ||
 |
||
    auto source_ranges = api_get_ranges(text); | ||
    if (source_ranges.empty()) { | ||
        // Nothing tainted in the input: the plain result is all we need. | ||
        return pieces; | ||
    } | ||
 |
||
    set_ranges_on_splitted(text, source_ranges, pieces, taint_map, false); | ||
    return pieces; | ||
} | ||
|
||
// Taint-aware wrapper around the Python-level `rsplit` method of `text`. | ||
// | ||
// Calls text.rsplit(separator, maxsplit) and, when `text` carries taint ranges, | ||
// hands them to set_ranges_on_splitted together with the resulting pieces. | ||
// Throws py::value_error(MSG_ERROR_TAINT_MAP) before splitting if the | ||
// initializer has no tainting map. | ||
template<class StrType> | ||
py::list | ||
api_rsplit_text(const StrType& text, const optional<StrType>& separator, const optional<int> maxsplit) | ||
{ | ||
    TaintRangeMapType* taint_map = initializer->get_tainting_map(); | ||
    if (taint_map == nullptr) { | ||
        throw py::value_error(MSG_ERROR_TAINT_MAP); | ||
    } | ||
 |
||
    // Delegate the actual splitting to the object's own rsplit(). | ||
    auto pieces = text.attr("rsplit")(separator, maxsplit); | ||
 |
||
    auto source_ranges = api_get_ranges(text); | ||
    if (source_ranges.empty()) { | ||
        // Untainted input: return the plain result untouched. | ||
        return pieces; | ||
    } | ||
 |
||
    set_ranges_on_splitted(text, source_ranges, pieces, taint_map, false); | ||
    return pieces; | ||
} | ||
|
||
// Taint-aware wrapper around the Python-level `splitlines` method of `text`. | ||
// | ||
// Calls text.splitlines(keepends) and, when `text` carries taint ranges, hands | ||
// them to set_ranges_on_splitted together with the resulting lines; `keepends` | ||
// is forwarded as that helper's last argument. | ||
// Throws py::value_error(MSG_ERROR_TAINT_MAP) before splitting if the | ||
// initializer has no tainting map. | ||
template<class StrType> | ||
py::list | ||
api_splitlines_text(const StrType& text, bool keepends) | ||
{ | ||
    TaintRangeMapType* taint_map = initializer->get_tainting_map(); | ||
    if (taint_map == nullptr) { | ||
        throw py::value_error(MSG_ERROR_TAINT_MAP); | ||
    } | ||
 |
||
    // Delegate the actual splitting to the object's own splitlines(). | ||
    auto lines = text.attr("splitlines")(keepends); | ||
 |
||
    auto source_ranges = api_get_ranges(text); | ||
    if (source_ranges.empty()) { | ||
        // Untainted input: return the plain result untouched. | ||
        return lines; | ||
    } | ||
 |
||
    set_ranges_on_splitted(text, source_ranges, lines, taint_map, keepends); | ||
    return lines; | ||
} | ||
|
||
// Registers the split, rsplit and splitlines aspects on module `m`. | ||
// Every Python-visible name is defined three times, once per text type | ||
// (py::str, py::bytes, py::bytearray), in that order. | ||
void | ||
pyexport_aspect_split(py::module& m) | ||
{ | ||
// _aspect_split(text, separator=None, maxsplit=-1) | ||
m.def("_aspect_split", &api_split_text<py::str>, "text"_a, "separator"_a = py::none(), "maxsplit"_a = -1); | ||
m.def("_aspect_split", &api_split_text<py::bytes>, "text"_a, "separator"_a = py::none(), "maxsplit"_a = -1); | ||
m.def("_aspect_split", &api_split_text<py::bytearray>, "text"_a, "separator"_a = py::none(), "maxsplit"_a = -1); | ||
// _aspect_rsplit(text, separator=None, maxsplit=-1) | ||
m.def("_aspect_rsplit", &api_rsplit_text<py::str>, "text"_a, "separator"_a = py::none(), "maxsplit"_a = -1); | ||
m.def("_aspect_rsplit", &api_rsplit_text<py::bytes>, "text"_a, "separator"_a = py::none(), "maxsplit"_a = -1); | ||
m.def("_aspect_rsplit", &api_rsplit_text<py::bytearray>, "text"_a, "separator"_a = py::none(), "maxsplit"_a = -1); | ||
// _aspect_splitlines(text, keepends=False); each registration below carries | ||
// its own inline cppcheck suppression, which must stay directly above it. | ||
// cppcheck-suppress assignBoolToPointer | ||
m.def("_aspect_splitlines", &api_splitlines_text<py::str>, "text"_a, "keepends"_a = false); | ||
// cppcheck-suppress assignBoolToPointer | ||
m.def("_aspect_splitlines", &api_splitlines_text<py::bytes>, "text"_a, "keepends"_a = false); | ||
// cppcheck-suppress assignBoolToPointer | ||
m.def("_aspect_splitlines", &api_splitlines_text<py::bytearray>, "text"_a, "keepends"_a = false); | ||
} |
18 changes: 18 additions & 0 deletions
18
ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectSplit.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#pragma once | ||
|
||
#include "Helpers.h" | ||
|
||
// Taint-propagating counterpart of Python's `text.split(separator, maxsplit)`. | ||
// Returns the list produced by the split. The definition throws | ||
// py::value_error when no tainting map is available. | ||
template<class StrType> | ||
py::list | ||
api_split_text(const StrType& text, const optional<StrType>& separator, const optional<int> maxsplit); | ||
|
||
// Taint-propagating counterpart of Python's `text.rsplit(separator, maxsplit)`. | ||
// Returns the list produced by the split. The definition throws | ||
// py::value_error when no tainting map is available. | ||
template<class StrType> | ||
py::list | ||
api_rsplit_text(const StrType& text, const optional<StrType>& separator, const optional<int> maxsplit); | ||
|
||
// Taint-propagating counterpart of Python's `text.splitlines(keepends)`. | ||
// Returns the list produced by the split. The definition throws | ||
// py::value_error when no tainting map is available. | ||
template<class StrType> | ||
py::list | ||
api_splitlines_text(const StrType& text, bool keepends); | ||
|
||
// Registers the _aspect_split, _aspect_rsplit and _aspect_splitlines functions on module `m`. | ||
void | ||
pyexport_aspect_split(py::module& m); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
from ddtrace.appsec._iast._taint_tracking import TaintRange | ||
from ddtrace.appsec._iast._taint_tracking import _aspect_rsplit | ||
from ddtrace.appsec._iast._taint_tracking import _aspect_split | ||
from ddtrace.appsec._iast._taint_tracking import _aspect_splitlines | ||
from ddtrace.appsec._iast._taint_tracking._native.taint_tracking import OriginType | ||
from ddtrace.appsec._iast._taint_tracking._native.taint_tracking import Source | ||
from ddtrace.appsec._iast._taint_tracking._native.taint_tracking import get_ranges | ||
from ddtrace.appsec._iast._taint_tracking._native.taint_tracking import set_ranges | ||
from tests.appsec.iast.aspects.test_aspect_helpers import _build_sample_range | ||
|
||
|
||
# These are simple tests of the calls and replacements; most of the actual testing | ||
# is done by test_aspect_helpers' test for set_ranges_on_splitted, which these | ||
# functions call internally. | ||
def test_aspect_split_simple(): | ||
s = "abc def" | ||
range1 = _build_sample_range(0, 3, "abc") | ||
range2 = _build_sample_range(3, 4, " def") | ||
set_ranges(s, (range1, range2)) | ||
ranges = get_ranges(s) | ||
assert ranges | ||
res = _aspect_split(s) | ||
assert res == ["abc", "def"] | ||
assert get_ranges(res[0]) == [range1] | ||
assert get_ranges(res[1]) == [TaintRange(0, 3, Source(" def", "sample_value", OriginType.PARAMETER))] | ||
|
||
|
||
def test_aspect_rsplit_simple(): | ||
s = "abc def" | ||
range1 = _build_sample_range(0, 3, "abc") | ||
range2 = _build_sample_range(3, 4, " def") | ||
set_ranges(s, (range1, range2)) | ||
ranges = get_ranges(s) | ||
assert ranges | ||
res = _aspect_rsplit(s) | ||
assert res == ["abc", "def"] | ||
assert get_ranges(res[0]) == [range1] | ||
assert get_ranges(res[1]) == [TaintRange(0, 3, Source(" def", "sample_value", OriginType.PARAMETER))] | ||
|
||
|
||
def test_aspect_split_with_separator(): | ||
s = "abc:def" | ||
range1 = _build_sample_range(0, 3, "abc") | ||
range2 = _build_sample_range(3, 4, ":def") | ||
set_ranges(s, (range1, range2)) | ||
ranges = get_ranges(s) | ||
assert ranges | ||
res = _aspect_split(s, ":") | ||
assert res == ["abc", "def"] | ||
assert get_ranges(res[0]) == [range1] | ||
assert get_ranges(res[1]) == [TaintRange(0, 3, Source(":def", "sample_value", OriginType.PARAMETER))] | ||
|
||
|
||
def test_aspect_rsplit_with_separator(): | ||
s = "abc:def" | ||
range1 = _build_sample_range(0, 3, "abc") | ||
range2 = _build_sample_range(3, 4, ":def") | ||
set_ranges(s, (range1, range2)) | ||
ranges = get_ranges(s) | ||
assert ranges | ||
res = _aspect_rsplit(s, ":") | ||
assert res == ["abc", "def"] | ||
assert get_ranges(res[0]) == [range1] | ||
assert get_ranges(res[1]) == [TaintRange(0, 3, Source(":def", "sample_value", OriginType.PARAMETER))] | ||
|
||
|
||
def test_aspect_split_with_maxsplit(): | ||
s = "abc def ghi" | ||
range1 = _build_sample_range(0, 3, "abc") | ||
range2 = _build_sample_range(3, 4, " def") | ||
range3 = _build_sample_range(7, 4, " ghi") | ||
set_ranges(s, (range1, range2, range3)) | ||
ranges = get_ranges(s) | ||
assert ranges | ||
res = _aspect_split(s, maxsplit=1) | ||
assert res == ["abc", "def ghi"] | ||
assert get_ranges(res[0]) == [range1] | ||
assert get_ranges(res[1]) == [ | ||
TaintRange(0, 3, Source(" def", "sample_value", OriginType.PARAMETER)), | ||
TaintRange(3, 4, Source(" ghi", "sample_value", OriginType.PARAMETER)), | ||
] | ||
|
||
res = _aspect_split(s, maxsplit=2) | ||
assert res == ["abc", "def", "ghi"] | ||
assert get_ranges(res[0]) == [range1] | ||
assert get_ranges(res[1]) == [TaintRange(0, 3, Source(" def", "sample_value", OriginType.PARAMETER))] | ||
assert get_ranges(res[2]) == [TaintRange(0, 3, Source(" ghi", "sample_value", OriginType.PARAMETER))] | ||
|
||
res = _aspect_split(s, maxsplit=0) | ||
assert res == ["abc def ghi"] | ||
assert get_ranges(res[0]) == [range1, range2, range3] | ||
|
||
|
||
def test_aspect_rsplit_with_maxsplit(): | ||
s = "abc def ghi" | ||
range1 = _build_sample_range(0, 3, "abc") | ||
range2 = _build_sample_range(3, 4, " def") | ||
range3 = _build_sample_range(7, 4, " ghi") | ||
set_ranges(s, (range1, range2, range3)) | ||
ranges = get_ranges(s) | ||
assert ranges | ||
res = _aspect_rsplit(s, maxsplit=1) | ||
assert res == ["abc def", "ghi"] | ||
assert get_ranges(res[0]) == [ | ||
range1, | ||
TaintRange(3, 4, Source(" def", "sample_value", OriginType.PARAMETER)), | ||
] | ||
assert get_ranges(res[1]) == [TaintRange(0, 3, Source(" ghi", "sample_value", OriginType.PARAMETER))] | ||
res = _aspect_rsplit(s, maxsplit=2) | ||
assert res == ["abc", "def", "ghi"] | ||
assert get_ranges(res[0]) == [range1] | ||
assert get_ranges(res[1]) == [TaintRange(0, 3, Source(" def", "sample_value", OriginType.PARAMETER))] | ||
assert get_ranges(res[2]) == [TaintRange(0, 3, Source(" ghi", "sample_value", OriginType.PARAMETER))] | ||
|
||
res = _aspect_rsplit(s, maxsplit=0) | ||
assert res == ["abc def ghi"] | ||
assert get_ranges(res[0]) == [range1, range2, range3] | ||
|
||
|
||
def test_aspect_splitlines_simple(): | ||
s = "abc\ndef" | ||
range1 = _build_sample_range(0, 3, "abc") | ||
range2 = _build_sample_range(3, 4, " def") | ||
set_ranges(s, (range1, range2)) | ||
ranges = get_ranges(s) | ||
assert ranges | ||
res = _aspect_splitlines(s) | ||
assert res == ["abc", "def"] | ||
assert get_ranges(res[0]) == [range1] | ||
assert get_ranges(res[1]) == [TaintRange(0, 3, Source(" def", "sample_value", OriginType.PARAMETER))] | ||
|
||
|
||
def test_aspect_splitlines_keepend_true(): | ||
s = "abc\ndef\nhij\n" | ||
range1 = _build_sample_range(0, 4, "abc\n") | ||
range2 = _build_sample_range(4, 4, "def\n") | ||
range3 = _build_sample_range(8, 4, "hij\n") | ||
set_ranges(s, (range1, range2, range3)) | ||
ranges = get_ranges(s) | ||
assert ranges | ||
res = _aspect_splitlines(s, True) | ||
assert res == ["abc\n", "def\n", "hij\n"] | ||
assert get_ranges(res[0]) == [range1] | ||
assert get_ranges(res[1]) == [TaintRange(0, 4, Source("def\n", "sample_value", OriginType.PARAMETER))] | ||
assert get_ranges(res[2]) == [TaintRange(0, 4, Source("hij\n", "sample_value", OriginType.PARAMETER))] |
Oops, something went wrong.