Skip to content

Commit

Permalink
feat: add IAST propagation for string's split, rsplit and splitlines (#…
Browse files Browse the repository at this point in the history
…9113)

## Description

Add propagation for the split/rsplit/splitlines methods.

## Checklist

- [X] Change(s) are motivated and described in the PR description
- [X] Testing strategy is described if automated tests are not included
in the PR
- [X] Risks are described (performance impact, potential for breakage,
maintainability)
- [X] Change is maintainable (easy to change, telemetry, documentation)
- [X] [Library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
are followed or label `changelog/no-changelog` is set
- [X] Documentation is included (in-code, generated user docs, [public
corp docs](https://github.com/DataDog/documentation/))
- [X] Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))
- [X] If this PR changes the public interface, I've notified
`@DataDog/apm-tees`.

## Reviewer Checklist

- [x] Title is accurate
- [x] All changes are related to the pull request's stated goal
- [x] Description motivates each change
- [x] Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- [x] Testing strategy adequately addresses listed risks
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] Release note makes sense to a user of the library
- [x] Author has acknowledged and discussed the performance implications
of this PR as reported in the benchmarks PR comment
- [x] Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

---------

Signed-off-by: Juanjo Alvarez <juanjo.alvarezmartinez@datadoghq.com>
Co-authored-by: Alberto Vara <alberto.vara@datadoghq.com>
  • Loading branch information
juanjux and avara1986 authored Apr 29, 2024
1 parent 1082722 commit 33bd59f
Show file tree
Hide file tree
Showing 11 changed files with 477 additions and 8 deletions.
3 changes: 3 additions & 0 deletions ddtrace/appsec/_iast/_ast/visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ def __init__(
"format_map": "ddtrace_aspects.format_map_aspect",
"zfill": "ddtrace_aspects.zfill_aspect",
"ljust": "ddtrace_aspects.ljust_aspect",
"split": "ddtrace_aspects.split_aspect",
"rsplit": "ddtrace_aspects.rsplit_aspect",
"splitlines": "ddtrace_aspects.splitlines_aspect",
},
# Replacement function for indexes and ranges
"slices": {
Expand Down
74 changes: 74 additions & 0 deletions ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectSplit.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#include "AspectSplit.h"
#include "Initializer/Initializer.h"

/**
 * Taint-aware wrapper around the Python-level `split` method of `text`.
 *
 * Invokes `text.split(separator, maxsplit)` and, when `text` carries taint
 * ranges, passes those ranges together with the resulting pieces to
 * `set_ranges_on_splitted` (final boolean argument fixed to `false`).
 *
 * @param text       String-like Python object to split.
 * @param separator  Optional delimiter forwarded unchanged to `split`.
 * @param maxsplit   Optional split limit forwarded unchanged to `split`.
 * @return The object returned by `split`, converted to `py::list`.
 * @throws py::value_error if the initializer has no tainting map; this is
 *         checked before the split is attempted.
 */
template<class StrType>
py::list
api_split_text(const StrType& text, const optional<StrType>& separator, const optional<int> maxsplit)
{
    TaintRangeMapType* taint_map = initializer->get_tainting_map();
    if (taint_map == nullptr) {
        throw py::value_error(MSG_ERROR_TAINT_MAP);
    }

    auto pieces = text.attr("split")(separator, maxsplit);
    auto source_ranges = api_get_ranges(text);
    if (source_ranges.empty()) {
        // Nothing to propagate: the input was not tainted.
        return pieces;
    }

    set_ranges_on_splitted(text, source_ranges, pieces, taint_map, false);
    return pieces;
}

/**
 * Taint-aware wrapper around the Python-level `rsplit` method of `text`.
 *
 * Invokes `text.rsplit(separator, maxsplit)` and, when `text` carries taint
 * ranges, passes those ranges together with the resulting pieces to
 * `set_ranges_on_splitted` (final boolean argument fixed to `false`).
 *
 * @param text       String-like Python object to split.
 * @param separator  Optional delimiter forwarded unchanged to `rsplit`.
 * @param maxsplit   Optional split limit forwarded unchanged to `rsplit`.
 * @return The object returned by `rsplit`, converted to `py::list`.
 * @throws py::value_error if the initializer has no tainting map; this is
 *         checked before the split is attempted.
 */
template<class StrType>
py::list
api_rsplit_text(const StrType& text, const optional<StrType>& separator, const optional<int> maxsplit)
{
    TaintRangeMapType* taint_map = initializer->get_tainting_map();
    if (taint_map == nullptr) {
        throw py::value_error(MSG_ERROR_TAINT_MAP);
    }

    auto pieces = text.attr("rsplit")(separator, maxsplit);
    auto source_ranges = api_get_ranges(text);
    if (source_ranges.empty()) {
        // Nothing to propagate: the input was not tainted.
        return pieces;
    }

    set_ranges_on_splitted(text, source_ranges, pieces, taint_map, false);
    return pieces;
}

/**
 * Taint-aware wrapper around the Python-level `splitlines` method of `text`.
 *
 * Invokes `text.splitlines(keepends)` and, when `text` carries taint ranges,
 * passes those ranges together with the resulting lines to
 * `set_ranges_on_splitted`, forwarding `keepends` as its final boolean
 * argument.
 *
 * @param text      String-like Python object to split into lines.
 * @param keepends  Forwarded unchanged to `splitlines`.
 * @return The object returned by `splitlines`, converted to `py::list`.
 * @throws py::value_error if the initializer has no tainting map; this is
 *         checked before the split is attempted.
 */
template<class StrType>
py::list
api_splitlines_text(const StrType& text, bool keepends)
{
    TaintRangeMapType* taint_map = initializer->get_tainting_map();
    if (taint_map == nullptr) {
        throw py::value_error(MSG_ERROR_TAINT_MAP);
    }

    auto lines = text.attr("splitlines")(keepends);
    auto source_ranges = api_get_ranges(text);
    if (source_ranges.empty()) {
        // Nothing to propagate: the input was not tainted.
        return lines;
    }

    set_ranges_on_splitted(text, source_ranges, lines, taint_map, keepends);
    return lines;
}

// Registers the split-family aspects on the given pybind11 module.
//
// Each Python-visible name (`_aspect_split`, `_aspect_rsplit`,
// `_aspect_splitlines`) is registered three times, once per instantiation for
// py::str, py::bytes and py::bytearray, so they form overload sets.
//
// Defaults: `separator` defaults to None and `maxsplit` to -1 for split/rsplit;
// `keepends` defaults to false for splitlines.
//
// NOTE(review): Python's own str.split/rsplit name the delimiter keyword `sep`,
// whereas these bindings name it `separator`; a call forwarded with `sep=...`
// would not match any overload here — confirm the Python-side wrapper accounts
// for that.
void
pyexport_aspect_split(py::module& m)
{
// split: one overload per supported text type.
m.def("_aspect_split", &api_split_text<py::str>, "text"_a, "separator"_a = py::none(), "maxsplit"_a = -1);
m.def("_aspect_split", &api_split_text<py::bytes>, "text"_a, "separator"_a = py::none(), "maxsplit"_a = -1);
m.def("_aspect_split", &api_split_text<py::bytearray>, "text"_a, "separator"_a = py::none(), "maxsplit"_a = -1);
// rsplit: same signature and defaults as split.
m.def("_aspect_rsplit", &api_rsplit_text<py::str>, "text"_a, "separator"_a = py::none(), "maxsplit"_a = -1);
m.def("_aspect_rsplit", &api_rsplit_text<py::bytes>, "text"_a, "separator"_a = py::none(), "maxsplit"_a = -1);
m.def("_aspect_rsplit", &api_rsplit_text<py::bytearray>, "text"_a, "separator"_a = py::none(), "maxsplit"_a = -1);
// splitlines: each registration carries a cppcheck suppression for the
// `"keepends"_a = false` default argument.
// cppcheck-suppress assignBoolToPointer
m.def("_aspect_splitlines", &api_splitlines_text<py::str>, "text"_a, "keepends"_a = false);
// cppcheck-suppress assignBoolToPointer
m.def("_aspect_splitlines", &api_splitlines_text<py::bytes>, "text"_a, "keepends"_a = false);
// cppcheck-suppress assignBoolToPointer
m.def("_aspect_splitlines", &api_splitlines_text<py::bytearray>, "text"_a, "keepends"_a = false);
}
18 changes: 18 additions & 0 deletions ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectSplit.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once

#include "Helpers.h"

// Calls `text.split(separator, maxsplit)` and propagates the taint ranges of
// `text` onto the resulting pieces. Throws py::value_error when no tainting
// map is available.
// NOTE(review): only the declaration lives in this header; the template body
// is defined in AspectSplit.cpp, so instantiation from other translation
// units presumably is not intended — confirm.
template<class StrType>
py::list
api_split_text(const StrType& text, const optional<StrType>& separator, const optional<int> maxsplit);

// Same as api_split_text but delegates to `text.rsplit(separator, maxsplit)`.
template<class StrType>
py::list
api_rsplit_text(const StrType& text, const optional<StrType>& separator, const optional<int> maxsplit);

// Calls `text.splitlines(keepends)` and propagates the taint ranges of `text`
// onto the resulting lines. Throws py::value_error when no tainting map is
// available.
template<class StrType>
py::list
api_splitlines_text(const StrType& text, bool keepends);

// Registers `_aspect_split`, `_aspect_rsplit` and `_aspect_splitlines` on `m`.
void
pyexport_aspect_split(py::module& m);
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once
#include "AspectFormat.h"
#include "AspectOsPathJoin.h"
#include "AspectSplit.h"
#include "Helpers.h"
#include <pybind11/pybind11.h>

Expand All @@ -13,4 +14,6 @@ pyexport_m_aspect_helpers(py::module& m)
pyexport_format_aspect(m_aspect_format);
py::module m_ospath_join = m.def_submodule("aspect_ospath_join", "Aspect os.path.join");
pyexport_ospathjoin_aspect(m_ospath_join);
py::module m_aspect_split = m.def_submodule("aspect_split", "Aspect split");
pyexport_aspect_split(m_aspect_split);
}
6 changes: 6 additions & 0 deletions ddtrace/appsec/_iast/_taint_tracking/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
from ._native.aspect_helpers import parse_params
from ._native.aspect_helpers import set_ranges_on_splitted
from ._native.aspect_ospath_join import _aspect_ospathjoin
from ._native.aspect_split import _aspect_rsplit
from ._native.aspect_split import _aspect_split
from ._native.aspect_split import _aspect_splitlines
from ._native.initializer import active_map_addreses_size
from ._native.initializer import create_context
from ._native.initializer import debug_taint_map
Expand Down Expand Up @@ -82,6 +85,9 @@
"origin_to_str",
"common_replace",
"_aspect_ospathjoin",
"_aspect_split",
"_aspect_rsplit",
"_aspect_splitlines",
"_format_aspect",
"as_formatted_evidence",
"parse_params",
Expand Down
58 changes: 56 additions & 2 deletions ddtrace/appsec/_iast/_taint_tracking/aspects.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@

from .._taint_tracking import TagMappingMode
from .._taint_tracking import TaintRange
from .._taint_tracking import _aspect_ospathjoin # noqa: F401
from .._taint_tracking import _aspect_ospathjoin
from .._taint_tracking import _aspect_rsplit
from .._taint_tracking import _aspect_split
from .._taint_tracking import _aspect_splitlines
from .._taint_tracking import _convert_escaped_text_to_tainted_text
from .._taint_tracking import _format_aspect
from .._taint_tracking import are_all_text_all_ranges
Expand Down Expand Up @@ -45,7 +48,19 @@
_join_aspect = aspects.join_aspect
_slice_aspect = aspects.slice_aspect

__all__ = ["add_aspect", "str_aspect", "bytearray_extend_aspect", "decode_aspect", "encode_aspect"]
__all__ = [
"add_aspect",
"str_aspect",
"bytearray_extend_aspect",
"decode_aspect",
"encode_aspect",
"_aspect_ospathjoin",
"_aspect_split",
"_aspect_rsplit",
"_aspect_splitlines",
]

# TODO: Factorize the "flags_added_args" copypasta into a decorator


def add_aspect(op1, op2):
Expand All @@ -58,6 +73,45 @@ def add_aspect(op1, op2):
return op1 + op2


def split_aspect(orig_function: Optional[Callable], flag_added_args: int, *args: Any, **kwargs: Any) -> list:
    """Taint-propagating replacement for ``split`` calls.

    :param orig_function: the callable originally being invoked, if any. When it
        is truthy and is not ``builtin_str`` the call is delegated to it untouched.
    :param flag_added_args: number of leading positional arguments to drop
        before delegating to ``orig_function``.
    :param args: positional arguments; ``args[0]`` is the object being split.
    :param kwargs: keyword arguments of the original call.
    :return: the list of pieces (or whatever ``orig_function`` returns when the
        call is delegated).
    """
    if orig_function:
        if orig_function != builtin_str:
            if flag_added_args > 0:
                args = args[flag_added_args:]
            return orig_function(*args, **kwargs)
    try:
        # The native binding names the delimiter keyword ``separator`` while
        # Python's own ``split`` names it ``sep``. Translate it for the native
        # call so ``x.split(sep=...)`` still propagates taint instead of always
        # failing over to the fallback below.
        native_kwargs = kwargs
        if "sep" in kwargs and "separator" not in kwargs:
            native_kwargs = dict(kwargs)
            native_kwargs["separator"] = native_kwargs.pop("sep")
        return _aspect_split(*args, **native_kwargs)
    except Exception as e:
        iast_taint_log_error("IAST propagation error. split_aspect. {}".format(e))
    # Best-effort fallback: plain, non-propagating split with the caller's
    # original keyword arguments.
    return args[0].split(*args[1:], **kwargs)


def rsplit_aspect(orig_function: Optional[Callable], flag_added_args: int, *args: Any, **kwargs: Any) -> list:
    """Taint-propagating replacement for ``rsplit`` calls.

    :param orig_function: the callable originally being invoked, if any. When it
        is truthy and is not ``builtin_str`` the call is delegated to it untouched.
    :param flag_added_args: number of leading positional arguments to drop
        before delegating to ``orig_function``.
    :param args: positional arguments; ``args[0]`` is the object being split.
    :param kwargs: keyword arguments of the original call.
    :return: the list of pieces (or whatever ``orig_function`` returns when the
        call is delegated).
    """
    if orig_function:
        if orig_function != builtin_str:
            if flag_added_args > 0:
                args = args[flag_added_args:]
            return orig_function(*args, **kwargs)
    try:
        # The native binding names the delimiter keyword ``separator`` while
        # Python's own ``rsplit`` names it ``sep``. Translate it for the native
        # call so ``x.rsplit(sep=...)`` still propagates taint instead of always
        # failing over to the fallback below.
        native_kwargs = kwargs
        if "sep" in kwargs and "separator" not in kwargs:
            native_kwargs = dict(kwargs)
            native_kwargs["separator"] = native_kwargs.pop("sep")
        return _aspect_rsplit(*args, **native_kwargs)
    except Exception as e:
        iast_taint_log_error("IAST propagation error. rsplit_aspect. {}".format(e))
    # Best-effort fallback: plain, non-propagating rsplit with the caller's
    # original keyword arguments.
    return args[0].rsplit(*args[1:], **kwargs)


def splitlines_aspect(orig_function: Optional[Callable], flag_added_args: int, *args: Any, **kwargs: Any) -> list:
    """Taint-propagating replacement for ``splitlines`` calls.

    :param orig_function: the callable originally being invoked, if any. When it
        is truthy and is not ``builtin_str`` the call is delegated to it untouched.
    :param flag_added_args: number of leading positional arguments to drop
        before delegating to ``orig_function``.
    :param args: positional arguments; ``args[0]`` is the object being split.
    :param kwargs: keyword arguments of the original call.
    :return: the list of lines (or whatever ``orig_function`` returns when the
        call is delegated).
    """
    if orig_function:
        if orig_function != builtin_str:
            if flag_added_args > 0:
                args = args[flag_added_args:]
            return orig_function(*args, **kwargs)
    try:
        return _aspect_splitlines(*args, **kwargs)
    except Exception as e:
        iast_taint_log_error("IAST propagation error. splitlines_aspect. {}".format(e))
    # Best-effort fallback: plain, non-propagating splitlines.
    return args[0].splitlines(*args[1:], **kwargs)


def str_aspect(orig_function: Optional[Callable], flag_added_args: int, *args: Any, **kwargs: Any) -> str:
if orig_function:
if orig_function != builtin_str:
Expand Down
1 change: 1 addition & 0 deletions ddtrace/appsec/_iast/taint_sinks/ast_taint.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from typing import Callable # noqa:F401


# TODO: we also need a native version of this function!
def ast_function(
func, # type: Callable
flag_added_args, # type: Any
Expand Down
4 changes: 2 additions & 2 deletions tests/appsec/iast/aspects/test_aspect_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ def test_common_replace_tainted_bytearray():
assert get_ranges(s2) == [_RANGE1, _RANGE2]


def _build_sample_range(start, end, name):  # type: (int, int, str) -> TaintRange
    """Build a TaintRange whose Source is named ``name`` with value "sample_value"."""
    return TaintRange(start, end, Source(name, "sample_value", OriginType.PARAMETER))
def _build_sample_range(start, length, name):  # type: (int, int, str) -> TaintRange
    """Build a TaintRange whose Source is named ``name`` with value "sample_value"."""
    return TaintRange(start, length, Source(name, "sample_value", OriginType.PARAMETER))


def test_as_formatted_evidence(): # type: () -> None
Expand Down
145 changes: 145 additions & 0 deletions tests/appsec/iast/aspects/test_split_aspect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
from ddtrace.appsec._iast._taint_tracking import TaintRange
from ddtrace.appsec._iast._taint_tracking import _aspect_rsplit
from ddtrace.appsec._iast._taint_tracking import _aspect_split
from ddtrace.appsec._iast._taint_tracking import _aspect_splitlines
from ddtrace.appsec._iast._taint_tracking._native.taint_tracking import OriginType
from ddtrace.appsec._iast._taint_tracking._native.taint_tracking import Source
from ddtrace.appsec._iast._taint_tracking._native.taint_tracking import get_ranges
from ddtrace.appsec._iast._taint_tracking._native.taint_tracking import set_ranges
from tests.appsec.iast.aspects.test_aspect_helpers import _build_sample_range


# These tests are simple ones testing the calls and replacements since most of the
# actual testing is in test_aspect_helpers' test for set_ranges_on_splitted which these
# functions call internally.
def test_aspect_split_simple():
    """_aspect_split with no separator yields per-piece taint ranges."""
    text = "abc def"
    head_range = _build_sample_range(0, 3, "abc")
    tail_range = _build_sample_range(3, 4, " def")
    set_ranges(text, (head_range, tail_range))
    assert get_ranges(text)

    pieces = _aspect_split(text)
    assert pieces == ["abc", "def"]

    expected_tail = TaintRange(0, 3, Source(" def", "sample_value", OriginType.PARAMETER))
    assert get_ranges(pieces[0]) == [head_range]
    assert get_ranges(pieces[1]) == [expected_tail]


def test_aspect_rsplit_simple():
    """_aspect_rsplit with no separator yields per-piece taint ranges."""
    text = "abc def"
    head_range = _build_sample_range(0, 3, "abc")
    tail_range = _build_sample_range(3, 4, " def")
    set_ranges(text, (head_range, tail_range))
    assert get_ranges(text)

    pieces = _aspect_rsplit(text)
    assert pieces == ["abc", "def"]

    expected_tail = TaintRange(0, 3, Source(" def", "sample_value", OriginType.PARAMETER))
    assert get_ranges(pieces[0]) == [head_range]
    assert get_ranges(pieces[1]) == [expected_tail]


def test_aspect_split_with_separator():
    """_aspect_split with an explicit ":" separator yields per-piece taint ranges."""
    text = "abc:def"
    head_range = _build_sample_range(0, 3, "abc")
    tail_range = _build_sample_range(3, 4, ":def")
    set_ranges(text, (head_range, tail_range))
    assert get_ranges(text)

    pieces = _aspect_split(text, ":")
    assert pieces == ["abc", "def"]

    expected_tail = TaintRange(0, 3, Source(":def", "sample_value", OriginType.PARAMETER))
    assert get_ranges(pieces[0]) == [head_range]
    assert get_ranges(pieces[1]) == [expected_tail]


def test_aspect_rsplit_with_separator():
    """_aspect_rsplit with an explicit ":" separator yields per-piece taint ranges."""
    text = "abc:def"
    head_range = _build_sample_range(0, 3, "abc")
    tail_range = _build_sample_range(3, 4, ":def")
    set_ranges(text, (head_range, tail_range))
    assert get_ranges(text)

    pieces = _aspect_rsplit(text, ":")
    assert pieces == ["abc", "def"]

    expected_tail = TaintRange(0, 3, Source(":def", "sample_value", OriginType.PARAMETER))
    assert get_ranges(pieces[0]) == [head_range]
    assert get_ranges(pieces[1]) == [expected_tail]


def test_aspect_split_with_maxsplit():
    """_aspect_split honours maxsplit=1, 2 and 0 and keeps the taint ranges."""
    text = "abc def ghi"
    abc_range = _build_sample_range(0, 3, "abc")
    def_range = _build_sample_range(3, 4, " def")
    ghi_range = _build_sample_range(7, 4, " ghi")
    set_ranges(text, (abc_range, def_range, ghi_range))
    assert get_ranges(text)

    # maxsplit=1: everything after the first split stays in one piece.
    one_split = _aspect_split(text, maxsplit=1)
    assert one_split == ["abc", "def ghi"]
    assert get_ranges(one_split[0]) == [abc_range]
    assert get_ranges(one_split[1]) == [
        TaintRange(0, 3, Source(" def", "sample_value", OriginType.PARAMETER)),
        TaintRange(3, 4, Source(" ghi", "sample_value", OriginType.PARAMETER)),
    ]

    # maxsplit=2: three separate pieces.
    two_splits = _aspect_split(text, maxsplit=2)
    assert two_splits == ["abc", "def", "ghi"]
    assert get_ranges(two_splits[0]) == [abc_range]
    assert get_ranges(two_splits[1]) == [TaintRange(0, 3, Source(" def", "sample_value", OriginType.PARAMETER))]
    assert get_ranges(two_splits[2]) == [TaintRange(0, 3, Source(" ghi", "sample_value", OriginType.PARAMETER))]

    # maxsplit=0: no split at all, the original ranges are expected unchanged.
    no_split = _aspect_split(text, maxsplit=0)
    assert no_split == ["abc def ghi"]
    assert get_ranges(no_split[0]) == [abc_range, def_range, ghi_range]


def test_aspect_rsplit_with_maxsplit():
    """_aspect_rsplit honours maxsplit=1, 2 and 0 and keeps the taint ranges."""
    text = "abc def ghi"
    abc_range = _build_sample_range(0, 3, "abc")
    def_range = _build_sample_range(3, 4, " def")
    ghi_range = _build_sample_range(7, 4, " ghi")
    set_ranges(text, (abc_range, def_range, ghi_range))
    assert get_ranges(text)

    # maxsplit=1: everything before the last split stays in one piece.
    one_split = _aspect_rsplit(text, maxsplit=1)
    assert one_split == ["abc def", "ghi"]
    assert get_ranges(one_split[0]) == [
        abc_range,
        TaintRange(3, 4, Source(" def", "sample_value", OriginType.PARAMETER)),
    ]
    assert get_ranges(one_split[1]) == [TaintRange(0, 3, Source(" ghi", "sample_value", OriginType.PARAMETER))]

    # maxsplit=2: three separate pieces.
    two_splits = _aspect_rsplit(text, maxsplit=2)
    assert two_splits == ["abc", "def", "ghi"]
    assert get_ranges(two_splits[0]) == [abc_range]
    assert get_ranges(two_splits[1]) == [TaintRange(0, 3, Source(" def", "sample_value", OriginType.PARAMETER))]
    assert get_ranges(two_splits[2]) == [TaintRange(0, 3, Source(" ghi", "sample_value", OriginType.PARAMETER))]

    # maxsplit=0: no split at all, the original ranges are expected unchanged.
    no_split = _aspect_rsplit(text, maxsplit=0)
    assert no_split == ["abc def ghi"]
    assert get_ranges(no_split[0]) == [abc_range, def_range, ghi_range]


def test_aspect_splitlines_simple():
    """_aspect_splitlines with default keepends yields per-line taint ranges."""
    text = "abc\ndef"
    head_range = _build_sample_range(0, 3, "abc")
    tail_range = _build_sample_range(3, 4, " def")
    set_ranges(text, (head_range, tail_range))
    assert get_ranges(text)

    lines = _aspect_splitlines(text)
    assert lines == ["abc", "def"]

    expected_tail = TaintRange(0, 3, Source(" def", "sample_value", OriginType.PARAMETER))
    assert get_ranges(lines[0]) == [head_range]
    assert get_ranges(lines[1]) == [expected_tail]


def test_aspect_splitlines_keepend_true():
    """_aspect_splitlines with keepends=True keeps line endings and their ranges."""
    text = "abc\ndef\nhij\n"
    first_range = _build_sample_range(0, 4, "abc\n")
    second_range = _build_sample_range(4, 4, "def\n")
    third_range = _build_sample_range(8, 4, "hij\n")
    set_ranges(text, (first_range, second_range, third_range))
    assert get_ranges(text)

    lines = _aspect_splitlines(text, True)
    assert lines == ["abc\n", "def\n", "hij\n"]

    assert get_ranges(lines[0]) == [first_range]
    assert get_ranges(lines[1]) == [TaintRange(0, 4, Source("def\n", "sample_value", OriginType.PARAMETER))]
    assert get_ranges(lines[2]) == [TaintRange(0, 4, Source("hij\n", "sample_value", OriginType.PARAMETER))]
Loading

0 comments on commit 33bd59f

Please sign in to comment.