/
setup.py
166 lines (136 loc) · 6.02 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
"""
Analyzer for setup.py python package files
"""
import pprint
from typing import Dict, Any
from packaging.utils import canonicalize_name
from .detections import Detection
from .python.nodes import String, Call, Dictionary
from .base import NodeAnalyzerV2
from ..utils import Analyzer
from ..bases import ASTAnalyzer
from .. import config
# Module-level logger, obtained through the project's config helper and named after this module
logger = config.get_logger(__name__)
@Analyzer.ID("setup_py")
class SetupPy(NodeAnalyzerV2, ASTAnalyzer):
    """Audit setup.py file for anomalies such as code execution or network communication"""

    __slots__ = ("hits",)
    filename_whitelist = {"setup.py"}

    def __init__(self):
        super().__init__()
        # Detections produced while parsing the `setuptools.setup(...)` call
        self.hits = []

    def node_Call(self, context):
        """
        Inspect a call node and parse it when it is the `setuptools.setup` call.

        Findings are stored in `self.hits`; this method itself yields nothing.
        """
        if context.node.cached_full_name == "setuptools.setup":
            self.__parse_setup(context)

        # The empty `yield from` keeps this method a generator
        # (presumably the visitor framework iterates over the result - TODO confirm)
        yield from []

    def __parse_setup(self, context):
        """
        Extract metadata from the keyword arguments of the `setup()` call.

        Appends to `self.hits`:
        - one name shadowing detection for every listed package whose name
          does not match the declared package name
        - one "Setup script found" detection carrying the parsed data in `extra`

        :param context: visitor context whose `node` is the `setup()` call
        """
        kwargs = context.node.kwargs

        # Extract basic package identifiers
        copy_fields = ("name", "version", "description", "url")
        parsed: Dict[str, Any] = {"packages": []}

        for field in copy_fields:
            # Convert basic fields into string
            if field in kwargs:
                parsed[field] = self.__as_str(kwargs[field])

        if kwargs.get("cmdclass"):
            parsed.update(self.__parse_cmdclass(context))

        packages = kwargs.get("packages")
        if isinstance(packages, list):
            pkgs = [self.__as_str(x) for x in packages]
        elif isinstance(packages, str):
            pkgs = [packages]
        else:
            # Missing or not statically resolvable (e.g. a `find_packages()` call)
            pkgs = []

        name = parsed.get("name")

        for pkg in pkgs:
            parsed["packages"].append(pkg)

            if isinstance(name, str) and not self.__check_name(name, pkg):
                sig = Detection(
                    detection_type="SetupScript",
                    score=config.get_score_or_default("setup-py-name-shadowing", 100),
                    message=f"Package '{parsed['name']}' is installed under different name: '{pkg}'",
                    signature=f"setup_analyzer#pkg_name_mismatch#{parsed['name']}#{pkg}",
                    tags={"behavior:setup_py:name_shadowing"}
                )
                self.hits.append(sig)

        main_sig = Detection(
            detection_type="SetupScript",
            score=config.get_score_or_default("setup-py-setup-script", 0),
            message="Setup script found",
            extra={"parsed": parsed},
            signature=f"setup_analyzer#setup_script#{context.visitor.normalized_path}#{context.node.line_no}",
            node=context.node
        )
        self.hits.append(main_sig)
        # Fixed: the original message had a stray literal "f" in front of the dump
        logger.debug(f"Parsed setup.py: {pprint.pformat(parsed)}")

    def __as_str(self, node):
        """
        Return the value of a `String` node, or `repr()` of anything else.
        """
        if isinstance(node, String):
            return node.value
        return repr(node)

    def post_analysis(self, analyzer):
        """
        Escalate generic detections that were found inside a setup.py file.

        For every `Detection` in `analyzer.hits` that is not itself a
        "SetupScript" detection, an extra "SetupScript" detection is appended
        when the original one is tagged with `behavior:code_execution` and/or
        `behavior:network`.

        :param analyzer: object exposing `path` and the `hits` list to extend
        """
        if analyzer.path.name != "setup.py":
            return

        # Iterate over a snapshot: detections are appended to `analyzer.hits`
        # inside the loop and must never be re-visited (they inherit the tags
        # of the original hit, so re-visiting them could loop forever)
        for x in tuple(analyzer.hits):
            if not isinstance(x, Detection):
                continue
            elif x.name == "SetupScript":
                continue

            if "behavior:code_execution" in x.tags:
                sig = Detection(
                    detection_type="SetupScript",
                    score=config.get_score_or_default("setup-py-code-exec", 100),
                    message="Code execution capabilities found in a setup.py script",
                    node=x.node,
                    line=x.line,
                    line_no=x.line_no,
                    signature=f"setup_analyzer#code_execution#{x.signature}",
                    tags={"behavior:setup_py:code_execution"} | x.tags
                )
                analyzer.hits.append(sig)

            if "behavior:network" in x.tags:
                sig = Detection(
                    detection_type="SetupScript",
                    score=config.get_score_or_default("setup-py-network", 100),
                    message="Found code with network communication capabilities in a setup.py script",
                    node=x.node,
                    line=x.line,
                    line_no=x.line_no,
                    signature=f"setup_analyzer#network_communication#{x.signature}",
                    tags={"behavior:setup_py:network_access"} | x.tags
                )
                analyzer.hits.append(sig)

    def __parse_cmdclass(self, context):
        """
        Parse the `cmdclass` keyword argument of the `setup()` call.

        When `cmdclass` is a `Dictionary` node, its keys are returned under
        `install_hooks` and a detection is recorded if the `install` command
        is hooked. Any other value is only logged.

        :param context: visitor context whose `node` is the `setup()` call
        :return: dict to be merged into the parsed setup metadata
        """
        parsed = {}
        cmdclass = context.node.kwargs["cmdclass"]

        if isinstance(cmdclass, Dictionary):
            parsed["install_hooks"] = [self.__as_str(x) for x in cmdclass.keys]

            if "install" in parsed["install_hooks"]:
                sig = Detection(
                    detection_type="SetupScript",
                    score=config.get_score_or_default("setup-py-install-hook", 500),
                    message="Setup script hooks to the `setup.py install` command.",
                    signature=f"setup_analyzer#install_hook#{context.visitor.normalized_path}#{context.node.line_no}",
                    node=context.node,
                    tags={"behavior:setup_py:install_hook"}
                )
                self.hits.append(sig)
        else:
            logger.info(f"Unknown setup hook: {cmdclass}")

        return parsed

    def __check_name(self, source: str, target: str) -> bool:
        """
        Check if the package name (source) matches a provided package (target)
        This helper function is used to detect if a package is installing itself under a different name
        For example typosquatting "requestes" could install itself as "requests"

        :param source: package name as declared in the `name` argument
        :param target: dotted package path listed in the `packages` argument
        :return: True if target is the package itself or one of its subpackages
        """
        # Normalize name
        source = canonicalize_name(source)

        # `canonicalize_name` collapses dots into dashes, so the dotted package path
        # must be split *before* normalization; otherwise a subpackage can't be recognized.
        # Every dotted prefix is tested so that both `flask.json` (package `Flask`) and
        # `zope.interface.common` (package `zope.interface`) are accepted.
        parts = target.split(".")
        return any(
            canonicalize_name(".".join(parts[:end])) == source
            for end in range(1, len(parts) + 1)
        )

    def reset_hook(self):
        """Drop all collected detections."""
        self.hits = []