-
Notifications
You must be signed in to change notification settings - Fork 38
/
package_scanner.py
159 lines (122 loc) · 5.83 KB
/
package_scanner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import json
import os
import shutil
import sys
import tarsafe
import tempfile
import requests
from guarddog.analyzer.analyzer import Analyzer
from guarddog.scanners.scanner import Scanner
from guarddog.utils.package_info import get_package_info
class PackageScanner(Scanner):
    """
    Scans package for attack vectors based on source code and metadata rules

    Attributes:
        analyzer (Analyzer): Analyzer for source code and metadata rules
    """

    def __init__(self) -> None:
        self.analyzer = Analyzer()
        # The previous `super(Scanner)` only built an unbound super object and
        # discarded it, so the base initializer was never run.
        # NOTE(review): assumes Scanner.__init__ takes no required arguments.
        super().__init__()

    def scan_local(self, path, rules=None) -> dict:
        """
        Scans local package

        Args:
            path (str): path to a package directory or to a `.tar.gz` archive
            rules (set, optional): Set of rule names to use. Defaults to all rules.

        Raises:
            Exception: the path does not exist
            Exception: the path is neither a directory nor a `.tar.gz` archive
            Exception: Analyzer exception

        Returns:
            dict: Analyzer output with rules to results mapping
        """
        if rules is not None:
            rules = set(rules)

        if not os.path.exists(path):
            raise Exception(f"Path {path} does not exist.")

        if path.endswith('.tar.gz'):
            with tempfile.TemporaryDirectory() as tmpdirname:
                # Close the archive handle as soon as extraction is done.
                with tarsafe.open(path) as archive:
                    archive.extractall(tmpdirname)
                # Analyze while the temporary directory still exists.
                return self.analyzer.analyze_sourcecode(tmpdirname, rules=rules)

        if os.path.isdir(path):
            return self.analyzer.analyze_sourcecode(path, rules=rules)

        # An existing path of any other kind used to fall through and return
        # None, contradicting the declared `dict` return type.
        raise Exception(f"Path {path} is neither a directory nor a .tar.gz archive.")

    def _scan_remote(self, name, base_dir, version=None, rules=None, write_package_info=False):
        """
        Downloads a package into `base_dir` and runs the analyzer on it

        Args:
            name (str): name of the package
            base_dir (str): download directory; a relative path is resolved
                against the directory containing this module
            version (str, optional): version of the package
            rules (set, optional): Set of rule names to use
            write_package_info (bool): if true, the package metadata is written
                as JSON into the directory given by `results["path"]`

        Returns:
            dict: Analyzer output
        """
        # os.path.join returns `base_dir` unchanged when it is absolute.
        directory = os.path.join(os.path.dirname(os.path.abspath(__file__)), base_dir)
        file_path = os.path.join(directory, name)

        self.download_package(name, directory, version)

        package_info = get_package_info(name)
        results = self.analyzer.analyze(file_path, package_info, rules)

        if write_package_info:
            suffix = f"{name}-{version}" if version is not None else name
            info_path = os.path.join(results["path"], f'package_info-{suffix}.json')
            with open(info_path, "w") as file:
                file.write(json.dumps(package_info))

        return results

    def scan_remote(self, name, version=None, rules=None, base_dir=None, write_package_info=False):
        """
        Scans a remote package

        Args:
            * `name` (str): name of the package on PyPI
            * `version` (str, optional): version of package (ex. 0.0.1). If not specified, the latest version is assumed.
            * `rules` (set, optional): Set of rule names to use. Defaults to all rules.
            * `base_dir` (str, optional): directory to use to download package to. If not specified, a temporary folder is created and cleaned up automatically. If specified, the provided directory is not removed after the scan.
            * `write_package_info` (bool, default False): if set to true, the result of the PyPI metadata API is written to a json file

        Raises:
            Exception: Analyzer exception

        Returns:
            dict: Analyzer output with rules to results mapping
        """
        if base_dir is not None:
            return self._scan_remote(name, base_dir, version, rules, write_package_info)

        with tempfile.TemporaryDirectory() as tmpdirname:
            # Directory to download compressed and uncompressed package
            return self._scan_remote(name, tmpdirname, version, rules, write_package_info)

    def download_package(self, package_name, directory, version=None) -> None:
        """Downloads the PyPI distribution for a given package and version

        Args:
            package_name (str): name of the package
            directory (str): directory to download package to
            version (str): version of the package; defaults to the version
                reported in the package info

        Raises:
            Exception: "Version " + version + " for package " + package_name + " doesn't exist."
            Exception: "Compressed file for " + package_name + " does not exist on PyPI."
            Exception: any exception raised by `download_compressed`

        Returns:
            None
        """
        data = get_package_info(package_name)
        releases = data["releases"]

        if version is None:
            version = data["info"]["version"]

        if version not in releases:
            raise Exception(f"Version {version} for package {package_name} doesn't exist.")

        url = None
        file_extension = None

        # Store url to compressed package and appropriate file extension.
        # Every file is inspected, so the last matching file of the release wins.
        for file in releases[version]:
            filename = file["filename"]
            if filename.endswith(".tar.gz"):
                url = file["url"]
                file_extension = ".tar.gz"
            if filename.endswith((".egg", ".whl", ".zip")):
                # Eggs and wheels are saved with a .zip extension so that they
                # are unpacked as zip archives.
                url = file["url"]
                file_extension = ".zip"

        if not (url and file_extension):
            raise Exception(f"Compressed file for {package_name} does not exist on PyPI.")

        # Path to compressed package
        zippath = os.path.join(directory, package_name + file_extension)
        unzippedpath = zippath.removesuffix(file_extension)
        self.download_compressed(url, zippath, unzippedpath)

    def download_compressed(self, url, zippath, unzippedpath) -> None:
        """Downloads a compressed file and extracts it

        The downloaded archive is removed once extraction has been attempted,
        whether or not it succeeded.

        Args:
            url (str): download link
            zippath (str): path to download compressed file
            unzippedpath (str): path to unzip compressed file

        Raises:
            Exception: "Received status code: " + <not 200> + " from PyPI"
        """
        with requests.get(url, stream=True) as response:
            # Fail early instead of writing an error page to disk and trying
            # to unpack it.
            if response.status_code != 200:
                raise Exception(f"Received status code: {response.status_code} from PyPI")

            with open(zippath, "wb") as f:
                # Stream to disk in chunks rather than buffering the whole
                # archive in memory.
                shutil.copyfileobj(response.raw, f)

        try:
            if zippath.endswith(".tar.gz"):
                # The archive is untrusted: extract tarballs with tarsafe, as
                # scan_local does.
                with tarsafe.open(zippath) as archive:
                    archive.extractall(unzippedpath)
            else:
                shutil.unpack_archive(zippath, unzippedpath)
        finally:
            os.remove(zippath)