/
quarantine.cpp
439 lines (375 loc) · 17 KB
/
quarantine.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <grp.h>
#include <sched.h>
#include <signal.h>
#include <sys/capability.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <algorithm>
#include <cctype>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#define STACK_SIZE (1024*1024)
// One contiguous ID mapping, given on the command line as
// "start_host:end_host,start_guest".
struct Range {
    int start_host;   // first host ID of the span
    int end_host;     // last host ID of the span (inclusive)
    int start_guest;  // ID inside the container that start_host maps to
};
// Global state shared by main(), parse_args() and the namespace setup helpers.
static std::string rootfs;               // directory passed to --rootfs
static int bin = 0;                      // index in argv of the binary to exec; 0 means "not found yet"
static Range urange {-1,-1,-1};          // --urange; all fields -1 while unset
static Range grange {-1,-1,-1};          // --grange; all fields -1 while unset
static std::vector<int> supp_ids;        // supplementary group IDs from --supp
static int uid = -1;                     // --uid; -1 while unset
static int gid = -1;                     // --gid; -1 while unset
// effective IDs of the launching process, recorded in main() before clone()
static gid_t egid;                       // holds getegid(), hence gid_t rather than uid_t
static uid_t euid;
static bool unshare_ns = false;          // true once an option requires a new user namespace
static bool has_cap_sys_admin = false;
static bool has_cap_setuid = false;
static bool has_cap_setgid = false;
// Writes the usage text to stdout and terminates the process with status 0.
static void print_help_and_exit() {
    static const char *const usage_lines[] = {
        "\nUsage: quarantine [OPTIONS] [BINARY and its ARGS]\n\n",
        "--rootfs: (mandatory) Specify a rootfs for the container.\n",
        "--urange: Specify a urange start_host:end_host,start_guest.\n",
        "--grange: Specify a grange start_host:end_host,start_guest.\n",
        "--uid: Specify the desired UID in the container.\n",
        "--gid: Specify the desired GID in the container.\n",
        "--supp: Append supplementary groups from the specified grange to the process running in the container.\n",
    };
    for (const char *line : usage_lines) {
        std::cout << line;
    }
    // trailing blank line + flush, exactly as one std::endl does
    std::cout << std::endl;
    exit(0);
}
// Prints `err` to stderr, followed by the textual form of errno when it is non-zero,
// then terminates the process with status 1. Never returns.
//
// errno is captured on entry: the stream writes below are library/system calls that are
// allowed to modify it, which would make the reported reason unrelated to the failure.
[[noreturn]] static void print_error_and_exit(const std::string &err) {
    const int saved_errno = errno;
    std::cerr << err << std::endl;
    if (saved_errno != 0) std::cerr << "errno: " << std::strerror(saved_errno) << std::endl;
    std::cerr << "Exiting..." << std::endl;
    exit(1);
}
// Splits `str` at every occurrence of `delim` and returns the pieces in order.
// Empty pieces between two delimiters are kept; an empty piece after a trailing
// delimiter is not produced, and an empty input yields an empty vector.
static std::vector<std::string> str_split(const std::string &str, const char delim) {
    std::vector<std::string> pieces;
    std::string::size_type cursor = 0;
    while (cursor < str.size()) {
        const std::string::size_type hit = str.find(delim, cursor);
        if (hit == std::string::npos) {
            // last piece: everything up to the end of the input
            pieces.emplace_back(str, cursor);
            break;
        }
        pieces.emplace_back(str, cursor, hit - cursor);
        cursor = hit + 1;
    }
    return pieces;
}
// Returns true when `c` is not a decimal digit.
// The cast is required: std::isdigit has undefined behaviour for negative values other
// than EOF, and plain char may be signed (e.g. bytes of a UTF-8 encoded argument).
static bool is_not_digit(char c) {
    return !std::isdigit(static_cast<unsigned char>(c));
}
// Returns true when `s` is non-empty and consists only of characters that
// is_not_digit() does not reject. No sign, whitespace or size limit is handled here.
static bool is_valid_number(std::string s) {
    if (s.empty()) {
        return false;
    }
    return std::none_of(s.begin(), s.end(), is_not_digit);
}
// Thin wrapper around the pivot_root system call, issued through syscall().
// Returns the syscall result (0 on success, -1 with errno set on failure).
// When the syscall number is not available at build time it fails with ENOSYS, so that
// the caller's error report shows a meaningful reason instead of a stale errno.
static int pivot_root(const char* new_root, const char* old_root) {
#ifdef __NR_pivot_root
    // syscall() returns long; the result is only ever 0 or -1 here
    return static_cast<int>(syscall(__NR_pivot_root, new_root, old_root));
#else
    (void)new_root;
    (void)old_root;
    errno = ENOSYS;
    return -1;
#endif
}
static void parse_args(int argc, char** argv) {
int argvPos = 0;
const std::regex range_regex("([0-9])+:([0-9])+,([0-9])+");
const std::regex bin_regex("(|\\.)\\/.+");
std::smatch base_match;
struct stat buf;
// obligatory help when simply executing the binary
if (argc == 1) {
print_help_and_exit();
}
while (argc-1 > argvPos && bin == 0) {
argvPos++;
std::string arg = argv[argvPos];
// help
if (arg.compare("--help") == 0 || arg.compare("-h") == 0) {
print_help_and_exit();
}
// urange
else if (arg.compare("--urange") == 0) {
if (has_cap_setuid){
if (argvPos+1 < argc) {
std::string urange_str = argv[argvPos+1];
if (std::regex_match(urange_str, base_match, range_regex)) {
std::vector<std::string> urange_split1 = str_split(urange_str, ':');
std::vector<std::string> urange_split2 = str_split(urange_split1[1], ',');
if (is_valid_number(urange_split1[0])) urange.start_host = std::stoi(urange_split1[0]);
if (is_valid_number(urange_split2[0])) urange.end_host = std::stoi(urange_split2[0]);
if (is_valid_number(urange_split2[1])) urange.start_guest = std::stoi(urange_split2[1]);
unshare_ns = true;
} else {
print_error_and_exit("\'" + urange_str + "\' is not a valid urange.\nDesired format: start_host:end_host,start_guest");
}
} else print_error_and_exit("Please specify a urange.");
} else print_error_and_exit("To set a urange you need at least CAP_SET_UID.");
}
// grange
else if (arg.compare("--grange") == 0) {
if (has_cap_setgid) {
if (argvPos+1 < argc) {
std::string grange_str = argv[argvPos+1];
if (std::regex_match(grange_str, base_match, range_regex)) {
std::vector<std::string> grange_split1 = str_split(grange_str, ':');
std::vector<std::string> grange_split2 = str_split(grange_split1[1], ',');
if (is_valid_number(grange_split1[0])) grange.start_host = std::stoi(grange_split1[0]);
if (is_valid_number(grange_split2[0])) grange.end_host = std::stoi(grange_split2[0]);
if (is_valid_number(grange_split2[1])) grange.start_guest = std::stoi(grange_split2[1]);
unshare_ns = true;
} else {
print_error_and_exit("\'" + grange_str + "\' is not a valid urange.\nDesired format: start_host:end_host,start_guest");
}
} else print_error_and_exit("Please specify a grange.");
} else print_error_and_exit("To set a grange you need at least CAP_SET_GID.");
}
// uid
else if (arg.compare("--uid") == 0) {
if (argvPos+1 < argc) {
std::string uid_str = argv[argvPos+1];
if (is_valid_number(uid_str)) {
uid = std::stoi(uid_str);
unshare_ns = true;
} else print_error_and_exit("Please specify a valid UID.");
} else print_error_and_exit("Please specify a UID.");
}
// gid
else if (arg.compare("--gid") == 0) {
if (argvPos+1 < argc) {
std::string gid_str = argv[argvPos+1];
if (is_valid_number(gid_str)) {
gid = std::stoi(gid_str);
unshare_ns = true;
} else print_error_and_exit("Please specify a valid GID.");
} else print_error_and_exit("Please specify a GID.");
}
// supp
else if (arg.compare("--supp") == 0) {
if (has_cap_setgid) {
if (argvPos+1 < argc) {
std::string supp_ids_str = argv[argvPos+1];
std::vector<std::string> supp_ids_tmp = str_split(supp_ids_str, ',');
for (auto &s: supp_ids_tmp) {
if (!is_valid_number(s)) print_error_and_exit("\'" + s + "\' is not a valid supplementary number.");
supp_ids.push_back(std::stoi(s));
}
} else print_error_and_exit("Please specify a list of supplementary gids: id_1,id_2,...,id_n");
} else print_error_and_exit("You do not have the necessary privileges to set multiple gids inside the container. This implies that you define a grange as well.");
}
// rootfs
else if (arg.compare("--rootfs") == 0) {
if (argvPos+1 < argc) {
std::string rootfs_dir = argv[argvPos+1];
if (rootfs_dir.compare("/") == 0) print_error_and_exit("The specified rootfs shouldn't be host root.");
if (stat(rootfs_dir.c_str(), &buf) == 0) {
if (stat((rootfs_dir+"/proc").c_str(), &buf) == 0 && stat((rootfs_dir+"/sys").c_str(), &buf) == 0 && stat((rootfs_dir+"/dev").c_str(), &buf) == 0) {
rootfs = rootfs_dir;
} else print_error_and_exit("The specified rootfs must include /dev /proc and /sys.");
} else print_error_and_exit("The specified rootfs does not exist.");
} else print_error_and_exit("Please specify a valid rootfs.");
}
// binary (or, its pos in argv for execve)
else if (std::regex_match(arg, base_match, bin_regex)) {
std::string prev_arg = argv[argvPos-1];
if (prev_arg.compare("--rootfs") != 0) {
// only if it hassn't been set yet, to make it possible to do for example /bin/file /bin/bash
if (bin == 0) {
bin = argvPos;
}
}
}
}
if (has_cap_sys_admin || unshare_ns) {
// check for mandatory rootfs
if (rootfs.empty()) print_error_and_exit("Please specify a rootfs using --rootfs");
// check if process euid and rootfs are equal to ensure that rootfs can be used after cloning
if (stat(rootfs.c_str(), &buf) == 0 && unshare_ns) {
if (urange.start_guest)
if (buf.st_uid != euid) print_error_and_exit("The rootfs should belong to the executing user. If you used sudo, this effectively means you're root and thus it should belong to root.");
}
// check for mandatory binary
if (bin == 0) print_error_and_exit("Please specify a binary with its path relative to the new rootfs: /path/to/bin or ./path/to/bin");
if (urange.start_host != -1) {
// check if the provided UID is in range
if (uid != -1 && (uid < urange.start_guest || uid > urange.start_guest + (urange.end_host - urange.start_host))) {
print_error_and_exit("The specified UID \'" + std::to_string(uid) + "\' is not within the specified guest urange \'" \
+ std::to_string(urange.start_guest) + "-" + std::to_string(urange.start_guest + (urange.end_host - urange.start_host)) + "\'");
}
}
if (grange.start_host != -1) {
// check if the provided GID is in range
if (gid != -1 && (gid < grange.start_guest || gid > grange.start_guest + (grange.end_host - grange.start_host))) {
print_error_and_exit("The specified GID \'" + std::to_string(gid) + "\' is not within the specified guest grange \'" \
+ std::to_string(grange.start_guest) + "-" + std::to_string(grange.start_guest + (grange.end_host - grange.start_host)) + "\'");
}
// check if supplementary UIDs are in range
for (auto &supp_id: supp_ids) {
if (supp_id < grange.start_guest || supp_id > grange.start_guest + (grange.end_host - grange.start_host)) {
print_error_and_exit("The specified supplementary GID \'" + std::to_string(supp_id) + "\' is not within the specified guest grange \'" \
+ std::to_string(grange.start_guest) + "-" + std::to_string(grange.start_guest + (grange.end_host - grange.start_host)) + "\'");
}
}
} else if (supp_ids.size() > 0) print_error_and_exit("Please specify a grange if you want to set supplementary groups.");
} else print_error_and_exit("You either need to be privileged or specify a uid, gid, grange or urange.");
}
// Writes the user/group ID mappings of the process `child_pid` through
// /proc/<child_pid>/{setgroups,gid_map,uid_map}.
//
// With a urange/grange the map is "<start_guest> <start_host> <1 + end_host - start_host>".
// Without one, a single ID is mapped: the launcher's euid/egid on the host side and, on the
// guest side, the requested --uid/--gid or 0 when none was given.
// Any write failure ends the process via print_error_and_exit().
static void setup_mapping(int child_pid) {
    const std::string proc_prefix = "/proc/" + std::to_string(child_pid) + "/";

    // writes `content` into /proc/<child_pid>/<file>; a failed stream is fatal
    const auto write_proc_file = [&proc_prefix](const char *file, const std::string &content, const char *failure) {
        std::ofstream out(proc_prefix + file, std::ofstream::out);
        out << content;
        out.close();
        if (!out) print_error_and_exit(failure);
    };

    // uid_map fields
    const bool urange_given = urange.start_guest != -1;
    const int uid_guest = urange_given ? urange.start_guest : (uid != -1 ? uid : 0);
    const int uid_host = urange_given ? urange.start_host : static_cast<int>(euid);
    const int uid_count = urange_given ? 1 + urange.end_host - urange.start_host : 1;

    // gid_map fields
    const bool grange_given = grange.start_guest != -1;
    const int gid_guest = grange_given ? grange.start_guest : (gid != -1 ? gid : 0);
    const int gid_host = grange_given ? grange.start_host : static_cast<int>(egid);
    const int gid_count = grange_given ? 1 + grange.end_host - grange.start_host : 1;

    const std::string uid_line = std::to_string(uid_guest) + " " + std::to_string(uid_host) + " " + std::to_string(uid_count);
    const std::string gid_line = std::to_string(gid_guest) + " " + std::to_string(gid_host) + " " + std::to_string(gid_count);

    // without CAP_SETGID, "deny" is written to setgroups before gid_map
    if (!has_cap_setgid) {
        write_proc_file("setgroups", "deny", "Error writing deny into setgroups.");
    }
    // gid_map goes first: once uid_map maps start_guest to something other than 0 the
    // privileges needed to write the other map are gone
    write_proc_file("gid_map", gid_line, "Error writing gid_map.");
    write_proc_file("uid_map", uid_line, "Error writing uid_map.");
}
// Installs the IDs collected in supp_ids as the supplementary group list of the calling
// process. Exits via print_error_and_exit() when setgroups() fails.
static void set_supplementary_groups () {
    // setgroups() expects a gid_t array; a vector replaces the former variable-length array,
    // which is a compiler extension rather than standard C++
    const std::vector<gid_t> groups(supp_ids.begin(), supp_ids.end());
    if (setgroups(groups.size(), groups.data()) < 0) print_error_and_exit("Couldn't set supplementary groups.");
}
static void set_uid_gid() {
uid_t set_uid;
gid_t set_gid;
// set uid value
if (uid != -1) {
set_uid = uid;
} else if (urange.start_guest != -1) {
set_uid = urange.start_guest;
} else set_uid = 0;
// set gid value
if (gid != -1) {
set_gid = gid;
} else if (grange.start_guest != -1) {
set_gid = grange.start_guest;
} else set_gid = 0;
// set gid and uid
if (setresgid(set_gid, set_gid, set_gid) < 0) print_error_and_exit("Error setting resgid.");
if (setresuid(set_uid, set_uid, set_uid) < 0) print_error_and_exit("Error setting resuid.");
}
int ns_setup(void* args) {
char** main_argv = (char**)(args);
struct stat buf;
// wait for mapping
// could make this better with signals but this will do for now
sleep(1);
// set supplementary groups if specified
if (supp_ids.size() > 0) set_supplementary_groups();
// set hostname
if (sethostname("wuhan", 9) < 0) print_error_and_exit("Error setting hostname.");
// set up filesystem
// initial mount as private for ns
if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL) < 0) print_error_and_exit("Failed re-mounting host root as private.");
// mount rootfs over itself to ensure it's not on the same filesystem as host /
if (mount(rootfs.c_str(), rootfs.c_str(), NULL, MS_BIND | MS_REC, NULL) < 0) print_error_and_exit("Failed to mount rootfs over itself.");
// create oldroot dir in rootfs if not already present
if (stat((rootfs+"/oldroot").c_str(), &buf) == -1) {
if (mkdir((rootfs+"/oldroot").c_str(), 0755)) print_error_and_exit("Failed mkdir: oldroot.");
}
// chdir to rootfs
if (chdir(rootfs.c_str()) != 0) print_error_and_exit("Failed chdir to rootfs");
// pivot_root to rootfs and move old root to oldroot
if (pivot_root(".", "oldroot")) print_error_and_exit("Failed pivot_root.");
// chdir to new root
if (chdir("/") != 0) print_error_and_exit("Failed chdir to new root.");
// mount proc, sys, and dev
if (mount("proc", "/proc", "proc", MS_NOSUID | MS_NODEV | MS_NOEXEC, NULL) < 0) print_error_and_exit("Failed to mount proc.");
if (mount("sysfs", "/sys", "sysfs", MS_NOSUID | MS_NODEV | MS_NOEXEC, NULL) < 0) print_error_and_exit("Failed to mount sys.");
// devtmpfs can't be mounted in a user namespace as of date so bind mount in that case
if (unshare_ns) {
if (mount("/oldroot/dev", "/dev", "devtmpfs", MS_BIND | MS_REC, NULL) < 0) print_error_and_exit("Failed to bind-mount dev.");
} else {
if (mount("devtmpfs", "/dev", "devtmpfs", MS_NOSUID, NULL) < 0) print_error_and_exit("Failed to mount dev.");
}
// umount oldroot lazily
if (umount2("/oldroot", MNT_DETACH) < 0) print_error_and_exit("Failed to umount oldroot lazily.");
// rm oldroot
if (rmdir("oldroot") < 0) print_error_and_exit("Failed to rmdir oldroot.");
// set uid / gid
set_uid_gid();
// exec specified binary with args
// after changing uid/gid some file access errors can happen, since the rootfs doesn't get chown'd, might change that later
// for example: /bin/bash tries to open .bashrc but obviously can't
execv(main_argv[bin], &main_argv[bin]);
return 0;
}
int main(int argc, char** argv) {
int clone_flags = CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWIPC | CLONE_NEWCGROUP | CLONE_NEWNET | CLONE_NEWPID;
pid_t pid;
char *stack = (char*)std::malloc(STACK_SIZE);
if (stack == NULL) print_error_and_exit("malloc error");
cap_t caps;
char *txt_caps;
// get euid and egid since they get lost once we clone into a new ns
euid = geteuid();
egid = getegid();
// get caps to check caps appropiately for urange, grange, instead of for only root or not
caps = cap_get_proc();
if (caps == NULL) print_error_and_exit("Error getting caps.");
txt_caps = cap_to_text(caps, NULL);
if (txt_caps == NULL) print_error_and_exit("Error converting caps to text.");
if (std::string(txt_caps).find("cap_sys_admin") != std::string::npos) has_cap_sys_admin = true;
if (std::string(txt_caps).find("cap_setuid") != std::string::npos) has_cap_setuid = true;
if (std::string(txt_caps).find("cap_setgid") != std::string::npos) has_cap_setgid = true;
if (cap_free(caps) == -1) print_error_and_exit("Error freeing caps.");
parse_args(argc, argv);
if (unshare_ns) clone_flags |= CLONE_NEWUSER;
// clone and unshare all namespaces specified in clone_flags
pid = clone(ns_setup, stack + STACK_SIZE, clone_flags | SIGCHLD, (void*)(argv));
if (pid < 0) print_error_and_exit("Couldn't clone.");
// set up mapping
if (unshare_ns) setup_mapping(pid);
waitpid(pid, NULL, 0);
return 0;
}