From 6c4198469025bf037f59d617c5b75229546ce68a Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Sat, 31 Oct 2015 11:09:43 -0700 Subject: [PATCH] std: Refactor process spawning on Unix * Build up the argp/envp pointers while the `Command` is being constructed rather than only when `spawn` is called. This will allow better sharing of code between fork/exec paths. * Rename `child_after_fork` to `exec` and have it only perform the exec half of the spawning. This also means the return type has changed to `io::Error` rather than `!` to represent errors that happen. --- src/libstd/process.rs | 4 +- src/libstd/sys/unix/ext/process.rs | 2 +- src/libstd/sys/unix/process.rs | 325 ++++++++++++++--------------- src/libstd/sys/windows/process.rs | 3 - 4 files changed, 166 insertions(+), 168 deletions(-) diff --git a/src/libstd/process.rs b/src/libstd/process.rs index a8da11420d8e2..c64471cc729af 100644 --- a/src/libstd/process.rs +++ b/src/libstd/process.rs @@ -209,7 +209,9 @@ impl Command { /// Add multiple arguments to pass to the program. #[stable(feature = "process", since = "1.0.0")] pub fn args>(&mut self, args: &[S]) -> &mut Command { - self.inner.args(args.iter().map(AsRef::as_ref)); + for arg in args { + self.arg(arg.as_ref()); + } self } diff --git a/src/libstd/sys/unix/ext/process.rs b/src/libstd/sys/unix/ext/process.rs index 212aeb0406eba..97938b07f8b95 100644 --- a/src/libstd/sys/unix/ext/process.rs +++ b/src/libstd/sys/unix/ext/process.rs @@ -12,8 +12,8 @@ #![stable(feature = "rust1", since = "1.0.0")] -use os::unix::raw::{uid_t, gid_t}; use os::unix::io::{FromRawFd, RawFd, AsRawFd, IntoRawFd}; +use os::unix::raw::{uid_t, gid_t}; use process; use sys; use sys_common::{AsInnerMut, AsInner, FromInner, IntoInner}; diff --git a/src/libstd/sys/unix/process.rs b/src/libstd/sys/unix/process.rs index f881070d24143..ed512b834f83b 100644 --- a/src/libstd/sys/unix/process.rs +++ b/src/libstd/sys/unix/process.rs @@ -13,27 +13,46 @@ use prelude::v1::*; use os::unix::prelude::*; -use collections::HashMap; +use collections::hash_map::{HashMap, Entry}; use env; use ffi::{OsString, OsStr, CString, CStr}; use fmt; use io::{self, Error, ErrorKind}; -use libc::{self, pid_t, c_void, c_int, gid_t, uid_t}; +use libc::{self, pid_t, c_int, gid_t, uid_t, c_char}; +use mem; use ptr; use sys::fd::FileDesc; use sys::fs::{File, OpenOptions}; -use sys::pipe::AnonPipe; use sys::{self, cvt, cvt_r}; //////////////////////////////////////////////////////////////////////////////// // Command //////////////////////////////////////////////////////////////////////////////// -#[derive(Clone)] pub struct Command { + // Currently we try hard to ensure that the call to `.exec()` doesn't + // actually allocate any memory. While many platforms try to ensure that + // memory allocation works after a fork in a multithreaded process, it's + // been observed to be buggy and somewhat unreliable, so we do our best to + // just not do it at all! + // + // Along those lines, the `argv` and `envp` raw pointers here are exactly + // what's gonna get passed to `execvp`. The `argv` array starts with the + // `program` and ends with a NULL, and the `envp` pointer, if present, is + // also null-terminated. + // + // Right now we don't support removing arguments, so there's no much fancy + // support there, but we support adding and removing environment variables, + // so a side table is used to track where in the `envp` array each key is + // located. Whenever we add a key we update it in place if it's already + // present, and whenever we remove a key we update the locations of all + // other keys. program: CString, args: Vec, - env: Option>, // Guaranteed to have no NULs. + env: Option>, + argv: Vec<*const c_char>, + envp: Option>, + cwd: Option, uid: Option, gid: Option, @@ -44,10 +63,13 @@ pub struct Command { impl Command { pub fn new(program: &OsStr) -> Command { let mut saw_nul = false; + let program = os2c(program, &mut saw_nul); Command { - program: os2c(program, &mut saw_nul), + argv: vec![program.as_ptr(), 0 as *const _], + program: program, args: Vec::new(), env: None, + envp: None, cwd: None, uid: None, gid: None, @@ -57,40 +79,79 @@ impl Command { } pub fn arg(&mut self, arg: &OsStr) { - self.args.push(os2c(arg, &mut self.saw_nul)); + // Overwrite the trailing NULL pointer in `argv` and then add a new null + // pointer. + let arg = os2c(arg, &mut self.saw_nul); + self.argv[self.args.len() + 1] = arg.as_ptr(); + self.argv.push(0 as *const _); + + // Also make sure we keep track of the owned value to schedule a + // destructor for this memory. + self.args.push(arg); } - pub fn args<'a, I: Iterator>(&mut self, args: I) { - let mut saw_nul = self.saw_nul; - self.args.extend(args.map(|arg| os2c(arg, &mut saw_nul))); - self.saw_nul = saw_nul; - } - fn init_env_map(&mut self) { + + fn init_env_map(&mut self) -> (&mut HashMap, + &mut Vec<*const c_char>) { if self.env.is_none() { - // Will not add NULs to env: preexisting environment will not contain any. - self.env = Some(env::vars_os().collect()); + let mut map = HashMap::new(); + let mut envp = Vec::new(); + for (k, v) in env::vars_os() { + let s = pair_to_key(&k, &v, &mut self.saw_nul); + envp.push(s.as_ptr()); + map.insert(k, (envp.len() - 1, s)); + } + envp.push(0 as *const _); + self.env = Some(map); + self.envp = Some(envp); } + (self.env.as_mut().unwrap(), self.envp.as_mut().unwrap()) } - pub fn env(&mut self, key: &OsStr, val: &OsStr) { - let k = OsString::from_vec(os2c(key, &mut self.saw_nul).into_bytes()); - let v = OsString::from_vec(os2c(val, &mut self.saw_nul).into_bytes()); - // Will not add NULs to env: return without inserting if any were seen. - if self.saw_nul { - return; + pub fn env(&mut self, key: &OsStr, val: &OsStr) { + let new_key = pair_to_key(key, val, &mut self.saw_nul); + let (map, envp) = self.init_env_map(); + + // If `key` is already present then we we just update `envp` in place + // (and store the owned value), but if it's not there we override the + // trailing NULL pointer, add a new NULL pointer, and store where we + // were located. + match map.entry(key.to_owned()) { + Entry::Occupied(mut e) => { + let (i, ref mut s) = *e.get_mut(); + envp[i] = new_key.as_ptr(); + *s = new_key; + } + Entry::Vacant(e) => { + let len = envp.len(); + envp[len - 1] = new_key.as_ptr(); + envp.push(0 as *const _); + e.insert((len - 1, new_key)); + } } - - self.init_env_map(); - self.env.as_mut() - .unwrap() - .insert(k, v); } + pub fn env_remove(&mut self, key: &OsStr) { - self.init_env_map(); - self.env.as_mut().unwrap().remove(key); + let (map, envp) = self.init_env_map(); + + // If we actually ended up removing a key, then we need to update the + // position of all keys that come after us in `envp` because they're all + // one element sooner now. + if let Some((i, _)) = map.remove(key) { + envp.remove(i); + + for (_, &mut (ref mut j, _)) in map.iter_mut() { + if *j >= i { + *j -= 1; + } + } + } } + pub fn env_clear(&mut self) { - self.env = Some(HashMap::new()) + self.env = Some(HashMap::new()); + self.envp = Some(vec![0 as *const _]); } + pub fn cwd(&mut self, dir: &OsStr) { self.cwd = Some(os2c(dir, &mut self.saw_nul)); } @@ -112,6 +173,18 @@ fn os2c(s: &OsStr, saw_nul: &mut bool) -> CString { }) } +fn pair_to_key(key: &OsStr, value: &OsStr, saw_nul: &mut bool) -> CString { + let (key, value) = (key.as_bytes(), value.as_bytes()); + let mut v = Vec::with_capacity(key.len() + value.len() + 1); + v.extend(key); + v.push(b'='); + v.extend(value); + CString::new(v).unwrap_or_else(|_e| { + *saw_nul = true; + CString::new("foo=bar").unwrap() + }) +} + impl fmt::Debug for Command { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { try!(write!(f, "{:?}", self.program)); @@ -218,20 +291,28 @@ impl Process { return Err(io::Error::new(ErrorKind::InvalidInput, "nul byte found in provided data")); } - let dirp = cfg.cwd.as_ref().map(|c| c.as_ptr()).unwrap_or(ptr::null()); - - let (envp, _a, _b) = make_envp(cfg.env.as_ref()); - let (argv, _a) = make_argv(&cfg.program, &cfg.args); let (input, output) = try!(sys::pipe::anon_pipe()); let pid = unsafe { - match libc::fork() { + match try!(cvt(libc::fork())) { 0 => { drop(input); - Process::child_after_fork(cfg, output, argv, envp, dirp, - in_fd, out_fd, err_fd) + let err = Process::exec(cfg, in_fd, out_fd, err_fd); + let errno = err.raw_os_error().unwrap_or(libc::EINVAL) as u32; + let bytes = [ + (errno >> 24) as u8, + (errno >> 16) as u8, + (errno >> 8) as u8, + (errno >> 0) as u8, + CLOEXEC_MSG_FOOTER[0], CLOEXEC_MSG_FOOTER[1], + CLOEXEC_MSG_FOOTER[2], CLOEXEC_MSG_FOOTER[3] + ]; + // pipe I/O up to PIPE_BUF bytes should be atomic, and then + // we want to be sure we *don't* run at_exit destructors as + // we're being torn down regardless + assert!(output.write(&bytes).is_ok()); + libc::_exit(1) } - n if n < 0 => return Err(Error::last_os_error()), n => n, } }; @@ -306,29 +387,15 @@ impl Process { // allocation). Instead we just close it manually. This will never // have the drop glue anyway because this code never returns (the // child will either exec() or invoke libc::exit) - unsafe fn child_after_fork(cfg: &Command, - mut output: AnonPipe, - argv: *const *const libc::c_char, - envp: *const libc::c_void, - dirp: *const libc::c_char, - in_fd: Stdio, - out_fd: Stdio, - err_fd: Stdio) -> ! { - fn fail(output: &mut AnonPipe) -> ! { - let errno = sys::os::errno() as u32; - let bytes = [ - (errno >> 24) as u8, - (errno >> 16) as u8, - (errno >> 8) as u8, - (errno >> 0) as u8, - CLOEXEC_MSG_FOOTER[0], CLOEXEC_MSG_FOOTER[1], - CLOEXEC_MSG_FOOTER[2], CLOEXEC_MSG_FOOTER[3] - ]; - // pipe I/O up to PIPE_BUF bytes should be atomic, and then we want - // to be sure we *don't* run at_exit destructors as we're being torn - // down regardless - assert!(output.write(&bytes).is_ok()); - unsafe { libc::_exit(1) } + unsafe fn exec(cfg: &Command, + in_fd: Stdio, + out_fd: Stdio, + err_fd: Stdio) -> io::Error { + macro_rules! try { + ($e:expr) => (match $e { + Ok(e) => e, + Err(e) => return e, + }) } // Make sure that the source descriptors are not an stdio descriptor, @@ -337,30 +404,30 @@ impl Process { // suppose we want the child's stderr to be the parent's stdout, and // the child's stdout to be the parent's stderr. No matter which we // dup first, the second will get overwritten prematurely. - let maybe_migrate = |src: Stdio, output: &mut AnonPipe| { + let maybe_migrate = |src: Stdio| { match src { Stdio::Raw(fd @ libc::STDIN_FILENO) | Stdio::Raw(fd @ libc::STDOUT_FILENO) | Stdio::Raw(fd @ libc::STDERR_FILENO) => { - let fd = match cvt_r(|| libc::dup(fd)) { - Ok(fd) => fd, - Err(_) => fail(output), - }; - let fd = FileDesc::new(fd); - fd.set_cloexec(); - Stdio::Raw(fd.into_raw()) - }, - + cvt_r(|| libc::dup(fd)).map(|fd| { + let fd = FileDesc::new(fd); + fd.set_cloexec(); + Stdio::Raw(fd.into_raw()) + }) + } s @ Stdio::None | s @ Stdio::Inherit | - s @ Stdio::Raw(_) => s, + s @ Stdio::Raw(_) => Ok(s), } }; + let in_fd = try!(maybe_migrate(in_fd)); + let out_fd = try!(maybe_migrate(out_fd)); + let err_fd = try!(maybe_migrate(err_fd)); let setup = |src: Stdio, dst: c_int| { match src { - Stdio::Inherit => true, - Stdio::Raw(fd) => cvt_r(|| libc::dup2(fd, dst)).is_ok(), + Stdio::Inherit => Ok(()), + Stdio::Raw(fd) => cvt_r(|| libc::dup2(fd, dst)).map(|_| ()), // If a stdio file descriptor is set to be ignored, we open up // /dev/null into that file descriptor. Otherwise, the first @@ -373,29 +440,18 @@ impl Process { opts.write(dst != libc::STDIN_FILENO); let devnull = CStr::from_ptr(b"/dev/null\0".as_ptr() as *const _); - if let Ok(f) = File::open_c(devnull, &opts) { - cvt_r(|| libc::dup2(f.fd().raw(), dst)).is_ok() - } else { - false - } + File::open_c(devnull, &opts).and_then(|f| { + cvt_r(|| libc::dup2(f.fd().raw(), dst)).map(|_| ()) + }) } } }; - - // Make sure we migrate all source descriptors before - // we start overwriting them - let in_fd = maybe_migrate(in_fd, &mut output); - let out_fd = maybe_migrate(out_fd, &mut output); - let err_fd = maybe_migrate(err_fd, &mut output); - - if !setup(in_fd, libc::STDIN_FILENO) { fail(&mut output) } - if !setup(out_fd, libc::STDOUT_FILENO) { fail(&mut output) } - if !setup(err_fd, libc::STDERR_FILENO) { fail(&mut output) } + try!(setup(in_fd, libc::STDIN_FILENO)); + try!(setup(out_fd, libc::STDOUT_FILENO)); + try!(setup(err_fd, libc::STDERR_FILENO)); if let Some(u) = cfg.gid { - if libc::setgid(u as libc::gid_t) != 0 { - fail(&mut output); - } + try!(cvt(libc::setgid(u as gid_t))); } if let Some(u) = cfg.uid { // When dropping privileges from root, the `setgroups` call @@ -407,9 +463,7 @@ impl Process { // privilege dropping function. let _ = libc::setgroups(0, ptr::null()); - if libc::setuid(u as libc::uid_t) != 0 { - fail(&mut output); - } + try!(cvt(libc::setuid(u as uid_t))); } if cfg.session_leader { // Don't check the error of setsid because it fails if we're the @@ -417,16 +471,15 @@ impl Process { // error, but ignore it anyway. let _ = libc::setsid(); } - if !dirp.is_null() && libc::chdir(dirp) == -1 { - fail(&mut output); + if let Some(ref cwd) = cfg.cwd { + try!(cvt(libc::chdir(cwd.as_ptr()))); } - if !envp.is_null() { - *sys::os::environ() = envp as *const _; + if let Some(ref envp) = cfg.envp { + *sys::os::environ() = envp.as_ptr(); } - #[cfg(not(target_os = "nacl"))] - unsafe fn reset_signal_handling(output: &mut AnonPipe) { - use mem; + // NaCl has no signal support. + if cfg!(not(target_os = "nacl")) { // Reset signal handling so the child process starts in a // standardized state. libstd ignores SIGPIPE, and signal-handling // libraries often set a mask. Child processes inherit ignored @@ -435,23 +488,17 @@ impl Process { // need to clean things up now to avoid confusing the program // we're about to run. let mut set: libc::sigset_t = mem::uninitialized(); - if libc::sigemptyset(&mut set) != 0 || - libc::pthread_sigmask(libc::SIG_SETMASK, &set, ptr::null_mut()) != 0 || - libc::signal( - libc::SIGPIPE, mem::transmute(libc::SIG_DFL) - ) == mem::transmute(libc::SIG_ERR) - { - fail(output); + try!(cvt(libc::sigemptyset(&mut set))); + try!(cvt(libc::pthread_sigmask(libc::SIG_SETMASK, &set, + ptr::null_mut()))); + let ret = libc::signal(libc::SIGPIPE, libc::SIG_DFL); + if ret == libc::SIG_ERR { + return io::Error::last_os_error() } } - #[cfg(target_os = "nacl")] - unsafe fn reset_signal_handling(_output: &mut AnonPipe) { - // NaCl has no signal support. - } - reset_signal_handling(&mut output); - let _ = libc::execvp(*argv, argv); - fail(&mut output) + libc::execvp(cfg.argv[0], cfg.argv.as_ptr()); + io::Error::last_os_error() } pub fn id(&self) -> u32 { @@ -477,54 +524,6 @@ impl Process { } } -fn make_argv(prog: &CString, args: &[CString]) - -> (*const *const libc::c_char, Vec<*const libc::c_char>) -{ - let mut ptrs: Vec<*const libc::c_char> = Vec::with_capacity(args.len()+1); - - // Convert the CStrings into an array of pointers. Also return the - // vector that owns the raw pointers, to ensure they live long - // enough. - ptrs.push(prog.as_ptr()); - ptrs.extend(args.iter().map(|tmp| tmp.as_ptr())); - - // Add a terminating null pointer (required by libc). - ptrs.push(ptr::null()); - - (ptrs.as_ptr(), ptrs) -} - -fn make_envp(env: Option<&HashMap>) - -> (*const c_void, Vec>, Vec<*const libc::c_char>) -{ - // On posixy systems we can pass a char** for envp, which is a - // null-terminated array of "k=v\0" strings. As with make_argv, we - // return two vectors that own the data to ensure that they live - // long enough. - if let Some(env) = env { - let mut tmps = Vec::with_capacity(env.len()); - - for pair in env { - let mut kv = Vec::new(); - kv.extend_from_slice(pair.0.as_bytes()); - kv.push('=' as u8); - kv.extend_from_slice(pair.1.as_bytes()); - kv.push(0); // terminating null - tmps.push(kv); - } - - let mut ptrs: Vec<*const libc::c_char> = - tmps.iter() - .map(|tmp| tmp.as_ptr() as *const libc::c_char) - .collect(); - ptrs.push(ptr::null()); - - (ptrs.as_ptr() as *const _, tmps, ptrs) - } else { - (ptr::null(), Vec::new(), Vec::new()) - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/src/libstd/sys/windows/process.rs b/src/libstd/sys/windows/process.rs index 61cf28be16f91..6a04aa2f2c4f3 100644 --- a/src/libstd/sys/windows/process.rs +++ b/src/libstd/sys/windows/process.rs @@ -74,9 +74,6 @@ impl Command { pub fn arg(&mut self, arg: &OsStr) { self.args.push(arg.to_os_string()) } - pub fn args<'a, I: Iterator>(&mut self, args: I) { - self.args.extend(args.map(OsStr::to_os_string)) - } fn init_env_map(&mut self){ if self.env.is_none() { self.env = Some(env::vars_os().map(|(key, val)| {