Skip to content

Commit

Permalink
libct: do not parse passwd and group on every run/exec
Browse files Browse the repository at this point in the history
OCI runtime spec states [1] that the UID, primary GID, and additional
GIDs are all specified as numbers, and also adds that symbolic names
resolution "are left to upper levels to derive". Meaning, runc should
not care about user and group names.

Yet, runc tries to be clever than that, always parsing container's
/etc/passwd and /etc/group. It results in a few things:

1. If UID (or GID) specified can't be found inside container's /etc/passwd
   (or /etc/group), runc (run or exec) errors out.

2. Any additional GIDs specified in container's /etc/group are
   automatically prepended to the list for setgroups(2). Meaning, a user
   can either specify additional GIDs in OCI runtime spec, or
   container's /etc/group entry for a given user.

Looks like (1) is questionable (on a normal Linux system, I can run
programs under any UID (GID), not limited to those listed in /etc/passwd
(/etc/group), and (2) is just an extra mechanism of specifying
additional GIDs.

Let's remove those, hopefully increasing runc performance as well as OCI
spec conformance.

The only remaining need to parse /etc/passwd is to set HOME environment
variable for a specified UID, in case $HOME is not yet set. Use
user.LookupUid for this case.

PS Note that the structures being changed (initConfig and Process) are
never saved to disk as JSON by runc, so there is no compatibility issue
for runc users. Still, this is a breaking change in libcontainer, but we
never promised that libcontainer API will be stable.

For 3998.

[1] https://github.com/opencontainers/runtime-spec/blob/v1.0.2/config.md#posix-platform-user

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
  • Loading branch information
kolyshkin committed Sep 28, 2023
1 parent b166280 commit 0b9b45d
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 78 deletions.
3 changes: 2 additions & 1 deletion libcontainer/container_linux.go
Expand Up @@ -823,7 +823,8 @@ func (c *Container) newInitConfig(process *Process) *initConfig {
Config: c.config,
Args: process.Args,
Env: process.Env,
User: process.User,
UID: process.UID,
GID: process.GID,
AdditionalGroups: process.AdditionalGroups,
Cwd: process.Cwd,
Capabilities: process.Capabilities,
Expand Down
79 changes: 27 additions & 52 deletions libcontainer/init_linux.go
Expand Up @@ -68,8 +68,9 @@ type initConfig struct {
ProcessLabel string `json:"process_label"`
AppArmorProfile string `json:"apparmor_profile"`
NoNewPrivileges bool `json:"no_new_privileges"`
User string `json:"user"`
AdditionalGroups []string `json:"additional_groups"`
UID int `json:"uid"`
GID int `json:"gid"`
AdditionalGroups []int `json:"additional_groups"`
Config *configs.Config `json:"config"`
Networks []*network `json:"network"`
PassedFilesCount int `json:"passed_files_count"`
Expand Down Expand Up @@ -440,60 +441,42 @@ func syncParentSeccomp(pipe *os.File, seccompFd *os.File) error {
return readSync(pipe, procSeccompDone)
}

// setupUser changes the groups, gid, and uid for the user inside the container
func setupUser(config *initConfig) error {
// Set up defaults.
defaultExecUser := user.ExecUser{
Uid: 0,
Gid: 0,
Home: "/",
}

passwdPath, err := user.GetPasswdPath()
if err != nil {
return err
}

groupPath, err := user.GetGroupPath()
if err != nil {
return err
}

execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
if err != nil {
return err
// setHome tries to guess $HOME based on a user's /etc/passwd entry.
// If $HOME is already set, it does nothing.
func setHome(uid int) error {
if _, ok := os.LookupEnv("HOME"); ok {
return nil
}

var addGroups []int
if len(config.AdditionalGroups) > 0 {
addGroups, err = user.GetAdditionalGroupsPath(config.AdditionalGroups, groupPath)
if err != nil {
if u, err := user.LookupUid(uid); err == nil {
if err := os.Setenv("HOME", u.Home); err != nil {
return err
}
}
return nil
}

// setupUser changes the groups, gid, and uid for the user inside the container.
func setupUser(config *initConfig) error {
// Rather than just erroring out later in setuid(2) and setgid(2), check
// that the user is mapped here.
if _, err := config.Config.HostUID(execUser.Uid); err != nil {
if _, err := config.Config.HostUID(config.UID); err != nil {
return errors.New("cannot set uid to unmapped user in user namespace")
}
if _, err := config.Config.HostGID(execUser.Gid); err != nil {
if _, err := config.Config.HostGID(config.GID); err != nil {
return errors.New("cannot set gid to unmapped user in user namespace")
}

if config.RootlessEUID {
if config.RootlessEUID && len(config.AdditionalGroups) > 0 {
// We cannot set any additional groups in a rootless container and thus
// we bail if the user asked us to do so. TODO: We currently can't do
// this check earlier, but if libcontainer.Process.User was typesafe
// this might work.
if len(addGroups) > 0 {
return errors.New("cannot set any additional groups in a rootless container")
}
return errors.New("cannot set any additional groups in a rootless container")
}

// Before we change to the container's user make sure that the processes
// STDIO is correctly owned by the user that we are switching to.
if err := fixStdioPermissions(execUser); err != nil {
if err := fixStdioPermissions(config.UID); err != nil {
return err
}

Expand All @@ -509,32 +492,24 @@ func setupUser(config *initConfig) error {
allowSupGroups := !config.RootlessEUID && string(bytes.TrimSpace(setgroups)) != "deny"

if allowSupGroups {
suppGroups := append(execUser.Sgids, addGroups...)
if err := unix.Setgroups(suppGroups); err != nil {
if err := unix.Setgroups(config.AdditionalGroups); err != nil {
return &os.SyscallError{Syscall: "setgroups", Err: err}
}
}

if err := system.Setgid(execUser.Gid); err != nil {
if err := system.Setgid(config.GID); err != nil {
return err
}
if err := system.Setuid(execUser.Uid); err != nil {
if err := system.Setuid(config.UID); err != nil {
return err
}

// if we didn't get HOME already, set it based on the user's HOME
if envHome := os.Getenv("HOME"); envHome == "" {
if err := os.Setenv("HOME", execUser.Home); err != nil {
return err
}
}
return nil
return setHome(config.UID)
}

// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user.
// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified uid.
// The ownership needs to match because it is created outside of the container and needs to be
// localized.
func fixStdioPermissions(u *user.ExecUser) error {
func fixStdioPermissions(uid int) error {
var null unix.Stat_t
if err := unix.Stat("/dev/null", &null); err != nil {
return &os.PathError{Op: "stat", Path: "/dev/null", Err: err}
Expand All @@ -547,7 +522,7 @@ func fixStdioPermissions(u *user.ExecUser) error {

// Skip chown if uid is already the one we want or any of the STDIO descriptors
// were redirected to /dev/null.
if int(s.Uid) == u.Uid || s.Rdev == null.Rdev {
if int(s.Uid) == uid || s.Rdev == null.Rdev {
continue
}

Expand All @@ -557,7 +532,7 @@ func fixStdioPermissions(u *user.ExecUser) error {
// that users expect to be able to actually use their console. Without
// this code, you couldn't effectively run as a non-root user inside a
// container and also have a console set up.
if err := file.Chown(u.Uid, int(s.Gid)); err != nil {
if err := file.Chown(uid, int(s.Gid)); err != nil {
// If we've hit an EINVAL then s.Gid isn't mapped in the user
// namespace. If we've hit an EPERM then the inode's current owner
// is not mapped in our user namespace (in particular,
Expand Down
14 changes: 6 additions & 8 deletions libcontainer/integration/exec_test.go
Expand Up @@ -395,7 +395,7 @@ func TestAdditionalGroups(t *testing.T) {
Env: standardEnvironment,
Stdin: nil,
Stdout: &stdout,
AdditionalGroups: []string{"plugdev", "audio"},
AdditionalGroups: []int{3333, 99999},
Init: true,
}
err = container.Run(&pconfig)
Expand All @@ -406,13 +406,11 @@ func TestAdditionalGroups(t *testing.T) {

outputGroups := stdout.String()

// Check that the groups output has the groups that we specified
if !strings.Contains(outputGroups, "audio") {
t.Fatalf("Listed groups do not contain the audio group as expected: %v", outputGroups)
}

if !strings.Contains(outputGroups, "plugdev") {
t.Fatalf("Listed groups do not contain the plugdev group as expected: %v", outputGroups)
// Check that the groups output has the groups that we specified.
for _, gid := range pconfig.AdditionalGroups {
if !strings.Contains(outputGroups, strconv.Itoa(gid)) {
t.Errorf("Listed groups do not contain gid %d as expected: %v", gid, outputGroups)
}
}
}

Expand Down
14 changes: 6 additions & 8 deletions libcontainer/integration/execin_test.go
Expand Up @@ -162,7 +162,7 @@ func TestExecInAdditionalGroups(t *testing.T) {
Env: standardEnvironment,
Stdin: nil,
Stdout: &stdout,
AdditionalGroups: []string{"plugdev", "audio"},
AdditionalGroups: []int{4444, 87654},
}
err = container.Run(&pconfig)
ok(t, err)
Expand All @@ -175,13 +175,11 @@ func TestExecInAdditionalGroups(t *testing.T) {

outputGroups := stdout.String()

// Check that the groups output has the groups that we specified
if !strings.Contains(outputGroups, "audio") {
t.Fatalf("Listed groups do not contain the audio group as expected: %v", outputGroups)
}

if !strings.Contains(outputGroups, "plugdev") {
t.Fatalf("Listed groups do not contain the plugdev group as expected: %v", outputGroups)
// Check that the groups output has the groups that we specified.
for _, gid := range pconfig.AdditionalGroups {
if !strings.Contains(outputGroups, strconv.Itoa(gid)) {
t.Errorf("Listed groups do not contain gid %d as expected: %v", gid, outputGroups)
}
}
}

Expand Down
6 changes: 3 additions & 3 deletions libcontainer/process.go
Expand Up @@ -26,13 +26,13 @@ type Process struct {
// Env specifies the environment variables for the process.
Env []string

// User will set the uid and gid of the executing process running inside the container
// UID and GID of the executing process running inside the container
// local to the container's user and group configuration.
User string
UID, GID int

// AdditionalGroups specifies the gids that should be added to supplementary groups
// in addition to those that the user belongs to.
AdditionalGroups []string
AdditionalGroups []int

// Cwd will change the processes current working directory inside the container's rootfs.
Cwd string
Expand Down
15 changes: 9 additions & 6 deletions utils_linux.go
Expand Up @@ -46,10 +46,10 @@ func getDefaultImagePath() string {
// spec and stdio from the current process.
func newProcess(p specs.Process) (*libcontainer.Process, error) {
lp := &libcontainer.Process{
Args: p.Args,
Env: p.Env,
// TODO: fix libcontainer's API to better support uid/gid in a typesafe way.
User: fmt.Sprintf("%d:%d", p.User.UID, p.User.GID),
Args: p.Args,
Env: p.Env,
UID: int(p.User.UID),
GID: int(p.User.GID),
Cwd: p.Cwd,
Label: p.SelinuxLabel,
NoNewPrivileges: &p.NoNewPrivileges,
Expand All @@ -69,8 +69,11 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) {
lp.Capabilities.Permitted = p.Capabilities.Permitted
lp.Capabilities.Ambient = p.Capabilities.Ambient
}
for _, gid := range p.User.AdditionalGids {
lp.AdditionalGroups = append(lp.AdditionalGroups, strconv.FormatUint(uint64(gid), 10))
if l := len(p.User.AdditionalGids); l > 0 {
lp.AdditionalGroups = make([]int, l)
for i, g := range p.User.AdditionalGids {
lp.AdditionalGroups[i] = int(g)
}
}
for _, rlimit := range p.Rlimits {
rl, err := createLibContainerRlimit(rlimit)
Expand Down

0 comments on commit 0b9b45d

Please sign in to comment.