Skip to content

Commit

Permalink
libct: do not parse passwd and group on every run/exec
Browse files Browse the repository at this point in the history
OCI runtime spec states [1] that the UID, primary GID, and additional
GIDs are all specified as numbers, and also adds that symbolic names
resolution "are left to upper levels to derive". Meaning, runc should
not care about user and group names.

Yet, runc tries to be clever than that, always parsing container's
/etc/passwd and /etc/group. It results in a few things:

1. If UID (or GID) specified can't be found inside container's /etc/passwd
   (or /etc/group), runc (run or exec) errors out.

2. Any additional GIDs specified in container's /etc/group are
   automatically prepended to the list for setgroups(2). Meaning, a user
   can either specify additional GIDs in OCI runtime spec, or
   container's /etc/group entry for a given user.

Looks like (1) is questionable (on a normal Linux system, I can run
programs under any UID (GID), not limited to those listed in /etc/passwd
(/etc/group), and (2) is just an extra mechanism of specifying
additional GIDs.

Let's remove those, hopefully increasing runc performance as well as OCI
spec conformance. With that, also remove most of libcontainer/user
parsers.

The only remaining need to parse /etc/passwd is to set HOME environment
variable for a specified UID, in case it is not. For that, we can use
standard os/user package, which has both libc-based and own ("pure Go")
/etc/passwd parsers.

PS Note that the structures being changed (initConfig and Process) are
never saved to disk as JSON by runc, so there is no compatibility issue
for runc users. This is a breaking change in libcontainer, but we never
promised that libcontainer API will be stable.

[1] https://github.com/opencontainers/runtime-spec/blob/v1.0.2/config.md#posix-platform-user

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
  • Loading branch information
kolyshkin committed Aug 29, 2023
1 parent 3fc35fc commit 4866cd2
Show file tree
Hide file tree
Showing 12 changed files with 53 additions and 1,310 deletions.
3 changes: 2 additions & 1 deletion libcontainer/container_linux.go
Expand Up @@ -711,7 +711,8 @@ func (c *Container) newInitConfig(process *Process) *initConfig {
Config: c.config,
Args: process.Args,
Env: process.Env,
User: process.User,
UID: process.UID,
GID: process.GID,
AdditionalGroups: process.AdditionalGroups,
Cwd: process.Cwd,
Capabilities: process.Capabilities,
Expand Down
75 changes: 23 additions & 52 deletions libcontainer/init_linux.go
Expand Up @@ -7,6 +7,7 @@ import (
"fmt"
"net"
"os"
"os/user"
"runtime"
"runtime/debug"
"strconv"
Expand All @@ -22,7 +23,6 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/user"
"github.com/opencontainers/runc/libcontainer/utils"
)

Expand Down Expand Up @@ -68,8 +68,9 @@ type initConfig struct {
ProcessLabel string `json:"process_label"`
AppArmorProfile string `json:"apparmor_profile"`
NoNewPrivileges bool `json:"no_new_privileges"`
User string `json:"user"`
AdditionalGroups []string `json:"additional_groups"`
UID uint32 `json:"uid"`
GID uint32 `json:"gid"`
AdditionalGroups []int `json:"additional_groups"`
Config *configs.Config `json:"config"`
Networks []*network `json:"network"`
PassedFilesCount int `json:"passed_files_count"`
Expand Down Expand Up @@ -428,58 +429,26 @@ func syncParentSeccomp(pipe *os.File, seccompFd *os.File) error {

// setupUser changes the groups, gid, and uid for the user inside the container
func setupUser(config *initConfig) error {
// Set up defaults.
defaultExecUser := user.ExecUser{
Uid: 0,
Gid: 0,
Home: "/",
}

passwdPath, err := user.GetPasswdPath()
if err != nil {
return err
}

groupPath, err := user.GetGroupPath()
if err != nil {
return err
}

execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
if err != nil {
return err
}

var addGroups []int
if len(config.AdditionalGroups) > 0 {
addGroups, err = user.GetAdditionalGroupsPath(config.AdditionalGroups, groupPath)
if err != nil {
return err
}
}

// Rather than just erroring out later in setuid(2) and setgid(2), check
// that the user is mapped here.
if _, err := config.Config.HostUID(execUser.Uid); err != nil {
if _, err := config.Config.HostUID(int(config.UID)); err != nil {
return errors.New("cannot set uid to unmapped user in user namespace")
}
if _, err := config.Config.HostGID(execUser.Gid); err != nil {
if _, err := config.Config.HostGID(int(config.GID)); err != nil {
return errors.New("cannot set gid to unmapped user in user namespace")
}

if config.RootlessEUID {
if config.RootlessEUID && len(config.AdditionalGroups) > 0 {
// We cannot set any additional groups in a rootless container and thus
// we bail if the user asked us to do so. TODO: We currently can't do
// this check earlier, but if libcontainer.Process.User was typesafe
// this might work.
if len(addGroups) > 0 {
return errors.New("cannot set any additional groups in a rootless container")
}
return errors.New("cannot set any additional groups in a rootless container")
}

// Before we change to the container's user make sure that the processes
// STDIO is correctly owned by the user that we are switching to.
if err := fixStdioPermissions(execUser); err != nil {
if err := fixStdioPermissions(config.UID); err != nil {
return err
}

Expand All @@ -495,32 +464,34 @@ func setupUser(config *initConfig) error {
allowSupGroups := !config.RootlessEUID && string(bytes.TrimSpace(setgroups)) != "deny"

if allowSupGroups {
suppGroups := append(execUser.Sgids, addGroups...)
if err := unix.Setgroups(suppGroups); err != nil {
if err := unix.Setgroups(config.AdditionalGroups); err != nil {
return &os.SyscallError{Syscall: "setgroups", Err: err}
}
}

if err := system.Setgid(execUser.Gid); err != nil {
if err := system.Setgid(config.GID); err != nil {
return err
}
if err := system.Setuid(execUser.Uid); err != nil {
if err := system.Setuid(config.UID); err != nil {
return err
}

// if we didn't get HOME already, set it based on the user's HOME
if envHome := os.Getenv("HOME"); envHome == "" {
if err := os.Setenv("HOME", execUser.Home); err != nil {
return err
// If we didn't get HOME already, try to set it based on the user's HOME.
if _, ok := os.LookupEnv("HOME"); !ok {
u, err := user.LookupId(strconv.Itoa(int(config.UID)))
if err == nil {
if err := os.Setenv("HOME", u.HomeDir); err != nil {
return err
}
}
}
return nil
}

// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user.
// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified uid.
// The ownership needs to match because it is created outside of the container and needs to be
// localized.
func fixStdioPermissions(u *user.ExecUser) error {
func fixStdioPermissions(uid uint32) error {
var null unix.Stat_t
if err := unix.Stat("/dev/null", &null); err != nil {
return &os.PathError{Op: "stat", Path: "/dev/null", Err: err}
Expand All @@ -533,7 +504,7 @@ func fixStdioPermissions(u *user.ExecUser) error {

// Skip chown if uid is already the one we want or any of the STDIO descriptors
// were redirected to /dev/null.
if int(s.Uid) == u.Uid || s.Rdev == null.Rdev {
if s.Uid == uid || s.Rdev == null.Rdev {
continue
}

Expand All @@ -543,7 +514,7 @@ func fixStdioPermissions(u *user.ExecUser) error {
// that users expect to be able to actually use their console. Without
// this code, you couldn't effectively run as a non-root user inside a
// container and also have a console set up.
if err := file.Chown(u.Uid, int(s.Gid)); err != nil {
if err := file.Chown(int(uid), int(s.Gid)); err != nil {
// If we've hit an EINVAL then s.Gid isn't mapped in the user
// namespace. If we've hit an EPERM then the inode's current owner
// is not mapped in our user namespace (in particular,
Expand Down
14 changes: 6 additions & 8 deletions libcontainer/integration/exec_test.go
Expand Up @@ -395,7 +395,7 @@ func TestAdditionalGroups(t *testing.T) {
Env: standardEnvironment,
Stdin: nil,
Stdout: &stdout,
AdditionalGroups: []string{"plugdev", "audio"},
AdditionalGroups: []int{3333, 99999},
Init: true,
}
err = container.Run(&pconfig)
Expand All @@ -406,13 +406,11 @@ func TestAdditionalGroups(t *testing.T) {

outputGroups := stdout.String()

// Check that the groups output has the groups that we specified
if !strings.Contains(outputGroups, "audio") {
t.Fatalf("Listed groups do not contain the audio group as expected: %v", outputGroups)
}

if !strings.Contains(outputGroups, "plugdev") {
t.Fatalf("Listed groups do not contain the plugdev group as expected: %v", outputGroups)
// Check that the groups output has the groups that we specified.
for _, gid := range pconfig.AdditionalGroups {
if !strings.Contains(outputGroups, strconv.Itoa(gid)) {
t.Errorf("Listed groups do not contain gid %d as expected: %v", gid, outputGroups)
}
}
}

Expand Down
14 changes: 6 additions & 8 deletions libcontainer/integration/execin_test.go
Expand Up @@ -162,7 +162,7 @@ func TestExecInAdditionalGroups(t *testing.T) {
Env: standardEnvironment,
Stdin: nil,
Stdout: &stdout,
AdditionalGroups: []string{"plugdev", "audio"},
AdditionalGroups: []int{4444, 87654},
}
err = container.Run(&pconfig)
ok(t, err)
Expand All @@ -175,13 +175,11 @@ func TestExecInAdditionalGroups(t *testing.T) {

outputGroups := stdout.String()

// Check that the groups output has the groups that we specified
if !strings.Contains(outputGroups, "audio") {
t.Fatalf("Listed groups do not contain the audio group as expected: %v", outputGroups)
}

if !strings.Contains(outputGroups, "plugdev") {
t.Fatalf("Listed groups do not contain the plugdev group as expected: %v", outputGroups)
// Check that the groups output has the groups that we specified.
for _, gid := range pconfig.AdditionalGroups {
if !strings.Contains(outputGroups, strconv.Itoa(gid)) {
t.Errorf("Listed groups do not contain gid %d as expected: %v", gid, outputGroups)
}
}
}

Expand Down
6 changes: 3 additions & 3 deletions libcontainer/process.go
Expand Up @@ -26,13 +26,13 @@ type Process struct {
// Env specifies the environment variables for the process.
Env []string

// User will set the uid and gid of the executing process running inside the container
// UID and GID of the executing process running inside the container
// local to the container's user and group configuration.
User string
UID, GID uint32

// AdditionalGroups specifies the gids that should be added to supplementary groups
// in addition to those that the user belongs to.
AdditionalGroups []string
AdditionalGroups []int

// Cwd will change the processes current working directory inside the container's rootfs.
Cwd string
Expand Down
4 changes: 2 additions & 2 deletions libcontainer/system/syscall_linux_32.go
Expand Up @@ -9,7 +9,7 @@ import (
)

// Setuid sets the uid of the calling thread to the specified uid.
func Setuid(uid int) (err error) {
func Setuid(uid uint32) (err error) {
_, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0)
if e1 != 0 {
err = e1
Expand All @@ -18,7 +18,7 @@ func Setuid(uid int) (err error) {
}

// Setgid sets the gid of the calling thread to the specified gid.
func Setgid(gid int) (err error) {
func Setgid(gid uint32) (err error) {
_, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0)
if e1 != 0 {
err = e1
Expand Down
4 changes: 2 additions & 2 deletions libcontainer/system/syscall_linux_64.go
Expand Up @@ -9,7 +9,7 @@ import (
)

// Setuid sets the uid of the calling thread to the specified uid.
func Setuid(uid int) (err error) {
func Setuid(uid uint32) (err error) {
_, _, e1 := unix.RawSyscall(unix.SYS_SETUID, uintptr(uid), 0, 0)
if e1 != 0 {
err = e1
Expand All @@ -18,7 +18,7 @@ func Setuid(uid int) (err error) {
}

// Setgid sets the gid of the calling thread to the specified gid.
func Setgid(gid int) (err error) {
func Setgid(gid uint32) (err error) {
_, _, e1 := unix.RawSyscall(unix.SYS_SETGID, uintptr(gid), 0, 0)
if e1 != 0 {
err = e1
Expand Down

0 comments on commit 4866cd2

Please sign in to comment.