From 1f6caa5493586afad6c667ee98f398806226b998 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Mon, 25 Jun 2018 13:37:17 -0700 Subject: [PATCH] Fix CPU bursting settings --- executor/runtime/docker/docker.go | 29 +++++++--------------- executor/runtime/types/types.go | 41 +++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 20 deletions(-) diff --git a/executor/runtime/docker/docker.go b/executor/runtime/docker/docker.go index 733b32c42..72f826700 100644 --- a/executor/runtime/docker/docker.go +++ b/executor/runtime/docker/docker.go @@ -28,7 +28,6 @@ import ( runtimeTypes "github.com/Netflix/titus-executor/executor/runtime/types" "github.com/Netflix/titus-executor/nvidia" vpcTypes "github.com/Netflix/titus-executor/vpc/types" - "github.com/aws/aws-sdk-go/aws/arn" "github.com/docker/docker/api/types" "github.com/docker/docker/api/types/container" "github.com/docker/docker/api/types/events" @@ -192,11 +191,6 @@ var ( envFileTemplate = template.Must(template.New("").Parse(envFileTemplateStr)) ) -// ErrMissingIAMRole indicates that the Titus job was submitted without an IAM role -// This is a transition because previously the protobuf had this marked as an optional field -// and it's a temporary measure during protocol evolution. -var ErrMissingIAMRole = errors.New("IAM Role Missing") - // NoEntrypointError indicates that the Titus job does not have an entrypoint, or command var NoEntrypointError = &runtimeTypes.BadEntryPointError{Reason: errors.New("Image, and job have no entrypoint, or command")} @@ -335,7 +329,7 @@ func setupLoggingInfra(dockerRuntime *DockerRuntime) error { } func maybeSetCFSBandwidth(cfsBandwidthMode string, cfsBandwidthPeriod uint64, c *runtimeTypes.Container, hostCfg *container.HostConfig) { - cpuBurst := c.TitusInfo.GetAllowNetworkBursting() + cpuBurst := c.TitusInfo.GetAllowCpuBursting() logEntry := log.WithField("taskID", c.TaskID).WithField("bandwidthMode", cfsBandwidthMode).WithField("cpuBurst", cpuBurst) if cpuBurst { @@ -344,6 +338,11 @@ func maybeSetCFSBandwidth(cfsBandwidthMode string, cfsBandwidthPeriod uint64, c return } + if c.Resources.CPU == 0 { + logEntry.WithField("shares", 1).Info("Setting shares as this is an opportunistic workload") + hostCfg.CPUShares = 1 + } + switch cfsBandwidthMode { case bandwidthMode: setCFSBandwidth(logEntry, cfsBandwidthPeriod, c, hostCfg) @@ -398,17 +397,11 @@ func (r *DockerRuntime) dockerConfig(c *runtimeTypes.Container, binds []string, return nil, nil, err } - if c.TitusInfo.IamProfile == nil || c.TitusInfo.GetIamProfile() == "" { - return nil, nil, ErrMissingIAMRole - } - - _, err = arn.Parse(c.TitusInfo.GetIamProfile()) + c.Env["TITUS_IAM_ROLE"], err = c.GetIamProfile() if err != nil { return nil, nil, err } - c.Env["TITUS_IAM_ROLE"] = c.TitusInfo.GetIamProfile() - hostname := strings.ToLower(c.TaskID) containerCfg := &container.Config{ Image: c.QualifiedImageName(), @@ -496,12 +489,8 @@ func (r *DockerRuntime) dockerConfig(c *runtimeTypes.Container, binds []string, hostCfg.NetworkMode = container.NetworkMode("none") } - if c.TitusInfo.GetBatch() { - if c.TitusInfo.GetPassthroughAttributes()["titusParameter.agent.batchPriority"] == "idle" { - c.Env["TITUS_BATCH"] = "idle" - } else { - c.Env["TITUS_BATCH"] = "true" - } + if batch := c.GetBatch(); batch != nil { + c.Env["TITUS_BATCH"] = *batch } // This must got after all setup diff --git a/executor/runtime/types/types.go b/executor/runtime/types/types.go index bbe51f9d4..821df8ae5 100644 --- a/executor/runtime/types/types.go +++ b/executor/runtime/types/types.go @@ -13,10 +13,18 @@ import ( "github.com/Netflix/titus-executor/executor/metatron" vpcTypes "github.com/Netflix/titus-executor/vpc/types" // The purpose of this is to tell gometalinter to keep vendoring this package + "errors" + _ "github.com/Netflix/titus-api-definitions/src/main/proto/netflix/titus" "github.com/Netflix/titus-executor/executor/dockershellparser" + "github.com/aws/aws-sdk-go/aws/arn" ) +// ErrMissingIAMRole indicates that the Titus job was submitted without an IAM role +// This is a transition because previously the protobuf had this marked as an optional field +// and it's a temporary measure during protocol evolution. +var ErrMissingIAMRole = errors.New("IAM Role Missing") + // RegistryImageNotFoundError represents an error where an image // did not exist in the registry type RegistryImageNotFoundError struct { @@ -178,6 +186,39 @@ func (c *Container) GetSortedEnvArray() []string { } +// GetIamProfile retrieves, and validates the format of the IAM profile +func (c *Container) GetIamProfile() (string, error) { + if c.TitusInfo.IamProfile == nil || c.TitusInfo.GetIamProfile() == "" { + return "", ErrMissingIAMRole + } + if _, err := arn.Parse(c.TitusInfo.GetIamProfile()); err != nil { + return "", err + } + + return c.TitusInfo.GetIamProfile(), nil +} + +// GetBatch returns what the environment variable TITUS_BATCH should be set to. +// if it returns nil, TITUS_BATCH should be unset +func (c *Container) GetBatch() *string { + idleStr := "idle" + trueStr := "true" + + if c.Resources.CPU == 0 { + return &idleStr + } + + if !c.TitusInfo.GetBatch() { + return nil + } + + if c.TitusInfo.GetPassthroughAttributes()["titusParameter.agent.batchPriority"] == "idle" { + return &idleStr + } + + return &trueStr +} + // Resources specify constraints to be applied to a Container type Resources struct { Mem int64 // in MiB