Permalink
Browse files

Specify CPU Request for the SDK Server Sidecar

This provides the mechanism (and defaults) for being able to set
both the CPU request, and CPU limits for the SDK Server `GameServer` sidecar.

I've only set the Request level, as it seems that the major issue is not
CPU usage, but actually how the scheduler allots space for the sidecar
(by default 100/0.1 vCPU is alloted to each container.

After discussion, the CPU request has been set to 30m, but is also
configurable via the helm chart.

I've not set a CPU limit, as I found when setting a low (<= 20m) CPU limit
on the sidecar it mostly stopped working. But if people want to experiment with
this, it is also configurable via the Helm chart.

Closes #344
  • Loading branch information...
markmandel committed Oct 17, 2018
1 parent 1923a41 commit c30c70cbdf12173cb92371380060bcac0e40c6b6
@@ -71,6 +71,7 @@ Documentation and usage guides on how to develop and host dedicated game servers
### Advanced
- [Scheduling and Autoscaling](./docs/scheduling_autoscaling.md)
- [Limiting CPU/Memory](./docs/limiting_resources.md)
## Get involved
@@ -40,20 +40,23 @@ import (
"github.com/spf13/pflag"
"github.com/spf13/viper"
extclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
)
const (
sidecarFlag = "sidecar"
pullSidecarFlag = "always-pull-sidecar"
minPortFlag = "min-port"
maxPortFlag = "max-port"
certFileFlag = "cert-file"
keyFileFlag = "key-file"
workers = 2
defaultResync = 30 * time.Second
sidecarImageFlag = "sidecar-image"
sidecarCPURequestFlag = "sidecar-cpu-request"
sidecarCPULimitFlag = "sidecar-cpu-limit"
pullSidecarFlag = "always-pull-sidecar"
minPortFlag = "min-port"
maxPortFlag = "max-port"
certFileFlag = "cert-file"
keyFileFlag = "key-file"
workers = 2
defaultResync = 30 * time.Second
)
var (
@@ -63,6 +66,9 @@ var (
// main starts the operator for the gameserver CRD
func main() {
ctlConf := parseEnvFlags()
logger.WithField("version", pkg.Version).
WithField("ctlConf", ctlConf).Info("starting gameServer operator...")
if err := ctlConf.validate(); err != nil {
logger.WithError(err).Fatal("Could not create controller from environment or flags")
}
@@ -88,14 +94,15 @@ func main() {
}
health := healthcheck.NewHandler()
wh := webhooks.NewWebHook(ctlConf.certFile, ctlConf.keyFile)
wh := webhooks.NewWebHook(ctlConf.CertFile, ctlConf.KeyFile)
agonesInformerFactory := externalversions.NewSharedInformerFactory(agonesClient, defaultResync)
kubeInformationFactory := informers.NewSharedInformerFactory(kubeClient, defaultResync)
allocationMutex := &sync.Mutex{}
gsController := gameservers.NewController(wh, health, allocationMutex,
ctlConf.minPort, ctlConf.maxPort, ctlConf.sidecarImage, ctlConf.alwaysPullSidecar,
ctlConf.MinPort, ctlConf.MaxPort, ctlConf.SidecarImage, ctlConf.AlwaysPullSidecar,
ctlConf.SidecarCPURequest, ctlConf.SidecarCPULimit,
kubeClient, kubeInformationFactory, extClient, agonesClient, agonesInformerFactory)
gsSetController := gameserversets.NewController(wh, health, allocationMutex,
kubeClient, extClient, agonesClient, agonesInformerFactory)
@@ -132,12 +139,16 @@ func parseEnvFlags() config {
}
base := filepath.Dir(exec)
viper.SetDefault(sidecarFlag, "gcr.io/agones-images/agones-sdk:"+pkg.Version)
viper.SetDefault(sidecarImageFlag, "gcr.io/agones-images/agones-sdk:"+pkg.Version)
viper.SetDefault(sidecarCPURequestFlag, "0")
viper.SetDefault(sidecarCPULimitFlag, "0")
viper.SetDefault(pullSidecarFlag, false)
viper.SetDefault(certFileFlag, filepath.Join(base, "certs/server.crt"))
viper.SetDefault(keyFileFlag, filepath.Join(base, "certs/server.key"))
pflag.String(sidecarFlag, viper.GetString(sidecarFlag), "Flag to overwrite the GameServer sidecar image that is used. Can also use SIDECAR env variable")
pflag.String(sidecarImageFlag, viper.GetString(sidecarImageFlag), "Flag to overwrite the GameServer sidecar image that is used. Can also use SIDECAR env variable")
pflag.String(sidecarCPULimitFlag, viper.GetString(sidecarCPULimitFlag), "Flag to overwrite the GameServer sidecar container's cpu limit. Can also use SIDECAR_CPU_LIMIT env variable")
pflag.String(sidecarCPURequestFlag, viper.GetString(sidecarCPURequestFlag), "Flag to overwrite the GameServer sidecar container's cpu request. Can also use SIDECAR_CPU_REQUEST env variable")
pflag.Bool(pullSidecarFlag, viper.GetBool(pullSidecarFlag), "For development purposes, set the sidecar image to have a ImagePullPolicy of Always. Can also use ALWAYS_PULL_SIDECAR env variable")
pflag.Int32(minPortFlag, 0, "Required. The minimum port that that a GameServer can be allocated to. Can also use MIN_PORT env variable.")
pflag.Int32(maxPortFlag, 0, "Required. The maximum port that that a GameServer can be allocated to. Can also use MAX_PORT env variable")
@@ -146,55 +157,56 @@ func parseEnvFlags() config {
pflag.Parse()
viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_"))
runtime.Must(viper.BindEnv(sidecarFlag))
runtime.Must(viper.BindEnv(sidecarImageFlag))
runtime.Must(viper.BindEnv(sidecarCPULimitFlag))
runtime.Must(viper.BindEnv(sidecarCPURequestFlag))
runtime.Must(viper.BindEnv(pullSidecarFlag))
runtime.Must(viper.BindEnv(minPortFlag))
runtime.Must(viper.BindEnv(maxPortFlag))
runtime.Must(viper.BindEnv(keyFileFlag))
runtime.Must(viper.BindEnv(certFileFlag))
runtime.Must(viper.BindPFlags(pflag.CommandLine))
minPort := int32(viper.GetInt64(minPortFlag))
maxPort := int32(viper.GetInt64(maxPortFlag))
sidecarImage := viper.GetString(sidecarFlag)
alwaysPullSidecar := viper.GetBool(pullSidecarFlag)
keyFile := viper.GetString(keyFileFlag)
certFile := viper.GetString(certFileFlag)
logger.WithField(sidecarFlag, sidecarImage).
WithField("minPort", minPort).
WithField("maxPort", maxPort).
WithField(keyFileFlag, keyFile).
WithField(certFileFlag, certFile).
WithField("alwaysPullSidecarImage", alwaysPullSidecar).
WithField("Version", pkg.Version).Info("starting gameServer operator...")
request, err := resource.ParseQuantity(viper.GetString(sidecarCPURequestFlag))
if err != nil {
logger.WithError(err).Fatalf("could not parse %s", sidecarCPURequestFlag)
}
limit, err := resource.ParseQuantity(viper.GetString(sidecarCPULimitFlag))
if err != nil {
logger.WithError(err).Fatalf("could not parse %s", sidecarCPULimitFlag)
}
return config{
minPort: minPort,
maxPort: maxPort,
sidecarImage: sidecarImage,
alwaysPullSidecar: alwaysPullSidecar,
keyFile: keyFile,
certFile: certFile,
MinPort: int32(viper.GetInt64(minPortFlag)),
MaxPort: int32(viper.GetInt64(maxPortFlag)),
SidecarImage: viper.GetString(sidecarImageFlag),
SidecarCPURequest: request,
SidecarCPULimit: limit,
AlwaysPullSidecar: viper.GetBool(pullSidecarFlag),
KeyFile: viper.GetString(keyFileFlag),
CertFile: viper.GetString(certFileFlag),
}
}
// config stores all required configuration to create a game server controller.
type config struct {
minPort int32
maxPort int32
sidecarImage string
alwaysPullSidecar bool
keyFile string
certFile string
MinPort int32
MaxPort int32
SidecarImage string
SidecarCPURequest resource.Quantity
SidecarCPULimit resource.Quantity
AlwaysPullSidecar bool
KeyFile string
CertFile string
}
// validate ensures the ctlConfig data is valid.
func (c config) validate() error {
if c.minPort <= 0 || c.maxPort <= 0 {
if c.MinPort <= 0 || c.MaxPort <= 0 {
return errors.New("min Port and Max Port values are required")
}
if c.maxPort < c.minPort {
if c.MaxPort < c.MinPort {
return errors.New("max Port cannot be set less that the Min Port")
}
return nil
@@ -0,0 +1,57 @@
# Limiting CPU & Memory
Kubernetes natively has inbuilt capabilities for requesting and limiting both CPU and Memory usage of running containers.
As a short description:
- CPU `Requests` are limits that are applied when there is CPU congestion, and as such can burst above their set limits.
- CPU `Limits` are hard limits on how much CPU time the particular container gets access to.
This is useful for game servers, not just as a mechanism to distribute compute resources evenly, but also as a way
to advice the Kubernetes scheduler how many game server processes it is able to fit into a given node in the cluster.
It's worth reading the [Managing Compute Resources for Containers](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/)
Kubernetes documentation for more details on "requests" and "limits" to both CPU and Memory, and how to configure them.
## GameServers
Since the `GameServer` specification provides a full [`PodSpecTemplate`](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.10/#podtemplatespec-v1-core),
we can take advantage of both resource limits and requests in our `GameServer` configurations.
For example, to set a CPU limit on our `GameServer` configuration of 250m/0.25 of a cpu,
we could do so as followed:
```yaml
apiVersion: "stable.agones.dev/v1alpha1"
kind: GameServer
metadata:
name: "simple-udp"
spec:
ports:
- name: default
portPolicy: "dynamic"
containerPort: 7654
template:
spec:
containers:
- name: simple-udp
image: gcr.io/agones-images/udp-server:0.4
resources:
limit:
cpu: "250m" #this is our limit here
```
If you do not set a limit or request, the default is set my Kubernetes at a 100m CPU request.
## SDK GameServer sidecar
⚠️⚠️⚠️ **This is currently a development feature and has not been released** ⚠️⚠️⚠️
You may also want to tweak the CPU request or limits on the SDK `GameServer` sidecar process that spins up alongside
each game server container.
You can do this through the [Helm configuration](../install/helm/README.md#configuration) when installing Agones.
By default, this is set to having a CPU request value of 30m, with no hard CPU limit. This ensures that the sidecar always has enough CPU
to function, but it is configurable in case a lower, or higher value is required on your clusters, or if you desire
hard limit.
@@ -92,6 +92,8 @@ The following tables lists the configurable parameters of the Agones chart and t
| `agones.image.controller.pullPolicy` | Image pull policy for the controller | `IfNotPresent` |
| `agones.image.controller.pullSecret` | Image pull secret for the controller | `` |
| `agones.image.sdk.name` | Image name for the sdk | `agones-sdk` |
| `agones.image.sdk.cpuRequest` | (⚠️ Development feature ⚠️) the [cpu request][constraints] for sdk server container | `30m` |
| `agones.image.sdk.cpuLimit` | (⚠️ Development feature ⚠️) the [cpu limit][constraints] for the sdk server container | `0` (none) |
| `agones.image.sdk.alwaysPull` | Tells if the sdk image should always be pulled | `false` |
| `agones.controller.healthCheck.http.port` | Port to use for liveness probe service | `8080` |
| `agones.controller.healthCheck.initialDelaySeconds` | Initial delay before performing the first probe (in seconds) | `3` |
@@ -104,6 +106,8 @@ The following tables lists the configurable parameters of the Agones chart and t
| `gameservers.minPort` | Minimum port to use for dynamic port allocation | `7000` |
| `gameservers.maxPort` | Maximum port to use for dynamic port allocation | `8000` |
[constraints]: https://kubernetes.io/docs/tasks/administer-cluster/manage-resources/cpu-constraint-namespace/
Specify each parameter using the `--set key=value[,key=value]` argument to `helm install`. For example,
```bash
@@ -51,16 +51,20 @@ spec:
image: "{{ .Values.agones.image.registry }}/{{ .Values.agones.image.controller.name}}:{{ .Values.agones.image.tag }}"
imagePullPolicy: {{ .Values.agones.image.controller.pullPolicy }}
env:
- name: ALWAYS_PULL_SIDECAR # set the sidecar imagePullPolicy to Always
value: {{ .Values.agones.image.sdk.alwaysPull | quote }}
# minimum port that can be exposed to GameServer traffic
- name: MIN_PORT
value: {{ .Values.gameservers.minPort | quote }}
value: {{ .Values.gameservers.minPort | quote }}
# maximum port that can be exposed to GameServer traffic
- name: MAX_PORT
value: {{ .Values.gameservers.maxPort | quote }}
- name: SIDECAR # overwrite the GameServer sidecar image that is used
- name: SIDECAR_IMAGE # overwrite the GameServer sidecar image that is used
value: "{{ .Values.agones.image.registry }}/{{ .Values.agones.image.sdk.name}}:{{ .Values.agones.image.tag }}"
- name: ALWAYS_PULL_SIDECAR # set the sidecar imagePullPolicy to Always
value: {{ .Values.agones.image.sdk.alwaysPull | quote }}
- name: SIDECAR_CPU_REQUEST
value: {{ .Values.agones.image.sdk.cpuRequest | quote }}
- name: SIDECAR_CPU_LIMIT
value: {{ .Values.agones.image.sdk.cpuLimit | quote }}
livenessProbe:
httpGet:
path: /live
@@ -37,6 +37,8 @@ agones:
pullPolicy: IfNotPresent
sdk:
name: agones-sdk
cpuRequest: 30m
cpuLimit: 0
alwaysPull: false
gameservers:
@@ -859,16 +859,20 @@ spec:
image: "gcr.io/agones-images/agones-controller:0.6.0-rc"
imagePullPolicy: IfNotPresent
env:
- name: ALWAYS_PULL_SIDECAR # set the sidecar imagePullPolicy to Always
value: "false"
# minimum port that can be exposed to GameServer traffic
- name: MIN_PORT
value: "7000"
value: "7000"
# maximum port that can be exposed to GameServer traffic
- name: MAX_PORT
value: "8000"
- name: SIDECAR # overwrite the GameServer sidecar image that is used
- name: SIDECAR_IMAGE # overwrite the GameServer sidecar image that is used
value: "gcr.io/agones-images/agones-sdk:0.6.0-rc"
- name: ALWAYS_PULL_SIDECAR # set the sidecar imagePullPolicy to Always
value: "false"
- name: SIDECAR_CPU_REQUEST
value: "30m"
- name: SIDECAR_CPU_LIMIT
value: "0"
livenessProbe:
httpGet:
path: /live
@@ -38,6 +38,7 @@ import (
extclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
"k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/typed/apiextensions/v1beta1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/intstr"
@@ -59,6 +60,8 @@ type Controller struct {
logger *logrus.Entry
sidecarImage string
alwaysPullSidecarImage bool
sidecarCPURequest resource.Quantity
sidecarCPULimit resource.Quantity
crdGetter v1beta1.CustomResourceDefinitionInterface
podGetter typedcorev1.PodsGetter
podLister corelisterv1.PodLister
@@ -71,8 +74,9 @@ type Controller struct {
healthController *HealthController
workerqueue *workerqueue.WorkerQueue
allocationMutex *sync.Mutex
stop <-chan struct{}
recorder record.EventRecorder
stop <-chan struct {
}
recorder record.EventRecorder
}
// NewController returns a new gameserver crd controller
@@ -83,6 +87,8 @@ func NewController(
minPort, maxPort int32,
sidecarImage string,
alwaysPullSidecarImage bool,
sidecarCPURequest resource.Quantity,
sidecarCPULimit resource.Quantity,
kubeClient kubernetes.Interface,
kubeInformerFactory informers.SharedInformerFactory,
extClient extclientset.Interface,
@@ -95,6 +101,8 @@ func NewController(
c := &Controller{
sidecarImage: sidecarImage,
sidecarCPULimit: sidecarCPULimit,
sidecarCPURequest: sidecarCPURequest,
alwaysPullSidecarImage: alwaysPullSidecarImage,
allocationMutex: allocationMutex,
crdGetter: extClient.ApiextensionsV1beta1().CustomResourceDefinitions(),
@@ -463,6 +471,7 @@ func (c *Controller) sidecar(gs *v1alpha1.GameServer) corev1.Container {
},
},
},
Resources: corev1.ResourceRequirements{},
LivenessProbe: &corev1.Probe{
Handler: corev1.Handler{
HTTPGet: &corev1.HTTPGetAction{
@@ -474,6 +483,15 @@ func (c *Controller) sidecar(gs *v1alpha1.GameServer) corev1.Container {
PeriodSeconds: 3,
},
}
if !c.sidecarCPURequest.IsZero() {
sidecar.Resources.Requests = corev1.ResourceList{corev1.ResourceCPU: c.sidecarCPURequest}
}
if !c.sidecarCPULimit.IsZero() {
sidecar.Resources.Limits = corev1.ResourceList{corev1.ResourceCPU: c.sidecarCPULimit}
}
if c.alwaysPullSidecarImage {
sidecar.ImagePullPolicy = corev1.PullAlways
}
Oops, something went wrong.

0 comments on commit c30c70c

Please sign in to comment.