
Commit

Ratelimit replica creation
bprashanth committed May 8, 2015
1 parent 12de230 commit 8ecda5c
Showing 8 changed files with 141 additions and 26 deletions.
2 changes: 1 addition & 1 deletion cmd/integration/integration.go
@@ -214,7 +214,7 @@ func startComponents(firstManifestURL, secondManifestURL, apiVersion string) (st
// ensure the service endpoints are sync'd several times within the window that the integration tests wait
go endpoints.Run(3, util.NeverStop)

controllerManager := replicationControllerPkg.NewReplicationManager(cl)
controllerManager := replicationControllerPkg.NewReplicationManager(cl, replicationControllerPkg.BurstReplicas)

// TODO: Write an integration test for the replication controllers watch.
go controllerManager.Run(3, util.NeverStop)
2 changes: 1 addition & 1 deletion cmd/kube-controller-manager/app/controllermanager.go
@@ -213,7 +213,7 @@ func (s *CMServer) Run(_ []string) error {
endpoints := service.NewEndpointController(kubeClient)
go endpoints.Run(s.ConcurrentEndpointSyncs, util.NeverStop)

controllerManager := replicationControllerPkg.NewReplicationManager(kubeClient)
controllerManager := replicationControllerPkg.NewReplicationManager(kubeClient, replicationControllerPkg.BurstReplicas)
go controllerManager.Run(s.ConcurrentRCSyncs, util.NeverStop)

cloud := cloudprovider.InitCloudProvider(s.CloudProvider, s.CloudConfigFile)
2 changes: 1 addition & 1 deletion cmd/kubernetes/kubernetes.go
@@ -143,7 +143,7 @@ func runControllerManager(machineList []string, cl *client.Client, nodeMilliCPU,
endpoints := service.NewEndpointController(cl)
go endpoints.Run(5, util.NeverStop)

controllerManager := controller.NewReplicationManager(cl)
controllerManager := controller.NewReplicationManager(cl, controller.BurstReplicas)
go controllerManager.Run(5, util.NeverStop)
}

2 changes: 1 addition & 1 deletion pkg/cloudprovider/nodecontroller/nodecontroller.go
@@ -524,7 +524,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
// NodeReady condition was last set longer ago than gracePeriod, so update it to Unknown
// (regardless of its current value) in the master, without contacting kubelet.
if readyCondition == nil {
glog.V(2).Infof("node %v is never updated by kubelet")
glog.V(2).Infof("node %v is never updated by kubelet", node.Name)
node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
Type: api.NodeReady,
Status: api.ConditionUnknown,
2 changes: 1 addition & 1 deletion pkg/controller/controller_utils.go
@@ -84,7 +84,7 @@ func (r *RCExpectations) SatisfiedExpectations(rc *api.ReplicationController) bo
if podExp.Fulfilled() {
return true
} else {
glog.V(4).Infof("Controller %v still waiting on expectations %#v", podExp)
glog.V(4).Infof("Controller still waiting on expectations %#v", podExp)
return false
}
} else if err != nil {
31 changes: 21 additions & 10 deletions pkg/controller/replication_controller.go
@@ -58,12 +58,15 @@ const (
// of expectations, without it the RC could stay asleep forever. This should
// be set based on the expected latency of watch events.
//
// TODO: Set this per expectation, based on its size.
// Currently an rc can service (create *and* observe the watch events for said
// creation) about 10-20 pods a second, so it takes about 3.5 min to service
// 3000 pods. Just creation is limited to 30qps, and watching happens with
// ~10-30s latency/pod at scale.
ExpectationsTimeout = 6 * time.Minute
// creation) about 10-20 pods a second, so it takes about 1 min to service
// 500 pods. Just creation is limited to 20qps, and watching happens with ~10-30s
// latency/pod at the scale of 3000 pods over 100 nodes.
ExpectationsTimeout = 3 * time.Minute

// Realistic value of the burstReplica field for the replication manager based off
// performance requirements for kubernetes 1.0.
BurstReplicas = 500
)
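
A quick back-of-envelope check of the new constants (an editor's sketch, not part of this commit; createQPS and watchLatency below are assumed values drawn from the comment above): even at the stated limits, a full burst of 500 replicas is created and observed well inside the 3 minute timeout.

package main

import (
	"fmt"
	"time"
)

func main() {
	const (
		burstReplicas = 500              // BurstReplicas from the diff above
		createQPS     = 20               // assumed creation rate limit (~20 qps per the comment)
		watchLatency  = 30 * time.Second // assumed worst-case watch latency per pod
	)
	createTime := time.Duration(burstReplicas/createQPS) * time.Second
	fmt.Printf("worst case to service one burst: %v (ExpectationsTimeout is %v)\n",
		createTime+watchLatency, 3*time.Minute)
	// Prints: worst case to service one burst: 55s (ExpectationsTimeout is 3m0s)
}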

// ReplicationManager is responsible for synchronizing ReplicationController objects stored
@@ -72,6 +75,9 @@ type ReplicationManager struct {
kubeClient client.Interface
podControl PodControlInterface

// An rc is temporarily suspended after creating/deleting these many replicas.
// It resumes normal action after observing the watch events for them.
burstReplicas int
// To allow injection of syncReplicationController for testing.
syncHandler func(rcKey string) error
// A TTLCache of pod creates/deletes each rc expects to see
@@ -89,7 +95,7 @@ type ReplicationManager struct {
}

// NewReplicationManager creates a new ReplicationManager.
func NewReplicationManager(kubeClient client.Interface) *ReplicationManager {
func NewReplicationManager(kubeClient client.Interface, burstReplicas int) *ReplicationManager {
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartRecordingToSink(kubeClient.Events(""))

@@ -99,8 +105,9 @@ func NewReplicationManager(kubeClient client.Interface) *ReplicationManager {
kubeClient: kubeClient,
recorder: eventBroadcaster.NewRecorder(api.EventSource{Component: "replication-controller"}),
},
expectations: NewRCExpectations(),
queue: workqueue.New(),
burstReplicas: burstReplicas,
expectations: NewRCExpectations(),
queue: workqueue.New(),
}

rm.controllerStore.Store, rm.rcController = framework.NewInformer(
@@ -277,24 +284,27 @@ func (rm *ReplicationManager) manageReplicas(filteredPods []*api.Pod, controller
diff := len(filteredPods) - controller.Spec.Replicas
if diff < 0 {
diff *= -1
diff = util.Min(diff, rm.burstReplicas)
rm.expectations.ExpectCreations(controller, diff)
wait := sync.WaitGroup{}
wait.Add(diff)
glog.V(2).Infof("Too few %q replicas, creating %d", controller.Name, diff)
glog.V(2).Infof("Too few %q/%q replicas, need %d, creating %d", controller.Namespace, controller.Name, controller.Spec.Replicas, diff)
for i := 0; i < diff; i++ {
go func() {
defer wait.Done()
if err := rm.podControl.createReplica(controller.Namespace, controller); err != nil {
// Decrement the expected number of creates because the informer won't observe this pod
glog.V(2).Infof("Failed creation, decrementing expectations for controller %q/%q", controller.Namespace, controller.Name)
rm.expectations.CreationObserved(controller)
util.HandleError(err)
}
}()
}
wait.Wait()
} else if diff > 0 {
diff = util.Min(diff, rm.burstReplicas)
rm.expectations.ExpectDeletions(controller, diff)
glog.V(2).Infof("Too many %q replicas, deleting %d", controller.Name, diff)
glog.V(2).Infof("Too many %q/%q replicas, need %d, deleting %d", controller.Namespace, controller.Name, controller.Spec.Replicas, diff)
// Sort the pods in the order such that not-ready < ready, unscheduled
// < scheduled, and pending < running. This ensures that we delete pods
// in the earlier stages whenever possible.
@@ -307,6 +317,7 @@ func (rm *ReplicationManager) manageReplicas(filteredPods []*api.Pod, controller
defer wait.Done()
if err := rm.podControl.deletePod(controller.Namespace, filteredPods[ix].Name); err != nil {
// Decrement the expected number of deletes because the informer won't observe this deletion
glog.V(2).Infof("Failed deletion, decrementing expectations for controller %q/%q", controller.Namespace, controller.Name)
rm.expectations.DeletionObserved(controller)
}
}(i)
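
To make the new control flow easier to follow outside the diff, here is an editor's self-contained sketch of the burst-limited create path that manageReplicas now uses; expectations and createBurst are illustrative stand-ins, not the real pkg/controller types. Each sync caps its work at burstReplicas, records that many expected creations, creates in parallel, and decrements the count for any failed create so the rc is not left dormant waiting for a watch event that will never arrive.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}

// expectations counts how many create events the controller still expects to
// observe from the watch before it will act on this rc again.
type expectations struct{ pending int64 }

func (e *expectations) expect(n int)    { atomic.AddInt64(&e.pending, int64(n)) }
func (e *expectations) observed()       { atomic.AddInt64(&e.pending, -1) }
func (e *expectations) satisfied() bool { return atomic.LoadInt64(&e.pending) <= 0 }

// createBurst mirrors the "too few replicas" branch above: cap the work at
// burstReplicas, record that many expected creations, create in parallel, and
// decrement the expectation for any create that fails, since the informer
// will never observe a pod that was never created.
func createBurst(want, have, burstReplicas int, exp *expectations, create func() error) int {
	diff := min(want-have, burstReplicas)
	exp.expect(diff)
	var wg sync.WaitGroup
	wg.Add(diff)
	for i := 0; i < diff; i++ {
		go func() {
			defer wg.Done()
			if err := create(); err != nil {
				exp.observed() // failed create: lower the expectation count
			}
		}()
	}
	wg.Wait()
	return diff
}

func main() {
	exp := &expectations{}
	n := createBurst(30, 0, 5, exp, func() error { return nil })
	// Only one burst of 5 creates is issued; the controller would stay dormant
	// until the watch delivers those 5 add events.
	fmt.Printf("issued %d creates, expectations satisfied: %v\n", n, exp.satisfied())
}

The deletion branch in the diff follows the same shape, with ExpectDeletions and DeletionObserved in place of the creation counters.
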
118 changes: 107 additions & 11 deletions pkg/controller/replication_controller_test.go
@@ -225,7 +225,7 @@ func startManagerAndWait(manager *ReplicationManager, pods int, t *testing.T) ch
func TestSyncReplicationControllerDoesNothing(t *testing.T) {
client := client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()})
fakePodControl := FakePodControl{}
manager := NewReplicationManager(client)
manager := NewReplicationManager(client, BurstReplicas)

// 2 running pods, a controller with 2 replicas, sync is a no-op
controllerSpec := newReplicationController(2)
@@ -240,7 +240,7 @@ func TestSyncReplicationControllerDoesNothing(t *testing.T) {
func TestSyncReplicationControllerDeletes(t *testing.T) {
client := client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()})
fakePodControl := FakePodControl{}
manager := NewReplicationManager(client)
manager := NewReplicationManager(client, BurstReplicas)
manager.podControl = &fakePodControl

// 2 running pods and a controller with 1 replica, one pod delete expected
@@ -254,7 +254,7 @@ func TestSyncReplicationControllerDeletes(t *testing.T) {

func TestSyncReplicationControllerCreates(t *testing.T) {
client := client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()})
manager := NewReplicationManager(client)
manager := NewReplicationManager(client, BurstReplicas)

// A controller with 2 replicas and no pods in the store, 2 creates expected
controller := newReplicationController(2)
@@ -319,7 +319,7 @@ func TestControllerNoReplicaUpdate(t *testing.T) {
testServer := httptest.NewServer(&fakeHandler)
defer testServer.Close()
client := client.NewOrDie(&client.Config{Host: testServer.URL, Version: testapi.Version()})
manager := NewReplicationManager(client)
manager := NewReplicationManager(client, BurstReplicas)

// Steady state for the replication controller, no Status.Replicas updates expected
activePods := 5
@@ -348,7 +348,7 @@ func TestControllerUpdateReplicas(t *testing.T) {
defer testServer.Close()

client := client.NewOrDie(&client.Config{Host: testServer.URL, Version: testapi.Version()})
manager := NewReplicationManager(client)
manager := NewReplicationManager(client, BurstReplicas)

// Insufficient number of pods in the system, and Status.Replicas is wrong;
// Status.Replica should update to match number of pods in system, 1 new pod should be created.
@@ -533,7 +533,7 @@ func TestSyncReplicationControllerDormancy(t *testing.T) {
client := client.NewOrDie(&client.Config{Host: testServer.URL, Version: testapi.Version()})

fakePodControl := FakePodControl{}
manager := NewReplicationManager(client)
manager := NewReplicationManager(client, BurstReplicas)
manager.podControl = &fakePodControl

controllerSpec := newReplicationController(2)
Expand Down Expand Up @@ -572,7 +572,7 @@ func TestSyncReplicationControllerDormancy(t *testing.T) {
}

func TestPodControllerLookup(t *testing.T) {
manager := NewReplicationManager(client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()}))
manager := NewReplicationManager(client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()}), BurstReplicas)
testCases := []struct {
inRCs []*api.ReplicationController
pod *api.Pod
@@ -638,7 +638,7 @@ type FakeWatcher struct {
func TestWatchControllers(t *testing.T) {
fakeWatch := watch.NewFake()
client := &testclient.Fake{Watch: fakeWatch}
manager := NewReplicationManager(client)
manager := NewReplicationManager(client, BurstReplicas)

var testControllerSpec api.ReplicationController
received := make(chan string)
@@ -679,7 +679,7 @@ func TestWatchControllers(t *testing.T) {
func TestWatchPods(t *testing.T) {
fakeWatch := watch.NewFake()
client := &testclient.Fake{Watch: fakeWatch}
manager := NewReplicationManager(client)
manager := NewReplicationManager(client, BurstReplicas)

// Put one rc and one pod into the controller's stores
testControllerSpec := newReplicationController(1)
@@ -722,7 +722,7 @@ func TestWatchPods(t *testing.T) {
func TestUpdatePods(t *testing.T) {
fakeWatch := watch.NewFake()
client := &testclient.Fake{Watch: fakeWatch}
manager := NewReplicationManager(client)
manager := NewReplicationManager(client, BurstReplicas)

received := make(chan string)

@@ -780,7 +780,7 @@ func TestControllerUpdateRequeue(t *testing.T) {
defer testServer.Close()

client := client.NewOrDie(&client.Config{Host: testServer.URL, Version: testapi.Version()})
manager := NewReplicationManager(client)
manager := NewReplicationManager(client, BurstReplicas)

rc := newReplicationController(1)
manager.controllerStore.Store.Add(rc)
@@ -852,3 +852,99 @@ func TestControllerUpdateStatusWithFailure(t *testing.T) {
t.Errorf("Expected 1 get and 2 updates, got %d gets %d updates", gets, updates)
}
}

func doTestControllerBurstReplicas(t *testing.T, burstReplicas, numReplicas int) {
client := client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()})
fakePodControl := FakePodControl{}
manager := NewReplicationManager(client, burstReplicas)
manager.podControl = &fakePodControl

controllerSpec := newReplicationController(numReplicas)
manager.controllerStore.Store.Add(controllerSpec)

expectedPods := util.Min(burstReplicas, numReplicas)
pods := newPodList(nil, numReplicas, api.PodPending, controllerSpec)
var scaleFactor = float64(numReplicas) / float64(burstReplicas)

// Size up the controller, then size it down, and confirm the expected create/delete pattern
for _, numReplicas := range []int{numReplicas, 0} {

controllerSpec.Spec.Replicas = numReplicas
manager.controllerStore.Store.Add(controllerSpec)

for i := 0.0; i < scaleFactor; i++ {
manager.syncReplicationController(getKey(controllerSpec, t))

// The store accrues active pods. It's also used by the rc to determine how many
// replicas to create.
activePods := len(manager.podStore.Store.List())
if numReplicas != 0 {
// This is the number of pods currently "in flight". They were created by the rc manager above,
// which then puts the rc to sleep till all of them have been observed.
expectedPods = util.Min(numReplicas-activePods, burstReplicas)
// This validates the rc manager sync actually created pods
validateSyncReplication(t, &fakePodControl, expectedPods, 0)

// This simulates the watch events for all but 1 of the expected pods.
// None of these should wake the controller because it has expectations==BurstReplicas.
for _, pod := range pods.Items[:expectedPods-1] {
manager.podStore.Store.Add(&pod)
manager.addPod(&pod)
}

podExp, exists, err := manager.expectations.GetExpectations(controllerSpec)
if !exists || err != nil {
t.Fatalf("Did not find expectations for rc.")
}
if add, _ := podExp.getExpectations(); add != 1 {
t.Fatalf("Expectations are wrong %v", podExp)
}
} else {
expectedPods = util.Min(numReplicas-activePods*-1, burstReplicas)
validateSyncReplication(t, &fakePodControl, 0, expectedPods)
for _, pod := range pods.Items[:expectedPods-1] {
manager.podStore.Store.Delete(&pod)
manager.deletePod(&pod)
}
podExp, exists, err := manager.expectations.GetExpectations(controllerSpec)
if !exists || err != nil {
t.Fatalf("Did not find expectations for rc.")
}
if _, del := podExp.getExpectations(); del != 1 {
t.Fatalf("Expectations are wrong %v", podExp)
}
}

// Check that the rc didn't take any action for all the above pods
fakePodControl.clear()
manager.syncReplicationController(getKey(controllerSpec, t))
validateSyncReplication(t, &fakePodControl, 0, 0)

// Create/Delete the last pod
// The last add pod will decrease the expectation of the rc to 0,
// which will cause it to create/delete the remaining replicas upto burstReplicas.
if numReplicas != 0 {
manager.podStore.Store.Add(&pods.Items[expectedPods-1])
manager.addPod(&pods.Items[expectedPods-1])
} else {
manager.podStore.Store.Delete(&pods.Items[expectedPods-1])
manager.deletePod(&pods.Items[expectedPods-1])
}
pods.Items = pods.Items[expectedPods:]
}

// Confirm that we've created the right number of replicas
activePods := len(manager.podStore.Store.List())
if activePods != controllerSpec.Spec.Replicas {
t.Fatalf("Unexpected number of active pods, expected %d, got %d", controllerSpec.Spec.Replicas, activePods)
}
// Replenish the pod list, since we cut it down sizing up
pods = newPodList(nil, numReplicas, api.PodRunning, controllerSpec)
}
}

func TestControllerBurstReplicas(t *testing.T) {
doTestControllerBurstReplicas(t, 5, 30)
doTestControllerBurstReplicas(t, 5, 12)
doTestControllerBurstReplicas(t, 3, 2)
}
8 changes: 8 additions & 0 deletions pkg/util/util.go
@@ -69,6 +69,14 @@ func logPanic(r interface{}) {
glog.Errorf("Recovered from panic: %#v (%v)\n%v", r, r, callers)
}

// Min will return the smaller of the 2 given integers.
func Min(a, b int) int {
if a < b {
return a
}
return b
}

// ErrorHandlers is a list of functions which will be invoked when an unreturnable
// error occurs.
var ErrorHandlers = []func(error){logError}
