Implement BRA algorithm as a new priority function in scheduler
The Balanced Resource Allocation policy can now be enabled so that
hosts with balanced resource usage are preferred. The score given by
BRA also scales from 0 to 10, with 10 meaning that the host's resource
usage is well balanced.
dingh committed Apr 10, 2015
1 parent b12d75d commit 71b2af1
Showing 2 changed files with 309 additions and 0 deletions.
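As a brief illustration of the scoring described above (the fractions below are taken from the unit tests added in this commit):

score = 10 - |cpuFraction - memoryFraction| * 10

A host whose requested CPU is 60% of capacity and whose requested memory is 25% of capacity scores 10 - |0.60 - 0.25| * 10 = 6.5, stored as the integer 6; a host with equal CPU and memory fractions scores the maximum of 10.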
76 changes: 76 additions & 0 deletions pkg/scheduler/priorities.go
@@ -17,6 +17,8 @@ limitations under the License.
package scheduler

import (
"math"

"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
"github.com/golang/glog"
@@ -132,3 +134,77 @@ func (n *NodeLabelPrioritizer) CalculateNodeLabelPriority(pod api.Pod, podLister
}
return result, nil
}

// BalancedResourceAllocation favors nodes with balanced resource usage rate.
// BalancedResourceAllocation should **NOT** be used alone, and **MUST** be used together with LeastRequestedPriority.
// It calculates the difference between the cpu and memory fraction of capacity, and prioritizes the host based on how
// close the two metrics are to each other.
// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization"
func BalancedResourceAllocation(pod api.Pod, podLister PodLister, minionLister MinionLister) (HostPriorityList, error) {
nodes, err := minionLister.List()
if err != nil {
return HostPriorityList{}, err
}
podsToMachines, err := MapPodsToMachines(podLister)
if err != nil {
return HostPriorityList{}, err
}

list := HostPriorityList{}
for _, node := range nodes.Items {
list = append(list, calculateBalancedResourceAllocation(pod, node, podsToMachines[node.Name]))
}
return list, nil
}

func calculateBalancedResourceAllocation(pod api.Pod, node api.Node, pods []api.Pod) HostPriority {
totalMilliCPU := int64(0)
totalMemory := int64(0)
score := int(0)
for _, existingPod := range pods {
for _, container := range existingPod.Spec.Containers {
totalMilliCPU += container.Resources.Limits.Cpu().MilliValue()
totalMemory += container.Resources.Limits.Memory().Value()
}
}
// Add the resources requested by the current pod being scheduled.
// This also helps differentiate between differently sized, but empty, minions.
for _, container := range pod.Spec.Containers {
totalMilliCPU += container.Resources.Limits.Cpu().MilliValue()
totalMemory += container.Resources.Limits.Memory().Value()
}

capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue()
capacityMemory := node.Status.Capacity.Memory().Value()

cpuFraction := fractionOfCapacity(totalMilliCPU, capacityMilliCPU, node.Name)
memoryFraction := fractionOfCapacity(totalMemory, capacityMemory, node.Name)
if cpuFraction >= 1 || memoryFraction >= 1 {
// if requested >= capacity, the corresponding host should never be preferred.
score = 0
} else {
// Upper and lower boundary of difference between cpuFraction and memoryFraction are -1 and 1
// respectively. Multiplying the absolute value of the difference by 10 scales the value to
// 0-10, with 0 representing well balanced allocation and 10 poorly balanced. Subtracting it from
// 10 gives the final score, which also scales from 0 to 10, with 10 representing well balanced.
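// Note that the conversion to int truncates: e.g. a raw value of 6.5 becomes a score of 6, which
// matches the expectations in the unit tests.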
diff := math.Abs(cpuFraction - memoryFraction)
score = int(10 - diff*10)
}
glog.V(4).Infof(
"%v -> %v: Balanced Resource Allocation, Absolute/Requested: (%d, %d) / (%d, %d) Score: (%d)",
pod.Name, node.Name,
totalMilliCPU, totalMemory,
capacityMilliCPU, capacityMemory,
score,
)

return HostPriority{
host: node.Name,
score: score,
}
}

func fractionOfCapacity(requested, capacity int64, node string) float64 {
if capacity == 0 {
return 1
}
return float64(requested) / float64(capacity)
}
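For reference, here is a minimal standalone sketch of the scoring arithmetic implemented above; the helper name balancedScore and the sample fractions are illustrative only and are not part of this change:

package main

import (
	"fmt"
	"math"
)

// balancedScore mirrors the scoring step of calculateBalancedResourceAllocation:
// hosts whose CPU and memory usage fractions are closer together score higher on a 0-10 scale.
func balancedScore(cpuFraction, memoryFraction float64) int {
	if cpuFraction >= 1 || memoryFraction >= 1 {
		// Requested resources meet or exceed capacity: never prefer this host.
		return 0
	}
	diff := math.Abs(cpuFraction - memoryFraction)
	return int(10 - diff*10) // integer truncation, e.g. 7.5 becomes 7
}

func main() {
	fmt.Println(balancedScore(0.75, 0.50)) // 7
	fmt.Println(balancedScore(0.60, 0.25)) // 6
	fmt.Println(balancedScore(0.50, 0.50)) // 10
	fmt.Println(balancedScore(1.50, 0.50)) // 0 (over capacity)
}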
233 changes: 233 additions & 0 deletions pkg/scheduler/priorities_test.go
@@ -368,3 +368,236 @@ func TestNewNodeLabelPriority(t *testing.T) {
}
}
}

func TestBalancedResourceAllocation(t *testing.T) {
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
}
labels2 := map[string]string{
"bar": "foo",
"baz": "blah",
}
machine1Spec := api.PodSpec{
Host: "machine1",
}
machine2Spec := api.PodSpec{
Host: "machine2",
}
noResources := api.PodSpec{
Containers: []api.Container{},
}
cpuOnly := api.PodSpec{
Host: "machine1",
Containers: []api.Container{
{
Resources: api.ResourceRequirements{
Limits: api.ResourceList{
"cpu": resource.MustParse("1000m"),
},
},
},
{
Resources: api.ResourceRequirements{
Limits: api.ResourceList{
"cpu": resource.MustParse("2000m"),
},
},
},
},
}
cpuOnly2 := cpuOnly
cpuOnly2.Host = "machine2"
cpuAndMemory := api.PodSpec{
Host: "machine2",
Containers: []api.Container{
{
Resources: api.ResourceRequirements{
Limits: api.ResourceList{
"cpu": resource.MustParse("1000m"),
"memory": resource.MustParse("2000"),
},
},
},
{
Resources: api.ResourceRequirements{
Limits: api.ResourceList{
"cpu": resource.MustParse("2000m"),
"memory": resource.MustParse("3000"),
},
},
},
},
}
tests := []struct {
pod api.Pod
pods []api.Pod
nodes []api.Node
expectedList HostPriorityList
test string
}{
{
/*
Minion1 scores (remaining resources) on 0-10 scale
CPU Fraction: 0 / 4000 = 0%
Memory Fraction: 0 / 10000 = 0%
Minion1 Score: 10 - (0-0)*10 = 10
Minion2 scores (remaining resources) on 0-10 scale
CPU Fraction: 0 / 4000 = 0%
Memory Fraction: 0 / 10000 = 0%
Minion2 Score: 10 - (0-0)*10 = 10
*/
pod: api.Pod{Spec: noResources},
nodes: []api.Node{makeMinion("machine1", 4000, 10000), makeMinion("machine2", 4000, 10000)},
expectedList: []HostPriority{{"machine1", 10}, {"machine2", 10}},
test: "nothing scheduled, nothing requested",
},
{
/*
Minion1 scores on 0-10 scale
CPU Fraction: 3000 / 4000 = 75%
Memory Fraction: 5000 / 10000 = 50%
Minion1 Score: 10 - (0.75-0.5)*10 = 7
Minion2 scores on 0-10 scale
CPU Fraction: 3000 / 6000 = 50%
Memory Fraction: 5000 / 10000 = 50%
Minion2 Score: 10 - (0.5-0.5)*10 = 10
*/
pod: api.Pod{Spec: cpuAndMemory},
nodes: []api.Node{makeMinion("machine1", 4000, 10000), makeMinion("machine2", 6000, 10000)},
expectedList: []HostPriority{{"machine1", 7}, {"machine2", 10}},
test: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
Minion1 scores on 0-10 scale
CPU Fraction: 0 / 4000 = 0%
Memory Fraction: 0 / 10000 = 0%
Minion1 Score: 10 - (0-0)*10 = 10
Minion2 scores on 0-10 scale
CPU Fraction: 0 / 4000 = 0%
Memory Fraction: 0 / 10000 = 0%
Minion2 Score: 10 - (0-0)*10 = 10
*/
pod: api.Pod{Spec: noResources},
nodes: []api.Node{makeMinion("machine1", 4000, 10000), makeMinion("machine2", 4000, 10000)},
expectedList: []HostPriority{{"machine1", 10}, {"machine2", 10}},
test: "no resources requested, pods scheduled",
pods: []api.Pod{
{Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels2}},
{Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
},
},
{
/*
Minion1 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 0 / 20000 = 0%
Minion1 Score: 10 - (0.6-0)*10 = 4
Minion2 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 5000 / 20000 = 25%
Minion2 Score: 10 - (0.6-0.25)*10 = 6
*/
pod: api.Pod{Spec: noResources},
nodes: []api.Node{makeMinion("machine1", 10000, 20000), makeMinion("machine2", 10000, 20000)},
expectedList: []HostPriority{{"machine1", 4}, {"machine2", 6}},
test: "no resources requested, pods scheduled with resources",
pods: []api.Pod{
{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels1}},
{Spec: cpuOnly2, ObjectMeta: api.ObjectMeta{Labels: labels1}},
{Spec: cpuAndMemory, ObjectMeta: api.ObjectMeta{Labels: labels1}},
},
},
{
/*
Minion1 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 5000 / 20000 = 25%
Minion1 Score: 10 - (0.6-0.25)*10 = 6
Minion2 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 10000 / 20000 = 50%
Minion2 Score: 10 - (0.6-0.5)*10 = 9
*/
pod: api.Pod{Spec: cpuAndMemory},
nodes: []api.Node{makeMinion("machine1", 10000, 20000), makeMinion("machine2", 10000, 20000)},
expectedList: []HostPriority{{"machine1", 6}, {"machine2", 9}},
test: "resources requested, pods scheduled with resources",
pods: []api.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Minion1 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 5000 / 20000 = 25%
Minion1 Score: 10 - (0.6-0.25)*10 = 6
Minion2 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 10000 / 50000 = 20%
Minion2 Score: 10 - (0.6-0.2)*10 = 6
*/
pod: api.Pod{Spec: cpuAndMemory},
nodes: []api.Node{makeMinion("machine1", 10000, 20000), makeMinion("machine2", 10000, 50000)},
expectedList: []HostPriority{{"machine1", 6}, {"machine2", 6}},
test: "resources requested, pods scheduled with resources, differently sized machines",
pods: []api.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Minion1 scores on 0-10 scale
CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
Memory Fraction: 0 / 10000 = 0%
Minion1 Score: 0
Minion2 scores on 0-10 scale
CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
Memory Fraction: 5000 / 10000 = 50%
Minion2 Score: 0
*/
pod: api.Pod{Spec: cpuOnly},
nodes: []api.Node{makeMinion("machine1", 4000, 10000), makeMinion("machine2", 4000, 10000)},
expectedList: []HostPriority{{"machine1", 0}, {"machine2", 0}},
test: "requested resources exceed minion capacity",
pods: []api.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
pod: api.Pod{Spec: noResources},
nodes: []api.Node{makeMinion("machine1", 0, 0), makeMinion("machine2", 0, 0)},
expectedList: []HostPriority{{"machine1", 0}, {"machine2", 0}},
test: "zero minion resources, pods scheduled with resources",
pods: []api.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
}

for _, test := range tests {
list, err := BalancedResourceAllocation(test.pod, FakePodLister(test.pods), FakeMinionLister(api.NodeList{Items: test.nodes}))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}
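Assuming the standard Go tooling for this repository, these cases can be run in isolation with something like go test -run TestBalancedResourceAllocation ./pkg/scheduler/ from the repository root.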
