Implement Balanced Resource Allocation (BRA) algorithm as a PriorityFunction in scheduler package. #6150

Merged
merged 1 commit into from Apr 10, 2015
76 changes: 76 additions & 0 deletions pkg/scheduler/priorities.go
@@ -17,6 +17,8 @@ limitations under the License.
package scheduler

import (
"math"

"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
"github.com/golang/glog"
@@ -132,3 +134,77 @@ func (n *NodeLabelPrioritizer) CalculateNodeLabelPriority(pod api.Pod, podLister
}
return result, nil
}

// BalancedResourceAllocation favors nodes with balanced resource usage rate.
// BalancedResourceAllocation should **NOT** be used alone, and **MUST** be used together with LeastRequestedPriority.
// It calculates the difference between the cpu and memory fraction of capacity, and prioritizes the host based on how
// close the two metrics are to each other.
// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization"
func BalancedResourceAllocation(pod api.Pod, podLister PodLister, minionLister MinionLister) (HostPriorityList, error) {
nodes, err := minionLister.List()
if err != nil {
return HostPriorityList{}, err
}
podsToMachines, err := MapPodsToMachines(podLister)
if err != nil {
return HostPriorityList{}, err
}

list := HostPriorityList{}
for _, node := range nodes.Items {
list = append(list, calculateBalancedResourceAllocation(pod, node, podsToMachines[node.Name]))
}
return list, nil
}

func calculateBalancedResourceAllocation(pod api.Pod, node api.Node, pods []api.Pod) HostPriority {
totalMilliCPU := int64(0)
totalMemory := int64(0)
score := 0
for _, existingPod := range pods {
for _, container := range existingPod.Spec.Containers {
totalMilliCPU += container.Resources.Limits.Cpu().MilliValue()
totalMemory += container.Resources.Limits.Memory().Value()
}
}
// Add the resources requested by the current pod being scheduled.
// This also helps differentiate between differently sized, but empty, minions.
for _, container := range pod.Spec.Containers {
totalMilliCPU += container.Resources.Limits.Cpu().MilliValue()
totalMemory += container.Resources.Limits.Memory().Value()
}

capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue()
capacityMemory := node.Status.Capacity.Memory().Value()

cpuFraction := fractionOfCapacity(totalMilliCPU, capacityMilliCPU, node.Name)
memoryFraction := fractionOfCapacity(totalMemory, capacityMemory, node.Name)
if cpuFraction >= 1 || memoryFraction >= 1 {
// if requested >= capacity, the corresponding host should never be preferred.
score = 0
} else {
// The difference between cpuFraction and memoryFraction lies in [-1, 1]. Multiplying the absolute
// value of the difference by 10 scales it to 0-10, with 0 representing a perfectly balanced
// allocation and 10 a poorly balanced one. Subtracting that from 10 yields the final score,
// which also ranges from 0 to 10, with 10 representing a well balanced allocation.
diff := math.Abs(cpuFraction - memoryFraction)
score = int(10 - diff*10)
}
glog.V(4).Infof(
"%v -> %v: Balanced Resource Allocation, Absolute/Requested: (%d, %d) / (%d, %d) Score: (%d)",
pod.Name, node.Name,
totalMilliCPU, totalMemory,
capacityMilliCPU, capacityMemory,
score,
)

return HostPriority{
host: node.Name,
score: score,
}
}

func fractionOfCapacity(requested, capacity int64, node string) float64 {
if capacity == 0 {
return 1
}
return float64(requested) / float64(capacity)
}
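
For readers skimming the diff, the following is a small standalone sketch of the scoring arithmetic described in the comment above. The helper name and the requested/capacity numbers are hypothetical (the numbers mirror the second test case below), and the zero-capacity guard from fractionOfCapacity is omitted:

package main

import (
	"fmt"
	"math"
)

// balancedScore mirrors the formula used by calculateBalancedResourceAllocation:
// score = 10 - abs(cpuFraction - memoryFraction) * 10, and 0 when either
// fraction reaches or exceeds 1. (Hypothetical helper, not part of the change.)
func balancedScore(requestedMilliCPU, capacityMilliCPU, requestedMemory, capacityMemory int64) int {
	cpuFraction := float64(requestedMilliCPU) / float64(capacityMilliCPU)
	memoryFraction := float64(requestedMemory) / float64(capacityMemory)
	if cpuFraction >= 1 || memoryFraction >= 1 {
		return 0
	}
	return int(10 - math.Abs(cpuFraction-memoryFraction)*10)
}

func main() {
	// 3000m CPU and 5000 bytes of memory requested.
	// A 4000m/10000 node is less balanced (75% vs 50%) than a 6000m/10000 node (50% vs 50%).
	fmt.Println(balancedScore(3000, 4000, 5000, 10000)) // int(10 - 0.25*10) = 7
	fmt.Println(balancedScore(3000, 6000, 5000, 10000)) // int(10 - 0*10) = 10
}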
233 changes: 233 additions & 0 deletions pkg/scheduler/priorities_test.go
@@ -368,3 +368,236 @@ func TestNewNodeLabelPriority(t *testing.T) {
}
}
}

func TestBalancedResourceAllocation(t *testing.T) {
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
}
labels2 := map[string]string{
"bar": "foo",
"baz": "blah",
}
machine1Spec := api.PodSpec{
Host: "machine1",
Review comment (Member): Needs to be rebased. This field is gone.
Reply (Author): Thank you, I will fix them.
}
machine2Spec := api.PodSpec{
Host: "machine2",
}
noResources := api.PodSpec{
Containers: []api.Container{},
}
cpuOnly := api.PodSpec{
Host: "machine1",
Containers: []api.Container{
{
Resources: api.ResourceRequirements{
Limits: api.ResourceList{
"cpu": resource.MustParse("1000m"),
},
},
},
{
Resources: api.ResourceRequirements{
Limits: api.ResourceList{
"cpu": resource.MustParse("2000m"),
},
},
},
},
}
cpuOnly2 := cpuOnly
cpuOnly2.Host = "machine2"
cpuAndMemory := api.PodSpec{
Host: "machine2",
Containers: []api.Container{
{
Resources: api.ResourceRequirements{
Limits: api.ResourceList{
"cpu": resource.MustParse("1000m"),
"memory": resource.MustParse("2000"),
},
},
},
{
Resources: api.ResourceRequirements{
Limits: api.ResourceList{
"cpu": resource.MustParse("2000m"),
"memory": resource.MustParse("3000"),
},
},
},
},
}
tests := []struct {
pod api.Pod
pods []api.Pod
nodes []api.Node
expectedList HostPriorityList
test string
}{
{
/*
Minion1 scores on 0-10 scale
CPU Fraction: 0 / 4000 = 0%
Memory Fraction: 0 / 10000 = 0%
Minion1 Score: 10 - (0-0)*10 = 10

Minion2 scores on 0-10 scale
CPU Fraction: 0 / 4000 = 0 %
Memory Fraction: 0 / 10000 = 0%
Minion2 Score: 10 - (0-0)*10 = 10
*/
pod: api.Pod{Spec: noResources},
nodes: []api.Node{makeMinion("machine1", 4000, 10000), makeMinion("machine2", 4000, 10000)},
expectedList: []HostPriority{{"machine1", 10}, {"machine2", 10}},
test: "nothing scheduled, nothing requested",
},
{
/*
Minion1 scores on 0-10 scale
CPU Fraction: 3000 / 4000= 75%
Memory Fraction: 5000 / 10000 = 50%
Minion1 Score: int(10 - (0.75-0.5)*10) = 7

Minion2 scores on 0-10 scale
CPU Fraction: 3000 / 6000= 50%
Memory Fraction: 5000/10000 = 50%
Minion2 Score: 10 - (0.5-0.5)*10 = 10
*/
pod: api.Pod{Spec: cpuAndMemory},
nodes: []api.Node{makeMinion("machine1", 4000, 10000), makeMinion("machine2", 6000, 10000)},
expectedList: []HostPriority{{"machine1", 7}, {"machine2", 10}},
test: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
Minion1 scores on 0-10 scale
CPU Fraction: 0 / 4000= 0%
Memory Fraction: 0 / 10000 = 0%
Minion1 Score: 10 - (0-0)*10 = 10

Minion2 scores on 0-10 scale
CPU Fraction: 0 / 4000= 0%
Memory Fraction: 0 / 10000 = 0%
Minion2 Score: 10 - (0-0)*10 = 10
*/
pod: api.Pod{Spec: noResources},
nodes: []api.Node{makeMinion("machine1", 4000, 10000), makeMinion("machine2", 4000, 10000)},
expectedList: []HostPriority{{"machine1", 10}, {"machine2", 10}},
test: "no resources requested, pods scheduled",
pods: []api.Pod{
{Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels2}},
{Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
},
},
{
/*
Minion1 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 0 / 20000 = 0%
Minion1 Score: 10 - (0.6-0)*10 = 4

Minion2 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 5000 / 20000 = 25%
Minion2 Score: int(10 - (0.6-0.25)*10) = 6
*/
pod: api.Pod{Spec: noResources},
nodes: []api.Node{makeMinion("machine1", 10000, 20000), makeMinion("machine2", 10000, 20000)},
expectedList: []HostPriority{{"machine1", 4}, {"machine2", 6}},
test: "no resources requested, pods scheduled with resources",
pods: []api.Pod{
{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels1}},
{Spec: cpuOnly2, ObjectMeta: api.ObjectMeta{Labels: labels1}},
{Spec: cpuAndMemory, ObjectMeta: api.ObjectMeta{Labels: labels1}},
},
},
{
/*
Minion1 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 5000 / 20000 = 25%
Minion1 Score: int(10 - (0.6-0.25)*10) = 6

Minion2 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 10000 / 20000 = 50%
Minion2 Score: 10 - (0.6-0.5)*10 = 9
*/
pod: api.Pod{Spec: cpuAndMemory},
nodes: []api.Node{makeMinion("machine1", 10000, 20000), makeMinion("machine2", 10000, 20000)},
expectedList: []HostPriority{{"machine1", 6}, {"machine2", 9}},
test: "resources requested, pods scheduled with resources",
pods: []api.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Minion1 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 5000 / 20000 = 25%
Minion1 Score: int(10 - (0.6-0.25)*10) = 6

Minion2 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 10000 / 50000 = 20%
Minion2 Score: 10 - (0.6-0.2)*10 = 6
*/
pod: api.Pod{Spec: cpuAndMemory},
nodes: []api.Node{makeMinion("machine1", 10000, 20000), makeMinion("machine2", 10000, 50000)},
expectedList: []HostPriority{{"machine1", 6}, {"machine2", 6}},
test: "resources requested, pods scheduled with resources, differently sized machines",
pods: []api.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Minion1 scores on 0-10 scale
CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
Memory Fraction: 0 / 10000 = 0
Minion1 Score: 0

Minion2 scores on 0-10 scale
CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
Memory Fraction 5000 / 10000 = 50%
Minion2 Score: 0
*/
pod: api.Pod{Spec: cpuOnly},
nodes: []api.Node{makeMinion("machine1", 4000, 10000), makeMinion("machine2", 4000, 10000)},
expectedList: []HostPriority{{"machine1", 0}, {"machine2", 0}},
test: "requested resources exceed minion capacity",
pods: []api.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
pod: api.Pod{Spec: noResources},
nodes: []api.Node{makeMinion("machine1", 0, 0), makeMinion("machine2", 0, 0)},
expectedList: []HostPriority{{"machine1", 0}, {"machine2", 0}},
test: "zero minion resources, pods scheduled with resources",
pods: []api.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
}

for _, test := range tests {
list, err := BalancedResourceAllocation(test.pod, FakePodLister(test.pods), FakeMinionLister(api.NodeList{Items: test.nodes}))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}
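
Assuming a standard Go workspace checkout of the repository, the new cases can be exercised in isolation with something like:

go test ./pkg/scheduler/ -run TestBalancedResourceAllocation -v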