Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions charts/topograph/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ spec:
args:
- -v={{ .Values.verbosity }}
- -k8s-topology-key-accelerator={{ .Values.topologyNodeLabels.accelerator }}
- -k8s-topology-key-block={{ .Values.topologyNodeLabels.block }}
- -k8s-topology-key-leaf={{ .Values.topologyNodeLabels.leaf }}
- -k8s-topology-key-spine={{ .Values.topologyNodeLabels.spine }}
- -k8s-topology-key-datacenter={{ .Values.topologyNodeLabels.datacenter }}
- -k8s-topology-key-core={{ .Values.topologyNodeLabels.core }}
env:
- name: NODE_DATA_BROKER_NAME
value: {{ printf "%s-%s" .Release.Name "node-data-broker" | trunc 63 | trimSuffix "-" }}
Expand Down
4 changes: 2 additions & 2 deletions charts/topograph/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ config:

topologyNodeLabels:
accelerator: network.topology.nvidia.com/accelerator
block: network.topology.nvidia.com/block
leaf: network.topology.nvidia.com/leaf
spine: network.topology.nvidia.com/spine
datacenter: network.topology.nvidia.com/datacenter
core: network.topology.nvidia.com/core

podAnnotations: {}
podLabels: {}
Expand Down
8 changes: 4 additions & 4 deletions cmd/topograph/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,13 @@ var GitTag string

func main() {
var cfg string
var labelAccelerator, labelBlock, labelSpine, labelDatacenter string
var labelAccelerator, labelLeaf, labelSpine, labelCore string
var version bool
flag.StringVar(&cfg, "c", "/etc/topograph/topograph-config.yaml", "config file")
flag.StringVar(&labelAccelerator, "k8s-topology-key-accelerator", k8s.DefaultLabelAccelerator, "K8s node label for accelerated network type")
flag.StringVar(&labelBlock, "k8s-topology-key-block", k8s.DefaultLabelBlock, "K8s node label for the cluster's lower network tier")
flag.StringVar(&labelLeaf, "k8s-topology-key-leaf", k8s.DefaultLabelLeaf, "K8s node label for the cluster's lower network tier")
flag.StringVar(&labelSpine, "k8s-topology-key-spine", k8s.DefaultLabelSpine, "K8s node label for the cluster's middle network tier")
flag.StringVar(&labelDatacenter, "k8s-topology-key-datacenter", k8s.DefaultLabelDatacenter, "K8s node label for the cluster's top network tier")
flag.StringVar(&labelCore, "k8s-topology-key-core", k8s.DefaultLabelCore, "K8s node label for the cluster's top network tier")
flag.BoolVar(&version, "version", false, "show the version")

klog.InitFlags(nil)
Expand All @@ -53,7 +53,7 @@ func main() {
os.Exit(0)
}

k8s.InitLabels(labelAccelerator, labelBlock, labelSpine, labelDatacenter)
k8s.InitLabels(labelAccelerator, labelLeaf, labelSpine, labelCore)

if err := mainInternal(cfg); err != nil {
klog.Error(err.Error())
Expand Down
16 changes: 8 additions & 8 deletions docs/engines/k8s.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,19 @@ Topograph is a tool designed to enhance scheduling decisions in Kubernetes clust
Topograph maps both the multi-tier network hierarchy and accelerated network domains (such as NVLink) using node labels.
Most cloud providers expose three levels of network topology through their APIs. To provide a unified view, Topograph assigns four labels to each node:
* `network.topology.nvidia.com/accelerator`: Identifies high-speed interconnect domains, such as NVLink.
* `network.topology.nvidia.com/block`: Indicates the switches directly connected to compute nodes.
* `network.topology.nvidia.com/spine`: Represents the next tier of switches above the block level.
* `network.topology.nvidia.com/datacenter`: Denotes the top-level switches.
* `network.topology.nvidia.com/leaf`: Indicates the switches directly connected to compute nodes.
* `network.topology.nvidia.com/spine`: Represents the next tier of switches above the leaf level.
* `network.topology.nvidia.com/core`: Denotes the top-level switches.

The names of these node labels are configurable via the [Helm chart](https://github.com/NVIDIA/topograph/tree/main/charts/topograph).

For example, if a node belongs to NVLink domain `nvl1` and connects to switch `s1`, which connects to switch `s2`, and then to switch `s3`, Topograph will apply the following labels to the node:

```
network.topology.nvidia.com/accelerator: nvl1
network.topology.nvidia.com/block: s1
network.topology.nvidia.com/leaf: s1
network.topology.nvidia.com/spine: s2
network.topology.nvidia.com/datacenter: s3
network.topology.nvidia.com/core: s3
```

<p align="center"><img src="../assets/topograph-k8s.png" width="600" alt="Design"></p>
Expand Down Expand Up @@ -55,15 +55,15 @@ closer network proximity.
operator: In
values:
- myapp
topologyKey: network.topology.nvidia.com/block
topologyKey: network.topology.nvidia.com/leaf
```
Pods are prioritized to be placed on nodes sharing the label `network.topology.nvidia.com/block`.
Pods are prioritized to be placed on nodes sharing the label `network.topology.nvidia.com/leaf`.
These nodes are connected to the same network switch, ensuring the lowest latency for communication.

Nodes with the label `network.topology.nvidia.com/spine` are next in priority.
Pods on these nodes will still be relatively close, but with slightly higher latency.

In the three-tier network, all nodes will share the same `network.topology.nvidia.com/datacenter` label,
In the three-tier network, all nodes will share the same `network.topology.nvidia.com/core` label,
so it doesn’t need to be included in pod affinity settings.

Since the default Kubernetes scheduler places one pod at a time, the placement may vary depending on where
Expand Down
14 changes: 7 additions & 7 deletions pkg/engines/k8s/labeler.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,23 +26,23 @@ import (

const (
DefaultLabelAccelerator = "network.topology.nvidia.com/accelerator"
DefaultLabelBlock = "network.topology.nvidia.com/block"
DefaultLabelLeaf = "network.topology.nvidia.com/leaf"
DefaultLabelSpine = "network.topology.nvidia.com/spine"
DefaultLabelDatacenter = "network.topology.nvidia.com/datacenter"
DefaultLabelCore = "network.topology.nvidia.com/core"
)

var (
labelAccelerator, labelBlock, labelSpine, labelDatacenter string
labelAccelerator, labelLeaf, labelSpine, labelCore string

switchNetworkHierarchy []string
)

func InitLabels(accelerator, block, spine, datacenter string) {
func InitLabels(accelerator, leaf, spine, core string) {
labelAccelerator = accelerator
labelBlock = block
labelLeaf = leaf
labelSpine = spine
labelDatacenter = datacenter
switchNetworkHierarchy = []string{labelBlock, labelSpine, labelDatacenter}
labelCore = core
switchNetworkHierarchy = []string{labelLeaf, labelSpine, labelCore}
}

// map nodename:[label name: label value]
Expand Down
64 changes: 32 additions & 32 deletions pkg/engines/k8s/labeler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,16 @@ func (l *testLabeler) AddNodeLabels(_ context.Context, nodeName string, labels m
}

func TestApplyNodeLabelsWithTree(t *testing.T) {
InitLabels(DefaultLabelAccelerator, DefaultLabelBlock, DefaultLabelSpine, DefaultLabelDatacenter)
InitLabels(DefaultLabelAccelerator, DefaultLabelLeaf, DefaultLabelSpine, DefaultLabelCore)
root, _ := translate.GetTreeTestSet(true)
labeler := &testLabeler{data: make(map[string]map[string]string)}
data := map[string]map[string]string{
"Node201": {"network.topology.nvidia.com/block": "S2", "network.topology.nvidia.com/spine": "S1"},
"Node202": {"network.topology.nvidia.com/block": "S2", "network.topology.nvidia.com/spine": "S1"},
"Node205": {"network.topology.nvidia.com/block": "S2", "network.topology.nvidia.com/spine": "S1"},
"Node304": {"network.topology.nvidia.com/block": "xf946c4acef2d5939", "network.topology.nvidia.com/spine": "S1"},
"Node305": {"network.topology.nvidia.com/block": "xf946c4acef2d5939", "network.topology.nvidia.com/spine": "S1"},
"Node306": {"network.topology.nvidia.com/block": "xf946c4acef2d5939", "network.topology.nvidia.com/spine": "S1"},
"Node201": {"network.topology.nvidia.com/leaf": "S2", "network.topology.nvidia.com/spine": "S1"},
"Node202": {"network.topology.nvidia.com/leaf": "S2", "network.topology.nvidia.com/spine": "S1"},
"Node205": {"network.topology.nvidia.com/leaf": "S2", "network.topology.nvidia.com/spine": "S1"},
"Node304": {"network.topology.nvidia.com/leaf": "xf946c4acef2d5939", "network.topology.nvidia.com/spine": "S1"},
"Node305": {"network.topology.nvidia.com/leaf": "xf946c4acef2d5939", "network.topology.nvidia.com/spine": "S1"},
"Node306": {"network.topology.nvidia.com/leaf": "xf946c4acef2d5939", "network.topology.nvidia.com/spine": "S1"},
}

err := NewTopologyLabeler().ApplyNodeLabels(context.TODO(), root, labeler)
Expand All @@ -57,81 +57,81 @@ func TestApplyNodeLabelsWithTree(t *testing.T) {
}

func TestApplyNodeLabelsWithBlock(t *testing.T) {
InitLabels(DefaultLabelAccelerator, DefaultLabelBlock, DefaultLabelSpine, DefaultLabelDatacenter)
InitLabels(DefaultLabelAccelerator, DefaultLabelLeaf, DefaultLabelSpine, DefaultLabelCore)
root, _ := translate.GetBlockWithMultiIBTestSet()
labeler := &testLabeler{data: make(map[string]map[string]string)}
data := map[string]map[string]string{
"Node104": {
"network.topology.nvidia.com/accelerator": "B1",
"network.topology.nvidia.com/block": "S2",
"network.topology.nvidia.com/leaf": "S2",
"network.topology.nvidia.com/spine": "S1",
"network.topology.nvidia.com/datacenter": "IB2",
"network.topology.nvidia.com/core": "IB2",
},
"Node105": {
"network.topology.nvidia.com/accelerator": "B1",
"network.topology.nvidia.com/block": "S2",
"network.topology.nvidia.com/leaf": "S2",
"network.topology.nvidia.com/spine": "S1",
"network.topology.nvidia.com/datacenter": "IB2",
"network.topology.nvidia.com/core": "IB2",
},
"Node106": {
"network.topology.nvidia.com/accelerator": "B1",
"network.topology.nvidia.com/block": "S2",
"network.topology.nvidia.com/leaf": "S2",
"network.topology.nvidia.com/spine": "S1",
"network.topology.nvidia.com/datacenter": "IB2",
"network.topology.nvidia.com/core": "IB2",
},
"Node201": {
"network.topology.nvidia.com/accelerator": "B2",
"network.topology.nvidia.com/block": "S3",
"network.topology.nvidia.com/leaf": "S3",
"network.topology.nvidia.com/spine": "S1",
"network.topology.nvidia.com/datacenter": "IB2",
"network.topology.nvidia.com/core": "IB2",
},
"Node202": {
"network.topology.nvidia.com/accelerator": "B2",
"network.topology.nvidia.com/block": "S3",
"network.topology.nvidia.com/leaf": "S3",
"network.topology.nvidia.com/spine": "S1",
"network.topology.nvidia.com/datacenter": "IB2",
"network.topology.nvidia.com/core": "IB2",
},
"Node205": {
"network.topology.nvidia.com/accelerator": "B2",
"network.topology.nvidia.com/block": "S3",
"network.topology.nvidia.com/leaf": "S3",
"network.topology.nvidia.com/spine": "S1",
"network.topology.nvidia.com/datacenter": "IB2",
"network.topology.nvidia.com/core": "IB2",
},
"Node301": {
"network.topology.nvidia.com/accelerator": "B3",
"network.topology.nvidia.com/block": "S5",
"network.topology.nvidia.com/leaf": "S5",
"network.topology.nvidia.com/spine": "S4",
"network.topology.nvidia.com/datacenter": "IB1",
"network.topology.nvidia.com/core": "IB1",
},
"Node302": {
"network.topology.nvidia.com/accelerator": "B3",
"network.topology.nvidia.com/block": "S5",
"network.topology.nvidia.com/leaf": "S5",
"network.topology.nvidia.com/spine": "S4",
"network.topology.nvidia.com/datacenter": "IB1",
"network.topology.nvidia.com/core": "IB1",
},
"Node303": {
"network.topology.nvidia.com/accelerator": "B3",
"network.topology.nvidia.com/block": "S5",
"network.topology.nvidia.com/leaf": "S5",
"network.topology.nvidia.com/spine": "S4",
"network.topology.nvidia.com/datacenter": "IB1",
"network.topology.nvidia.com/core": "IB1",
},
"Node401": {
"network.topology.nvidia.com/accelerator": "B4",
"network.topology.nvidia.com/block": "S6",
"network.topology.nvidia.com/leaf": "S6",
"network.topology.nvidia.com/spine": "S4",
"network.topology.nvidia.com/datacenter": "IB1",
"network.topology.nvidia.com/core": "IB1",
},
"Node402": {
"network.topology.nvidia.com/accelerator": "B4",
"network.topology.nvidia.com/block": "S6",
"network.topology.nvidia.com/leaf": "S6",
"network.topology.nvidia.com/spine": "S4",
"network.topology.nvidia.com/datacenter": "IB1",
"network.topology.nvidia.com/core": "IB1",
},
"Node403": {
"network.topology.nvidia.com/accelerator": "B4",
"network.topology.nvidia.com/block": "S6",
"network.topology.nvidia.com/leaf": "S6",
"network.topology.nvidia.com/spine": "S4",
"network.topology.nvidia.com/datacenter": "IB1",
"network.topology.nvidia.com/core": "IB1",
},
}

Expand Down
8 changes: 4 additions & 4 deletions pkg/providers/aws/instance_topology.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,10 @@ func (p *baseProvider) generateRegionInstanceTopology(ctx context.Context, pageS

func convert(inst *types.InstanceTopology) *topology.InstanceTopology {
topo := &topology.InstanceTopology{
InstanceID: *inst.InstanceId,
BlockID: inst.NetworkNodes[2],
SpineID: inst.NetworkNodes[1],
DatacenterID: inst.NetworkNodes[0],
InstanceID: *inst.InstanceId,
LeafID: inst.NetworkNodes[2],
SpineID: inst.NetworkNodes[1],
CoreID: inst.NetworkNodes[0],
}
if inst.CapacityBlockId != nil {
topo.AcceleratorID = *inst.CapacityBlockId
Expand Down
10 changes: 5 additions & 5 deletions pkg/providers/gcp/instance_topology.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,12 @@ func (p *baseProvider) generateRegionInstanceTopology(ctx context.Context, clien
continue
}
inst := &topology.InstanceTopology{
InstanceID: instanceId,
DatacenterID: instance.ResourceStatus.PhysicalHostTopology.GetCluster(),
SpineID: instance.ResourceStatus.PhysicalHostTopology.GetBlock(),
BlockID: instance.ResourceStatus.PhysicalHostTopology.GetSubblock(),
InstanceID: instanceId,
CoreID: instance.ResourceStatus.PhysicalHostTopology.GetCluster(),
SpineID: instance.ResourceStatus.PhysicalHostTopology.GetBlock(),
LeafID: instance.ResourceStatus.PhysicalHostTopology.GetSubblock(),
}
inst.AcceleratorID = inst.BlockID
inst.AcceleratorID = inst.LeafID
klog.Infof("Adding topology: %s", inst.String())
topo.Append(inst)
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/providers/lambdai/instance_topology.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,11 @@ func (p *baseProvider) generateRegionInstanceTopology(ctx context.Context, clien
for indx := range len(inst.NetworkPath) {
switch indx {
case 0:
t.BlockID = inst.NetworkPath[indx]
t.LeafID = inst.NetworkPath[indx]
case 1:
t.SpineID = inst.NetworkPath[indx]
case 2:
t.DatacenterID = inst.NetworkPath[indx]
t.CoreID = inst.NetworkPath[indx]
default:
klog.Warningf("unsupported size %d of topology path for instance %q", len(inst.NetworkPath), inst.ID)
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/providers/nebius/instance_topology.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ func (p *baseProvider) generateRegionInstanceTopology(ctx context.Context, clien
path := ibTopology.GetPath()
switch len(path) {
case 3:
inst.DatacenterID = path[0]
inst.CoreID = path[0]
inst.SpineID = path[1]
inst.BlockID = path[2]
inst.LeafID = path[2]
default:
klog.Warningf("unsupported size %d of topology path for node %q", len(path), hostname)
continue
Expand Down
8 changes: 4 additions & 4 deletions pkg/providers/oci/instance_topology.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,10 @@ func convert(host *core.ComputeHostSummary) (*topology.InstanceTopology, error)
}

topo := &topology.InstanceTopology{
InstanceID: *host.InstanceId,
BlockID: *host.LocalBlockId,
SpineID: *host.NetworkBlockId,
DatacenterID: *host.HpcIslandId,
InstanceID: *host.InstanceId,
LeafID: *host.LocalBlockId,
SpineID: *host.NetworkBlockId,
CoreID: *host.HpcIslandId,
}

if host.GpuMemoryFabricId != nil {
Expand Down
16 changes: 8 additions & 8 deletions pkg/providers/oci/instance_topology_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ import (

func TestConvert(t *testing.T) {
valid := &topology.InstanceTopology{
InstanceID: "id",
BlockID: "block",
SpineID: "net",
DatacenterID: "datacenter",
InstanceID: "id",
LeafID: "leaf",
SpineID: "net",
CoreID: "core",
}

testCases := []struct {
Expand All @@ -55,15 +55,15 @@ func TestConvert(t *testing.T) {
name: "Case 3: missing NetworkBlockId",
host: &core.ComputeHostSummary{
InstanceId: &valid.InstanceID,
LocalBlockId: &valid.BlockID,
LocalBlockId: &valid.LeafID,
},
err: `missing NetworkBlockId for instance "id"`,
},
{
name: "Case 4: missing HpcIslandId",
host: &core.ComputeHostSummary{
InstanceId: &valid.InstanceID,
LocalBlockId: &valid.BlockID,
LocalBlockId: &valid.LeafID,
NetworkBlockId: &valid.SpineID,
},
err: `missing HpcIslandId for instance "id"`,
Expand All @@ -72,9 +72,9 @@ func TestConvert(t *testing.T) {
name: "Case 5: valid input",
host: &core.ComputeHostSummary{
InstanceId: &valid.InstanceID,
LocalBlockId: &valid.BlockID,
LocalBlockId: &valid.LeafID,
NetworkBlockId: &valid.SpineID,
HpcIslandId: &valid.DatacenterID,
HpcIslandId: &valid.CoreID,
},
topo: valid,
},
Expand Down
4 changes: 2 additions & 2 deletions pkg/providers/oci/provider_imds.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ func (p *imdsProvider) getComputeHostInfo(ctx context.Context, ci topology.Compu
if nodeTopology, ok := topoMap[node]; ok {
topo.Instances = append(topo.Instances, &topology.InstanceTopology{
InstanceID: instanceID,
BlockID: nodeTopology.LocalBlock,
LeafID: nodeTopology.LocalBlock,
SpineID: nodeTopology.NetworkBlock,
DatacenterID: nodeTopology.HPCIslandId,
CoreID: nodeTopology.HPCIslandId,
AcceleratorID: nodeTopology.GpuMemoryFabric,
})
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/server/grpc_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ func forwardRequest(ctx context.Context, tr *topology.Request, url string, cis [
func convert(inst *pb.Instance) *topology.InstanceTopology {
topo := &topology.InstanceTopology{
InstanceID: inst.Id,
BlockID: inst.NetworkLayers[0],
LeafID: inst.NetworkLayers[0],
SpineID: inst.NetworkLayers[1],
DatacenterID: inst.NetworkLayers[2],
CoreID: inst.NetworkLayers[2],
AcceleratorID: inst.NvlinkDomain,
}
klog.V(4).Infof("Adding instance topology %s", topo.String())
Expand Down
Loading