Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion charts/tensor-fusion/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.3.6
version: 1.3.7

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
Expand Down
3 changes: 3 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ rules:
- gpunodeclasses
- gpunodes
- gpupools
- gpuresourcequotas
- gpus
- schedulingconfigtemplates
- tensorfusionclusters
Expand All @@ -154,6 +155,7 @@ rules:
- gpunodeclasses/finalizers
- gpunodes/finalizers
- gpupools/finalizers
- gpuresourcequotas/finalizers
- gpus/finalizers
- schedulingconfigtemplates/finalizers
- tensorfusionclusters/finalizers
Expand All @@ -168,6 +170,7 @@ rules:
- gpunodeclasses/status
- gpunodes/status
- gpupools/status
- gpuresourcequotas/status
- gpus/status
- schedulingconfigtemplates/status
- tensorfusionclusters/status
Expand Down
51 changes: 45 additions & 6 deletions internal/utils/owner_ref_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,74 @@ package utils
import (
context "context"

"fmt"

"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// FindRootOwnerReference walks the owner-reference chain of obj upwards and
// returns a reference to the top-most owner that can be resolved (for a Pod
// this is typically the workload that ultimately created it).
//
// At every level the owner reference marked controller=true is followed; if no
// reference is marked as controller, the first owner reference is used. Owners
// are fetched through c as unstructured objects, always looked up with the
// supplied namespace.
//
// Return values:
//   - (nil, nil) when obj itself has no owner references.
//   - a reference built from the apiVersion, kind, name and UID of the last
//     fetched owner once an owner without owner references is reached.
//   - the dangling owner reference itself when the object it points to cannot
//     be found.
//   - (nil, err) when fetching an owner fails for any reason other than
//     NotFound.
func FindRootOwnerReference(ctx context.Context, c client.Client, namespace string, obj metav1.Object) (*metav1.OwnerReference, error) {
	// An object without owners has no root owner to report.
	if len(obj.GetOwnerReferences()) == 0 {
		return nil, nil
	}

	current := obj
	for {
		owners := current.GetOwnerReferences()
		if len(owners) == 0 {
			// current is the top of the chain: describe it as an owner reference.
			// The type information is only available when the object also
			// implements runtime.Object; otherwise apiVersion/kind stay empty.
			var apiVersion, kind string
			if rObj, ok := current.(runtime.Object); ok {
				gvk := rObj.GetObjectKind().GroupVersionKind()
				apiVersion = gvk.GroupVersion().String()
				kind = gvk.Kind
			}
			return &metav1.OwnerReference{
				APIVersion: apiVersion,
				Kind:       kind,
				Name:       current.GetName(),
				UID:        current.GetUID(),
			}, nil
		}

		// Prefer the owner reference with controller=true, falling back to
		// the first reference when none is marked as controller.
		ownerRef := owners[0]
		for _, ref := range owners {
			if ref.Controller != nil && *ref.Controller {
				ownerRef = ref
				break
			}
		}

		// Fetch the owner as unstructured so any kind can be followed without
		// needing its Go type.
		unObj := &unstructured.Unstructured{}
		unObj.SetAPIVersion(ownerRef.APIVersion)
		unObj.SetKind(ownerRef.Kind)
		key := client.ObjectKey{Name: ownerRef.Name, Namespace: namespace}
		if err := c.Get(ctx, key, unObj); err != nil {
			// A missing owner ends the walk: the reference to it is the best
			// root that can be reported.
			if errors.IsNotFound(err) {
				return &ownerRef, nil
			}
			return nil, fmt.Errorf("get owner object: %w", err)
		}

		// *unstructured.Unstructured implements metav1.Object, so it can be
		// used directly as the next link of the chain.
		current = unObj
	}
}
142 changes: 142 additions & 0 deletions internal/utils/owner_ref_utils_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
package utils_test

import (
"context"
"testing"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

"github.com/stretchr/testify/require"

"github.com/NexusGPU/tensor-fusion/internal/utils"
)

// TestFindRootOwnerReference exercises utils.FindRootOwnerReference against a
// fake client for three situations: an object without owners, a
// Pod -> ReplicaSet -> Deployment chain, and an owner reference pointing at an
// object that does not exist.
func TestFindRootOwnerReference(t *testing.T) {
	// Prepare the scheme shared by all subtests.
	sch := runtime.NewScheme()
	require.NoError(t, corev1.AddToScheme(sch))
	require.NoError(t, appsv1.AddToScheme(sch))

	// The subtest name matches the assertion below: an object without any
	// owner reference yields a nil root reference, not a reference to itself.
	t.Run("no owner returns nil", func(t *testing.T) {
		pod := &corev1.Pod{
			TypeMeta: metav1.TypeMeta{
				APIVersion: "v1",
				Kind:       "Pod",
			},
			ObjectMeta: metav1.ObjectMeta{
				Name:      "mypod",
				Namespace: "default",
				UID:       "uid-pod",
			},
		}

		// build fake client with only pod
		c := fake.NewClientBuilder().WithScheme(sch).WithObjects(pod).Build()

		rootRef, err := utils.FindRootOwnerReference(context.TODO(), c, "default", pod)
		require.NoError(t, err)
		require.Nil(t, rootRef)
	})

	t.Run("hierarchy returns deployment", func(t *testing.T) {
		controller := true
		deployment := &appsv1.Deployment{
			TypeMeta: metav1.TypeMeta{
				APIVersion: "apps/v1",
				Kind:       "Deployment",
			},
			ObjectMeta: metav1.ObjectMeta{
				Name:      "mydeploy",
				Namespace: "default",
				UID:       "uid-deploy",
			},
		}

		rs := &appsv1.ReplicaSet{
			TypeMeta: metav1.TypeMeta{
				APIVersion: "apps/v1",
				Kind:       "ReplicaSet",
			},
			ObjectMeta: metav1.ObjectMeta{
				Name:      "myrs",
				Namespace: "default",
				UID:       "uid-rs",
				OwnerReferences: []metav1.OwnerReference{
					{
						APIVersion: "apps/v1",
						Kind:       "Deployment",
						Name:       "mydeploy",
						UID:        deployment.UID,
						Controller: &controller,
					},
				},
			},
		}

		pod := &corev1.Pod{
			TypeMeta: metav1.TypeMeta{
				APIVersion: "v1",
				Kind:       "Pod",
			},
			ObjectMeta: metav1.ObjectMeta{
				Name:      "mypod",
				Namespace: "default",
				UID:       "uid-pod",
				OwnerReferences: []metav1.OwnerReference{
					{
						APIVersion: "apps/v1",
						Kind:       "ReplicaSet",
						Name:       "myrs",
						UID:        rs.UID,
						Controller: &controller,
					},
				},
			},
		}

		// All three levels of the chain exist in the fake client.
		c := fake.NewClientBuilder().WithScheme(sch).WithObjects(pod, rs, deployment).Build()

		rootRef, err := utils.FindRootOwnerReference(context.TODO(), c, "default", pod)
		require.NoError(t, err)
		require.NotNil(t, rootRef)
		require.Equal(t, "mydeploy", rootRef.Name)
		require.Equal(t, "Deployment", rootRef.Kind)
	})

	t.Run("missing owner returns ownerRef", func(t *testing.T) {
		// Pod refers to a ReplicaSet that doesn't exist in fake client
		controller := true
		pod := &corev1.Pod{
			TypeMeta: metav1.TypeMeta{
				APIVersion: "v1",
				Kind:       "Pod",
			},
			ObjectMeta: metav1.ObjectMeta{
				Name:      "mypod",
				Namespace: "default",
				UID:       "uid-pod",
				OwnerReferences: []metav1.OwnerReference{
					{
						APIVersion: "apps/v1",
						Kind:       "ReplicaSet",
						Name:       "missing-rs",
						UID:        "uid-missing",
						Controller: &controller,
					},
				},
			},
		}

		c := fake.NewClientBuilder().WithScheme(sch).WithObjects(pod).Build()

		rootRef, err := utils.FindRootOwnerReference(context.TODO(), c, "default", pod)
		require.NoError(t, err)
		require.NotNil(t, rootRef)
		require.Equal(t, "missing-rs", rootRef.Name)
		require.Equal(t, "ReplicaSet", rootRef.Kind)
	})
}