## Seldon V2 Kubernetes Model and Server Deletion Tests


In [8]:
# Install (or upgrade) the seldon-core-v2-crds chart into the seldon-mesh namespace.
!helm upgrade seldon-core-v2-crds ../k8s/helm-charts/seldon-core-v2-crds --install --namespace seldon-mesh

Release "seldon-core-v2-crds" has been upgraded. Happy Helming!
NAME: seldon-core-v2-crds
LAST DEPLOYED: Thu Jul 27 10:23:00 2023
NAMESPACE: seldon-mesh
STATUS: deployed
REVISION: 2
TEST SUITE: None


In [9]:
# Install (or upgrade) the seldon-core-v2-setup chart and block until the release is ready.
!helm upgrade seldon-core-v2 ../k8s/helm-charts/seldon-core-v2-setup/ --install --namespace seldon-mesh --wait

Release "seldon-core-v2" does not exist. Installing it now.
NAME: seldon-core-v2
LAST DEPLOYED: Thu Jul 27 10:23:03 2023
NAMESPACE: seldon-mesh
STATUS: deployed
REVISION: 1
TEST SUITE: None


In [10]:
# Install (or upgrade) the seldon-core-v2-runtime chart and block until the release is ready.
!helm upgrade seldon-v2-runtime ../k8s/helm-charts/seldon-core-v2-runtime --install --namespace seldon-mesh --wait

Release "seldon-v2-runtime" does not exist. Installing it now.
NAME: seldon-v2-runtime
LAST DEPLOYED: Thu Jul 27 10:23:23 2023
NAMESPACE: seldon-mesh
STATUS: deployed
REVISION: 1
TEST SUITE: None


In [24]:
# Install (or upgrade) the seldon-core-v2-servers chart and block until the release is ready.
!helm upgrade seldon-v2-servers ../k8s/helm-charts/seldon-core-v2-servers --install --namespace seldon-mesh --wait

Release "seldon-v2-servers" has been upgraded. Happy Helming!
NAME: seldon-v2-servers
LAST DEPLOYED: Thu Jul 27 13:50:38 2023
NAMESPACE: seldon-mesh
STATUS: deployed
REVISION: 2
TEST SUITE: None


In [1]:
import os

# Export the target namespace so the `!` shell commands in this notebook can read it as ${NAMESPACE}.
os.environ.update({"NAMESPACE": "seldon-mesh"})

In [2]:
import os

# Ask kubectl for the first LoadBalancer ingress IP of the seldon-mesh service.
# `name = !cmd` captures the command's stdout as a list of lines.
mesh_ip_lines = !kubectl get svc seldon-mesh -n ${NAMESPACE} -o jsonpath='{.status.loadBalancer.ingress[0].ip}'

# Fail with a clear message instead of an IndexError (or a silently empty IP)
# when the service has not been assigned an ingress IP.
assert mesh_ip_lines and mesh_ip_lines[0], "seldon-mesh service has no LoadBalancer ingress IP"

MESH_IP = mesh_ip_lines[0]
# Export the IP so later `!` shell commands can use ${MESH_IP}.
os.environ["MESH_IP"] = MESH_IP
MESH_IP

'172.18.255.2'

In [3]:
# Block (up to 300s) until every SeldonRuntime in the namespace reports the ready condition.
!kubectl wait seldonruntime --all --for=condition=ready --timeout=300s --namespace ${NAMESPACE}

seldonruntime.mlops.seldon.io/seldon condition met


In [4]:
# Block (up to 300s) until every Server in the namespace reports the ready condition.
!kubectl wait server --all --for=condition=ready --timeout=300s --namespace ${NAMESPACE}

server.mlops.seldon.io/mlserver condition met
server.mlops.seldon.io/triton condition met


### Check a Model can be deleted after the Server it is running on is deleted

In [33]:
# Create the Model defined in sklearn-iris-gs.yaml in the target namespace.
!kubectl create --filename ./models/sklearn-iris-gs.yaml --namespace ${NAMESPACE}

model.mlops.seldon.io/iris created


In [34]:
# Block (up to 300s) until every Model in the namespace reports the ready condition.
!kubectl wait model --all --for=condition=ready --timeout=300s --namespace ${NAMESPACE}

model.mlops.seldon.io/iris condition met


In [35]:
# Pretty-print the status block of the iris Model (jq colour output disabled).
!kubectl get model iris --namespace ${NAMESPACE} --output jsonpath='{.status}' | jq --monochrome-output .

{
  "conditions": [
    {
      "lastTransitionTime": "2023-07-31T09:54:40Z",
      "message": "ModelAvailable",
      "status": "True",
      "type": "ModelReady"
    },
    {
      "lastTransitionTime": "2023-07-31T09:54:40Z",
      "status": "True",
      "type": "Ready"
    }
  ],
  "replicas": 1
}


In [36]:
# Delete the mlserver Server resource from the target namespace.
!kubectl delete server mlserver --namespace ${NAMESPACE}

server.mlops.seldon.io "mlserver" deleted


In [37]:
# Inspect the iris Model's status now that the mlserver Server has been deleted.
!kubectl get model iris --namespace ${NAMESPACE} --output jsonpath='{.status}' | jq --monochrome-output .

{
  "conditions": [
    {
      "lastTransitionTime": "2023-07-31T09:54:44Z",
      "message": "ScheduleFailed",
      "reason": "failed to schedule model iris. [failed replica filter RequirementsReplicaFilter for server replica triton:0 : model requirements [sklearn] replica capabilities [triton dali fil onnx openvino python pytorch tensorflow tensorrt]]",
      "status": "False",
      "type": "ModelReady"
    },
    {
      "lastTransitionTime": "2023-07-31T09:54:44Z",
      "message": "ScheduleFailed",
      "reason": "failed to schedule model iris. [failed replica filter RequirementsReplicaFilter for server replica triton:0 : model requirements [sklearn] replica capabilities [triton dali fil onnx openvino python pytorch tensorflow tensorrt]]",
      "status": "False",
      "type": "Ready"
    }
  ],
  "replicas": 1
}


In [38]:
# Re-apply the seldon-core-v2-servers chart and wait for the release to be ready.
# Uses ${NAMESPACE} (exported earlier in the notebook) instead of a hard-coded
# namespace so this cell targets the same namespace as the kubectl cells around it.
!helm upgrade --install seldon-v2-servers ../k8s/helm-charts/seldon-core-v2-servers -n ${NAMESPACE} --wait

Release "seldon-v2-servers" has been upgraded. Happy Helming!
NAME: seldon-v2-servers
LAST DEPLOYED: Mon Jul 31 10:54:56 2023
NAMESPACE: seldon-mesh
STATUS: deployed
REVISION: 6
TEST SUITE: None


In [39]:
# Block (up to 300s) until every Model in the namespace reports the ready condition again.
!kubectl wait model --all --for=condition=ready --timeout=300s --namespace ${NAMESPACE}

model.mlops.seldon.io/iris condition met


In [40]:
# Pretty-print the iris Model's status after the servers chart was re-applied.
!kubectl get model iris --namespace ${NAMESPACE} --output jsonpath='{.status}' | jq --monochrome-output .

{
  "conditions": [
    {
      "lastTransitionTime": "2023-07-31T09:55:01Z",
      "message": "ModelAvailable",
      "status": "True",
      "type": "ModelReady"
    },
    {
      "lastTransitionTime": "2023-07-31T09:55:01Z",
      "status": "True",
      "type": "Ready"
    }
  ],
  "replicas": 1
}


In [41]:
# Delete the mlserver Server resource a second time, ahead of deleting the Model itself.
!kubectl delete server mlserver --namespace ${NAMESPACE}

server.mlops.seldon.io "mlserver" deleted


In [42]:
# Inspect the iris Model's status while the mlserver Server is absent.
!kubectl get model iris --namespace ${NAMESPACE} --output jsonpath='{.status}' | jq --monochrome-output .

{
  "conditions": [
    {
      "lastTransitionTime": "2023-07-31T09:55:24Z",
      "message": "ScheduleFailed",
      "reason": "failed to schedule model iris. [failed replica filter RequirementsReplicaFilter for server replica triton:0 : model requirements [sklearn] replica capabilities [triton dali fil onnx openvino python pytorch tensorflow tensorrt]]",
      "status": "False",
      "type": "ModelReady"
    },
    {
      "lastTransitionTime": "2023-07-31T09:55:24Z",
      "message": "ScheduleFailed",
      "reason": "failed to schedule model iris. [failed replica filter RequirementsReplicaFilter for server replica triton:0 : model requirements [sklearn] replica capabilities [triton dali fil onnx openvino python pytorch tensorflow tensorrt]]",
      "status": "False",
      "type": "Ready"
    }
  ],
  "replicas": 1
}


In [43]:
# Delete the Model defined in sklearn-iris-gs.yaml from the target namespace.
!kubectl delete --filename ./models/sklearn-iris-gs.yaml --namespace ${NAMESPACE}

model.mlops.seldon.io "iris" deleted


In [44]:
# Re-apply the seldon-core-v2-servers chart and wait for the release to be ready.
# Uses ${NAMESPACE} (exported earlier in the notebook) instead of a hard-coded
# namespace so this cell targets the same namespace as the kubectl cells around it.
!helm upgrade --install seldon-v2-servers ../k8s/helm-charts/seldon-core-v2-servers -n ${NAMESPACE} --wait

Release "seldon-v2-servers" has been upgraded. Happy Helming!
NAME: seldon-v2-servers
LAST DEPLOYED: Mon Jul 31 10:55:35 2023
NAMESPACE: seldon-mesh
STATUS: deployed
REVISION: 7
TEST SUITE: None


### Check Model can be deleted after failed scheduling

In [12]:
# Create the Model defined in sklearn-iris-gs-baduri.yaml in the target namespace.
!kubectl create --filename ./models/sklearn-iris-gs-baduri.yaml --namespace ${NAMESPACE}

model.mlops.seldon.io/iris created


In [13]:
# Pretty-print the status of the iris Model created from the bad-URI manifest.
!kubectl get model iris --namespace ${NAMESPACE} --output jsonpath='{.status}' | jq --monochrome-output .

{
  "conditions": [
    {
      "lastTransitionTime": "2023-07-31T09:38:59Z",
      "message": "ModelFailed",
      "reason": "Failed to download from gs://seldon-models/scv2/samples/mlserver_1.3.5/iris-notexistent-uri any files",
      "status": "False",
      "type": "ModelReady"
    },
    {
      "lastTransitionTime": "2023-07-31T09:38:59Z",
      "message": "ModelFailed",
      "reason": "Failed to download from gs://seldon-models/scv2/samples/mlserver_1.3.5/iris-notexistent-uri any files",
      "status": "False",
      "type": "Ready"
    }
  ],
  "replicas": 1
}


In [14]:
# Delete the Model defined in sklearn-iris-gs-baduri.yaml from the target namespace.
!kubectl delete --filename ./models/sklearn-iris-gs-baduri.yaml --namespace ${NAMESPACE}

model.mlops.seldon.io "iris" deleted


In [15]:
# List the Models remaining in the target namespace.
!kubectl get model --namespace ${NAMESPACE}

No resources found in seldon-mesh namespace.


## Check Models are rescheduled when a Server is deleted

 * Add models to initial MLServer
 * Create second MLServer 
 * Delete first MLServer
 * Model rescheduled
 

In [57]:
# Create the Model defined in sklearn-iris-gs.yaml in the target namespace.
!kubectl create --filename ./models/sklearn-iris-gs.yaml --namespace ${NAMESPACE}

model.mlops.seldon.io/iris created


In [58]:
# Block (up to 300s) until every Model in the namespace reports the ready condition.
!kubectl wait model --all --for=condition=ready --timeout=300s --namespace ${NAMESPACE}

model.mlops.seldon.io/iris condition met


In [59]:
# Pretty-print the status block of the iris Model (jq colour output disabled).
!kubectl get model iris --namespace ${NAMESPACE} --output jsonpath='{.status}' | jq --monochrome-output .

{
  "conditions": [
    {
      "lastTransitionTime": "2023-07-31T13:45:56Z",
      "message": "ModelAvailable",
      "status": "True",
      "type": "ModelReady"
    },
    {
      "lastTransitionTime": "2023-07-31T13:45:56Z",
      "status": "True",
      "type": "Ready"
    }
  ],
  "replicas": 1
}


In [60]:
# Display the contents of the mlserver2 Server manifest.
!cat ./servers/mlserver2.yaml

apiVersion: mlops.seldon.io/v1alpha1
kind: Server
metadata:
  name: mlserver2
spec:
  serverConfig: mlserver


In [61]:
# Create the Server defined in mlserver2.yaml in the target namespace.
!kubectl create --filename ./servers/mlserver2.yaml --namespace ${NAMESPACE}

server.mlops.seldon.io/mlserver2 created


In [62]:
# Block (up to 300s) until every Server in the namespace reports the ready condition.
!kubectl wait server --all --for=condition=ready --timeout=300s --namespace ${NAMESPACE}

server.mlops.seldon.io/mlserver condition met
server.mlops.seldon.io/mlserver2 condition met
server.mlops.seldon.io/triton condition met


In [63]:
# Delete the original mlserver Server resource from the target namespace.
!kubectl delete server mlserver --namespace ${NAMESPACE}

server.mlops.seldon.io "mlserver" deleted


In [64]:
# Inspect the iris Model's status after the original mlserver Server was deleted.
!kubectl get model iris --namespace ${NAMESPACE} --output jsonpath='{.status}' | jq --monochrome-output .

{
  "conditions": [
    {
      "lastTransitionTime": "2023-07-31T13:46:15Z",
      "message": "ModelAvailable",
      "status": "True",
      "type": "ModelReady"
    },
    {
      "lastTransitionTime": "2023-07-31T13:46:15Z",
      "status": "True",
      "type": "Ready"
    }
  ],
  "replicas": 1
}


In [65]:
# Block (up to 300s) until every Model in the namespace reports the ready condition.
!kubectl wait model --all --for=condition=ready --timeout=300s --namespace ${NAMESPACE}

model.mlops.seldon.io/iris condition met


In [66]:
# Delete the Model defined in sklearn-iris-gs.yaml from the target namespace.
!kubectl delete --filename ./models/sklearn-iris-gs.yaml --namespace ${NAMESPACE}

model.mlops.seldon.io "iris" deleted


In [67]:
# Delete the Server defined in mlserver2.yaml. The namespace is passed explicitly
# because the resource was created with `-n ${NAMESPACE}`; without it kubectl
# falls back to the current context's default namespace.
!kubectl delete -f ./servers/mlserver2.yaml -n ${NAMESPACE}

server.mlops.seldon.io "mlserver2" deleted


In [68]:
# Re-apply the seldon-core-v2-servers chart and wait for the release to be ready.
# Uses ${NAMESPACE} (exported earlier in the notebook) instead of a hard-coded
# namespace so this cell targets the same namespace as the kubectl cells around it.
!helm upgrade --install seldon-v2-servers ../k8s/helm-charts/seldon-core-v2-servers -n ${NAMESPACE} --wait

Release "seldon-v2-servers" has been upgraded. Happy Helming!
NAME: seldon-v2-servers
LAST DEPLOYED: Mon Jul 31 14:46:28 2023
NAMESPACE: seldon-mesh
STATUS: deployed
REVISION: 9
TEST SUITE: None


## Cause OOM on Server and validate Models can be rescheduled

## Model code changes
  * Remove the finalizer, but ensure the model delete message gets through and the Model is marked for deletion.
  * Check cases to ensure that failures to delete still allow a new model to be created with the same name.
    * May need a unique hash so the agent can remove the Model if a new load comes in for the same name with a new hash.