# Triton Testing

## Local Setup


* `make start-scheduler` 
* `make start-envoy`
* `make start-triton-agent`
* `make start-triton`
* `make start-rclone`

In [1]:
!grpcurl -d '{"model":{ \
              "meta":{"name":"tfsimple"},\
              "modelSpec":{"uri":"gs://seldon-models/triton/simple",\
                           "requirements":["tensorflow"],\
                           "memoryBytes":500},\
              "deploymentSpec":{"replicas":1}}}' \
         -plaintext \
         -import-path ../../apis \
         -proto ../../apis/mlops/scheduler/scheduler.proto  0.0.0.0:9004 seldon.mlops.scheduler.Scheduler/LoadModel

Failed to dial target host "0.0.0.0:9004": dial tcp 0.0.0.0:9004: connect: connection refused


In [2]:
!grpcurl -d '{"model":{"name":"tfsimple"}}' \
         -plaintext \
         -import-path ../../apis \
         -proto ../../apis/mlops/scheduler/scheduler.proto  0.0.0.0:9004 seldon.mlops.scheduler.Scheduler/ModelStatus

{
  "modelName": "iris",
  "versions": [
    {
      "version": 1,
      "serverName": "triton",
      "modelReplicaState": {
        "0": {
          "state": "Available",
          "lastChangeTimestamp": "2022-01-14T17:58:20.387312101Z"
        }
      },
      "state": {
        "state": "ModelAvailable",
        "availableReplicas": 1,
        "lastChangeTimestamp": "2022-01-14T17:58:20.387312101Z"
      }
    }
  ]
}


In [3]:
!curl -v http://0.0.0.0:9000/v2/models/tfsimple/infer -H "Content-Type: application/json" -H "seldon-model: tfsimple"\
        -d '{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}'

* Expire in 0 ms for 6 (transfer 0x55d809a724f0)
*   Trying 0.0.0.0...
* TCP_NODELAY set
* Expire in 200 ms for 4 (transfer 0x55d809a724f0)
* Connected to 0.0.0.0 (127.0.0.1) port 9000 (#0)
> POST /v2/models/iris/infer HTTP/1.1
> Host: iris
> User-Agent: curl/7.64.0
> Accept: */*
> Content-Type: application/json
> Content-Length: 212
> 
* upload completely sent off: 212 out of 212 bytes
< HTTP/1.1 200 OK
< content-type: application/json
< content-length: 253
< x-envoy-upstream-service-time: 3
< date: Fri, 14 Jan 2022 18:01:32 GMT
< server: envoy
< 
* Connection #0 to host 0.0.0.0 left intact
{"model_name":"iris","model_version":"1","outputs":[{"name":"OUTPUT0","datatype":"INT32","shape":[1,16],"data":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]},{"name":"OUTPUT1","datatype":"INT32","shape":[1,16],"data":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}]}

In [4]:
!cd ../v2 && \
    grpcurl -d '{"model_name":"tfsimple","inputs":[{"name":"INPUT0","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}' \
        -plaintext \
        -proto grpc_service.proto \
        -rpc-header seldon-model:iris \
        0.0.0.0:9000 inference.GRPCInferenceService/ModelInfer

{
  "modelName": "iris",
  "modelVersion": "1",
  "outputs": [
    {
      "name": "OUTPUT0",
      "datatype": "INT32",
      "shape": [
        "1",
        "16"
      ]
    },
    {
      "name": "OUTPUT1",
      "datatype": "INT32",
      "shape": [
        "1",
        "16"
      ]
    }
  ],
  "rawOutputContents": [
    "AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==",
    "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="
  ]
}


In [5]:
!grpcurl -d '{"model":{"name":"tfsimple"}}' \
         -plaintext \
         -import-path ../../apis \
         -proto ../../apis/mlops/scheduler/scheduler.proto  0.0.0.0:9004 seldon.mlops.scheduler.Scheduler/UnloadModel

{
  
}


#  K8S Test

Assumes you have a Kind cluster running with metallb. No other dependencies required.
You can use [Seldon Ansible Kind Playbook](https://github.com/SeldonIO/ansible-k8s-collection/blob/master/playbooks/kind.yaml)

## Setup

* `make kind-image-install-all`
* `make deploy` 


In [37]:
SCHEDULER_IP=!kubectl get svc seldon-scheduler -n seldon-mesh -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
SCHEDULER_IP=SCHEDULER_IP[0]
import os
os.environ['SCHEDULER_IP'] = SCHEDULER_IP

In [38]:
MESH_IP=!kubectl get svc seldon-mesh -n seldon-mesh -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
MESH_IP=MESH_IP[0]
import os
os.environ['MESH_IP'] = MESH_IP

In [39]:
!grpcurl -d '{"model":{ \
              "meta":{"name":"tfsimple"},\
              "modelSpec":{"uri":"gs://seldon-models/triton/simple",\
                           "requirements":["tensorflow"],\
                           "memoryBytes":500},\
              "deploymentSpec":{"replicas":1}}}' \
         -plaintext \
         -import-path ../../apis \
         -proto ../../apis/mlops/scheduler/scheduler.proto  ${SCHEDULER_IP}:9004 seldon.mlops.scheduler.Scheduler/LoadModel

{
  
}


In [40]:
!grpcurl -d '{"model":{"name":"tfsimple"}}' \
         -plaintext \
         -import-path ../../apis \
         -proto ../../apis/mlops/scheduler/scheduler.proto  ${SCHEDULER_IP}:9004 seldon.mlops.scheduler.Scheduler/ModelStatus

{
  "modelName": "tfsimple",
  "versions": [
    {
      "version": 1,
      "serverName": "triton",
      "modelReplicaState": {
        "0": {
          "state": "Available",
          "lastChangeTimestamp": "2022-01-27T19:33:35.798328336Z"
        }
      },
      "state": {
        "state": "ModelAvailable",
        "availableReplicas": 1,
        "lastChangeTimestamp": "2022-01-27T19:33:35.798328336Z"
      }
    }
  ]
}


In [41]:
!curl -v http://${MESH_IP}/v2/models/tfsimple/infer -H "Content-Type: application/json" -H "seldon-model: tfsimple"\
        -d '{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}'

* Expire in 0 ms for 6 (transfer 0x55aeb02334f0)
*   Trying 172.18.255.3...
* TCP_NODELAY set
* Expire in 200 ms for 4 (transfer 0x55aeb02334f0)
* Connected to 172.18.255.3 (172.18.255.3) port 80 (#0)
> POST /v2/models/tfsimple/infer HTTP/1.1
> Host: 172.18.255.3
> User-Agent: curl/7.64.0
> Accept: */*
> Content-Type: application/json
> seldon-model: tfsimple
> Content-Length: 212
> 
* upload completely sent off: 212 out of 212 bytes
< HTTP/1.1 200 OK
< content-type: application/json
< content-length: 257
< x-envoy-upstream-service-time: 75
< date: Thu, 27 Jan 2022 19:33:38 GMT
< server: envoy
< 
* Connection #0 to host 172.18.255.3 left intact
{"model_name":"tfsimple","model_version":"1","outputs":[{"name":"OUTPUT0","datatype":"INT32","shape":[1,16],"data":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]},{"name":"OUTPUT1","datatype":"INT32","shape":[1,16],"data":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}]}

In [42]:
!cd ../v2 && \
    grpcurl -d '{"model_name":"tfsimple","inputs":[{"name":"INPUT0","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}' \
        -plaintext \
        -proto grpc_service.proto \
        -rpc-header seldon-model:tfsimple \
        ${MESH_IP}:80 inference.GRPCInferenceService/ModelInfer

{
  "modelName": "tfsimple",
  "modelVersion": "1",
  "outputs": [
    {
      "name": "OUTPUT0",
      "datatype": "INT32",
      "shape": [
        "1",
        "16"
      ]
    },
    {
      "name": "OUTPUT1",
      "datatype": "INT32",
      "shape": [
        "1",
        "16"
      ]
    }
  ],
  "rawOutputContents": [
    "AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==",
    "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="
  ]
}


In [43]:
!grpcurl -d '{"model":{"name":"tfsimple"}}' \
         -plaintext \
         -import-path ../../apis \
         -proto ../../apis/mlops/scheduler/scheduler.proto  ${SCHEDULER_IP}:9004 seldon.mlops.scheduler.Scheduler/UnloadModel

{
  
}
