# Deploy TRTIS

In [176]:
%%writefile trtis_service.yaml

apiVersion: v1
kind: Service
metadata:
  labels:
    name: inference-server
  name: inference-server
  namespace: default
spec:
  #externalTrafficPolicy: Cluster
  ports:
  - name: http-inference-server
    port: 8000
    protocol: TCP
    targetPort: 8000
  - name: grpc-inference-server
    port: 8001
    protocol: TCP
    targetPort: 8001
  - name: metrics-inference-server
    port: 8002
    protocol: TCP
    targetPort: 8002
  selector:
    app: inference-server
  sessionAffinity: None
  type: ClusterIP
  #type: LoadBalancer

Overwriting trtis_service.yaml


In [177]:
%%writefile trtis_deploy.yaml

apiVersion: apps/v1
kind: Deployment
metadata:
  name: inference-server
  labels:
    name: inference-server
spec:
  replicas: 1
  selector:
    matchLabels:
      app: inference-server
  template:
    metadata:
      labels:
        app: inference-server
    spec:
      dnsPolicy: ClusterFirst
      imagePullSecrets:
      - name: ngc
      priority: 0
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      serviceAccount: default
      serviceAccountName: default
      terminationGracePeriodSeconds: 30
      containers:
      - args:
        - trtserver
        - --model-store=gs://sandbox-kathryn-models/resnet/
        image: nvcr.io/nvidia/tensorrtserver:19.05-py3
        imagePullPolicy: IfNotPresent
        livenessProbe:
          failureThreshold: 3
          httpGet:
            path: /api/health/live
            port: 8000
            scheme: HTTP
          initialDelaySeconds: 5
          periodSeconds: 5
          successThreshold: 1
          timeoutSeconds: 1
        name: inference-server
        ports:
        - containerPort: 8000
          protocol: TCP
        - containerPort: 8001
          protocol: TCP
        - containerPort: 8002
          protocol: TCP
        readinessProbe:
          failureThreshold: 3
          httpGet:
            path: /api/health/ready
            port: 8000
            scheme: HTTP
          initialDelaySeconds: 5
          periodSeconds: 5
          successThreshold: 1
          timeoutSeconds: 1
        resources:
          limits:
            nvidia.com/gpu: "1"
          requests:
            cpu: 1000m
            nvidia.com/gpu: "1"
        securityContext:
          procMount: Default
          runAsUser: 1000
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File


Overwriting trtis_deploy.yaml


In [178]:
!kubectl delete -f trtis_service.yaml

service "inference-server" deleted


In [179]:
!kubectl create -f trtis_service.yaml

service/inference-server created


In [181]:
!kubectl get svc

NAME               TYPE           CLUSTER-IP      EXTERNAL-IP     PORT(S)                                        AGE
inference-server   ClusterIP      10.59.254.225   <none>          8000/TCP,8001/TCP,8002/TCP                     6m22s
kubernetes         ClusterIP      10.59.240.1     <none>          443/TCP                                        104m
locust-master      LoadBalancer   10.59.242.220   35.200.99.104   8089:32253/TCP,5557:30210/TCP,5558:30840/TCP   62m


In [153]:
!kubectl get pods

NAME                                READY   STATUS    RESTARTS   AGE
inference-server-56588945c6-27l95   1/1     Running   0          36m
locust-master-795dc864b4-2qllm      1/1     Running   0          4m25s
locust-slave-744f96d97c-7cjcp       1/1     Running   0          4m24s
locust-slave-744f96d97c-q8zxr       1/1     Running   0          4m24s
locust-slave-744f96d97c-qfvvd       1/1     Running   0          4m24s


In [134]:
!kubectl get svc

NAME               TYPE           CLUSTER-IP      EXTERNAL-IP     PORT(S)                                        AGE
inference-server   ClusterIP      10.59.246.232   <none>          8000/TCP,8001/TCP,8002/TCP                     21m
kubernetes         ClusterIP      10.59.240.1     <none>          443/TCP                                        54m
locust-master      LoadBalancer   10.59.242.220   35.200.99.104   8089:32253/TCP,5557:30210/TCP,5558:30840/TCP   12m


In [116]:
!kubectl apply -f trtis_deploy.yaml

deployment.apps/inference-server created


In [130]:
!kubectl get pods

NAME                                READY   STATUS    RESTARTS   AGE
inference-server-56588945c6-27l95   1/1     Running   0          3m55s


# Set up Prometheus

In [97]:
%%writefile clusterRole.yml

apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: prometheus
rules:
- apiGroups: [""]
  resources:
  - nodes
  - nodes/proxy
  - services
  - endpoints
  - pods
  verbs: ["get", "list", "watch"]
- apiGroups:
  - extensions
  resources:
  - ingresses
  verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics"]
  verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
- kind: ServiceAccount
  name: default
  namespace: monitoring

Overwriting clusterRole.yml


In [98]:
!kubectl get svc

NAME               TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)                      AGE
inference-server   ClusterIP   10.59.246.232   <none>        8000/TCP,8001/TCP,8002/TCP   41s
kubernetes         ClusterIP   10.59.240.1     <none>        443/TCP                      33m


In [100]:
!kubectl get svc inference-server -o "jsonpath={.spec['clusterIP']}"

10.59.246.232

In [213]:
%%writefile prometheus-configmap.yml

apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-server-conf
  labels:
    name: prometheus-server-conf
  namespace: monitoring
data:
  prometheus.yml: |-
    # my global config
    global:
      scrape_interval:     10s
      evaluation_interval: 10s
      # scrape_timeout is set to the global default (10s).

    # Alertmanager configuration
    alerting:
      alertmanagers:
      - static_configs:
        - targets:
          # - alertmanager:9093

    # Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
    rule_files:
      # - "first_rules.yml"
      # - "second_rules.yml"

    # A scrape configuration containing exactly one endpoint to scrape:
    # Here it's Prometheus itself.
    scrape_configs:
      # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
      - job_name: 'prometheus'

        # metrics_path defaults to '/metrics'
        # scheme defaults to 'http'.

        static_configs:
        - targets: ['10.59.254.225:8002']

Overwriting prometheus-configmap.yml


In [214]:
%%writefile prometheus-deployment.yml

apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: prometheus-deployment
  namespace: monitoring
spec:
  replicas: 1
  template:
    metadata:
      labels:
        app: prometheus-server
    spec:
      containers:
        - name: prometheus
          image: prom/prometheus:latest
          args:
            - "--config.file=/etc/prometheus/prometheus.yml"
            - "--storage.tsdb.path=/prometheus/"
          ports:
            - containerPort: 9090
          volumeMounts:
            - name: prometheus-config-volume
              mountPath: /etc/prometheus
            - name: prometheus-storage-volume
              mountPath: /prometheus
      volumes:
        - name: prometheus-config-volume
          configMap:
            defaultMode: 420
            name: prometheus-server-conf
        - name: prometheus-storage-volume
          emptyDir: {}

Overwriting prometheus-deployment.yml


In [184]:
%%writefile prometheus-service.yml

apiVersion: v1
kind: Service
metadata:
  name: prometheus-service
spec:
  selector: 
    app: prometheus-server
  type: ClusterIP
  ports:
    - port: 8080
      targetPort: 9090

Overwriting prometheus-service.yml


In [185]:
!kubectl create namespace monitoring
!kubectl create -f clusterRole.yml

Error from server (AlreadyExists): namespaces "monitoring" already exists


In [216]:
!kubectl delete -f prometheus-configmap.yml -n monitoring
!kubectl delete -f prometheus-deployment.yml -n monitoring

configmap "prometheus-server-conf" deleted
deployment.extensions "prometheus-deployment" deleted


In [217]:
!kubectl apply -f prometheus-configmap.yml -n monitoring
!kubectl apply -f prometheus-deployment.yml -n monitoring

configmap/prometheus-server-conf created
deployment.extensions/prometheus-deployment created


In [None]:
!kubectl delete -f prometheus-service.yml -n monitoring
!kubectl apply -f prometheus-service.yml -n monitoring

In [192]:
!kubectl get svc -n monitoring

NAME                 TYPE           CLUSTER-IP      EXTERNAL-IP      PORT(S)          AGE
grafana-service      LoadBalancer   10.59.242.109   35.243.122.150   8100:30019/TCP   88m
prometheus-service   ClusterIP      10.59.249.144   <none>           8080/TCP         6s


# Set up Grafana

In [61]:
%%writefile grafana-deployment.yml

apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: grafana-deployment
  namespace: monitoring
spec:
  replicas: 1
  template:
    metadata:
      labels:
        app: grafana-server
    spec:
      containers:
        - name: grafana
          image: grafana/grafana:latest
          #args:
          #  - "--config.file=/root/prometheus.yml"
          #  - "--storage.tsdb.path=/prometheus/"
          ports:
            - containerPort: 3000

Overwriting grafana-deployment.yml


In [62]:
%%writefile grafana-service.yml

apiVersion: v1
kind: Service
metadata:
  name: grafana-service
spec:
  selector: 
    app: grafana-server
  type: LoadBalancer
  ports:
    - port: 8100
      targetPort: 3000
      #nodePort: 30020

Overwriting grafana-service.yml


In [63]:
!kubectl apply -f grafana-deployment.yml -n monitoring

deployment.extensions/grafana-deployment created


In [64]:
!kubectl get pods -n monitoring

NAME                                     READY   STATUS              RESTARTS   AGE
grafana-deployment-644bbcb84-qlb2t       0/1     ContainerCreating   0          0s
prometheus-deployment-544b9b9f98-ml2vs   1/1     Running             0          2m3s


In [65]:
!kubectl create -f grafana-service.yml -n monitoring

service/grafana-service created


In [218]:
!kubectl get svc -n monitoring

NAME                 TYPE           CLUSTER-IP      EXTERNAL-IP      PORT(S)          AGE
grafana-service      LoadBalancer   10.59.242.109   35.243.122.150   8100:30019/TCP   31h
prometheus-service   ClusterIP      10.59.249.144   <none>           8080/TCP         30h
