# forked from predibase/lorax
# values.yaml
---
deployment:
  # Number of LoRAX server replicas.
  replicas: 1
  # Rollout strategy; an empty object falls back to the Kubernetes default.
  updateStrategy: {}
  image:
    repository: "ghcr.io/predibase/lorax"
    # Pinned image tag — presumably a git short SHA; quoted so YAML keeps it a string.
    tag: "f76119a"
  # NOTE(review): these look like lorax-launcher CLI flags — confirm against the chart templates.
  args:
    modelId: "mistralai/Mistral-7B-Instruct-v0.1"
    maxInputLength: 512
    maxTotalTokens: 1024
    maxBatchTotalTokens: 4096
    maxBatchPrefillTokens: 2048
    # Run the model on a single GPU (no tensor-parallel sharding).
    sharded: false
  env:
    # Your huggingface hub token. Required for some models such as the llama-2 family.
    huggingFaceHubToken: ""
    # Model types that support dynamic adapter loading
    loraxEnabledModelTypes: "llama,mistral"
  # Requests equal limits, so the pod gets the Guaranteed QoS class.
  resources:
    limits:
      cpu: "8"
      ephemeral-storage: 100Gi
      memory: 27041Mi
      nvidia.com/gpu: "1"
    requests:
      cpu: "8"
      ephemeral-storage: 100Gi
      memory: 27041Mi
      nvidia.com/gpu: "1"
  livenessProbe:
    # 240 failures x 5s period tolerates a long startup — presumably model
    # download/load time; verify against observed startup duration.
    failureThreshold: 240
    httpGet:
      path: /health
      port: http
      scheme: HTTP
    initialDelaySeconds: 5
    periodSeconds: 5
    successThreshold: 1
    timeoutSeconds: 1
  readinessProbe:
    failureThreshold: 600
    httpGet:
      path: /health
      port: http
      scheme: HTTP
    initialDelaySeconds: 5
    periodSeconds: 5
    successThreshold: 1
    timeoutSeconds: 1
  # Optional scheduling and metadata overrides; empty by default.
  nodeSelector: {}
  tolerations: []
  additionalLabels: {}
  additionalPodLabels: {}
  additionalAnnotations: {}
  additionalPodAnnotations: {}
  affinity: {}
  priorityClassName: ""
service:
  name: "lorax"
  # ClusterIP keeps the server reachable only from inside the cluster.
  serviceType: ClusterIP
  port: 80
  additionalLabels: {}