forked from fauxpilot/fauxpilot
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Galactica models to FasterTransformer fauxpilot#2
Triton with FasterTransformer for Galactica 6.7b works now
- Loading branch information
Showing
8 changed files
with
350 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
331 changes (331 additions, 0 deletions): converter/models/galactica-6.7b-1gpu/fastertransformer/config.pbtxt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,331 @@ | ||
# Triton Inference Server model configuration (protobuf text format) for
# serving facebook/galactica-6.7b through the FasterTransformer backend
# on a single GPU (tensor_para_size=1, pipeline_para_size=1).
name: "fastertransformer"
backend: "fastertransformer"
default_model_filename: "facebook/galactica-6.7b"
max_batch_size: 1024

model_transaction_policy {
  # Non-decoupled: one response per request (no token streaming).
  decoupled: False
}

dynamic_batching {
  # Wait up to 50 ms to batch incoming requests together.
  max_queue_delay_microseconds: 50000
}

# Per-item shape of the ragged "input_ids" input, so the backend can
# recover individual sequence shapes from the ragged batch.
batch_input [
  {
    kind: BATCH_ITEM_SHAPE
    target_name: "input_ids_item_shape"
    data_type: TYPE_INT32
    source_input: "input_ids"
  }
]

# this is not needed when not using request prompt embedding
#batch_input [
#  {
#    kind: BATCH_ITEM_SHAPE
#    target_name: "request_prompt_embedding_item_shape"
#    data_type: TYPE_INT32
#    source_input: "request_prompt_embedding"
#  }
#]

input [
  {
    # Tokenized prompt; ragged so sequences of different lengths batch together.
    name: "input_ids"
    data_type: TYPE_UINT32
    dims: [ -1 ]
    allow_ragged_batch: true
  },
  {
    # Length of each prompt in tokens.
    name: "input_lengths"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
  },
  {
    # Number of tokens to generate.
    name: "request_output_len"
    data_type: TYPE_UINT32
    dims: [ -1 ]
  },
  {
    name: "runtime_top_k"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "runtime_top_p"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "beam_search_diversity_rate"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "temperature"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "len_penalty"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "repetition_penalty"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "random_seed"
    data_type: TYPE_INT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "is_return_log_probs"
    data_type: TYPE_BOOL
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "is_return_context_embeddings"
    data_type: TYPE_BOOL
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "beam_width"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "start_id"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "end_id"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # [2, -1]: token-id rows and offset rows, per FT stop/bad-words encoding.
    name: "stop_words_list"
    data_type: TYPE_INT32
    dims: [ 2, -1 ]
    optional: true
  },
  {
    name: "bad_words_list"
    data_type: TYPE_INT32
    dims: [ 2, -1 ]
    optional: true
  },
  {
    name: "prompt_learning_task_name_ids"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "request_prompt_embedding"
    data_type: TYPE_FP16
    dims: [ -1, -1 ]
    optional: true
    allow_ragged_batch: false
  },
  {
    name: "request_prompt_lengths"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "request_prompt_type"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "top_p_decay"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "top_p_min"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "top_p_reset_ids"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  }
]
output [
  {
    # Generated token ids: [beam_width, seq_len].
    name: "output_ids"
    data_type: TYPE_UINT32
    dims: [ -1, -1 ]
  },
  {
    name: "sequence_length"
    data_type: TYPE_UINT32
    dims: [ -1 ]
  },
  {
    name: "response_input_lengths"
    data_type: TYPE_INT32
    dims: [ -1 ]
  },
  {
    name: "cum_log_probs"
    data_type: TYPE_FP32
    dims: [ -1 ]
  },
  {
    name: "output_log_probs"
    data_type: TYPE_FP32
    dims: [ -1, -1 ]
  },
  {
    name: "context_embeddings"
    data_type: TYPE_FP32
    dims: [ -1, -1 ]
  }
]
instance_group [
  {
    # KIND_CPU is conventional for the FT backend; GPU placement is
    # managed by the backend itself via tensor/pipeline parallel params.
    count: 1
    kind: KIND_CPU
  }
]
parameters {
  key: "tensor_para_size"
  value: {
    string_value: "1"
  }
}
parameters {
  key: "pipeline_para_size"
  value: {
    string_value: "1"
  }
}
parameters {
  key: "max_seq_len"
  value: {
    string_value: "2048"
  }
}
parameters {
  key: "is_half"
  value: {
    string_value: "1"
  }
}
# Model hyperparameters below are for galactica-6.7b (32 layers, 32 heads,
# head dim 128, FFN 16384, vocab 50000) — NOTE(review): verify against the
# Hugging Face model config if the checkpoint is regenerated.
parameters {
  key: "head_num"
  value: {
    string_value: "32"
  }
}
parameters {
  key: "size_per_head"
  value: {
    string_value: "128"
  }
}
parameters {
  key: "inter_size"
  value: {
    string_value: "16384"
  }
}
parameters {
  key: "vocab_size"
  value: {
    string_value: "50000"
  }
}
parameters {
  key: "start_id"
  value: {
    string_value: "0"
  }
}
parameters {
  key: "end_id"
  value: {
    string_value: "2"
  }
}
parameters {
  key: "decoder_layers"
  value: {
    string_value: "32"
  }
}
parameters {
  key: "data_type"
  value: {
    string_value: "fp16"
  }
}
parameters {
  key: "model_type"
  value: {
    string_value: "GPT"
  }
}
parameters {
  key: "model_checkpoint_path"
  value: {
    string_value: "/model/fastertransformer/1/1-gpu"
  }
}
parameters {
  key: "int8_mode"
  value: {
    string_value: "0"
  }
}
parameters {
  key: "enable_custom_all_reduce"
  value: {
    string_value: "0"
  }
}
Oops, something went wrong.