Add Galactica models to FasterTransformer fauxpilot#2
Triton with the FasterTransformer backend now works for Galactica 6.7B.
BlackHC committed Jan 14, 2023
1 parent 303ef0a commit 2ada95e
Showing 8 changed files with 350 additions and 16 deletions.
2 changes: 2 additions & 0 deletions converter/download_and_convert_model.sh
@@ -17,6 +17,8 @@ fi

echo ${MODEL_TYPE}

+export HUGGINGFACE_HUB_CACHE=/root/.cache/huggingface/hub
+
# If the model type is codegen, then we need to download the codegen and convert it:
if [[ ${MODEL_TYPE} == codegen ]]; then
exit
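The new export pins the Hugging Face cache to a fixed path inside the converter container. huggingface_hub reads HUGGINGFACE_HUB_CACHE when it is imported, which is why the variable is exported before any Python runs; a minimal sketch of the behavior this relies on (the printed path layout is illustrative):

```python
# Minimal sketch: the cache location must be set before huggingface_hub is
# imported, because the library reads HUGGINGFACE_HUB_CACHE at import time.
import os
os.environ["HUGGINGFACE_HUB_CACHE"] = "/root/.cache/huggingface/hub"

from huggingface_hub import snapshot_download

# Downloads the checkpoint on first use; later calls reuse the pinned cache.
local_path = snapshot_download("facebook/galactica-6.7b")
print(local_path)  # e.g. .../hub/models--facebook--galactica-6.7b/snapshots/<revision>
```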
2 changes: 1 addition & 1 deletion converter/huggingface_opt_convert.py
@@ -401,7 +401,7 @@ def split_and_convert(args):
parser.add_argument('-trained_gpu_num', '-t_g', type=int, help='How many gpus the model was trained with', default=1)
parser.add_argument('-infer_gpu_num', '-i_g', type=int, help='How many gpus for inference', required=True)
parser.add_argument("-processes", "-p", type=int, help="How many processes to spawn for conversion (default: 4)", default=4)
-parser.add_argument("-weight_data_type", type=str, default="fp32", choices=["fp32", "fp16"])
+parser.add_argument("-weight_data_type", type=str, default="fp16", choices=["fp32", "fp16"])
parser.add_argument("-quantize",
help="Store selected in int8, run the models to determine scaling factors",
action="store_true")
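The fp16 default matches how the Galactica config below serves the model (is_half 1, data_type fp16). Conceptually, the flag picks the dtype the converter casts each tensor to before dumping it as a flat binary; a simplified sketch, with an illustrative file name:

```python
# Simplified sketch of what -weight_data_type selects: the converter casts each
# tensor and writes it as a flat .bin file that FasterTransformer loads at startup.
import numpy as np

def save_weight(tensor: np.ndarray, path: str, weight_data_type: str = "fp16") -> None:
    np_dtype = np.float16 if weight_data_type == "fp16" else np.float32
    tensor.astype(np_dtype).tofile(path)

# Illustrative call; real converter output names follow FT's layer naming scheme.
save_weight(np.zeros((4096, 4096), dtype=np.float32), "model.layers.0.attention.dense.weight.0.bin")
```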
@@ -93,7 +93,7 @@ input [
},
{
name: "random_seed"
-data_type: TYPE_UINT64
+data_type: TYPE_INT32
dims: [ 1 ]
reshape: { shape: [ ] }
optional: true
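After this change, clients must send random_seed as INT32 rather than UINT64. An illustrative tritonclient snippet (batch of 1; the seed value is a placeholder):

```python
# Illustrative: random_seed is now INT32, with shape [batch, 1] under batching.
import numpy as np
import tritonclient.http as httpclient

seed = np.array([[42]], dtype=np.int32)
seed_input = httpclient.InferInput("random_seed", [1, 1], "INT32")
seed_input.set_data_from_numpy(seed)
```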
331 changes: 331 additions & 0 deletions converter/models/galactica-6.7b-1gpu/fastertransformer/config.pbtxt
@@ -0,0 +1,331 @@
name: "fastertransformer"
backend: "fastertransformer"
default_model_filename: "facebook/galactica-6.7b"
max_batch_size: 1024

model_transaction_policy {
decoupled: False
}


dynamic_batching {
max_queue_delay_microseconds: 50000
}

batch_input [
{
kind: BATCH_ITEM_SHAPE
target_name: "input_ids_item_shape"
data_type: TYPE_INT32
source_input: "input_ids"
}
]

# This batch_input is only needed when using request prompt embeddings:
#batch_input [
# {
# kind: BATCH_ITEM_SHAPE
# target_name: "request_prompt_embedding_item_shape"
# data_type: TYPE_INT32
# source_input: "request_prompt_embedding"
# }
#]

input [
{
name: "input_ids"
data_type: TYPE_UINT32
dims: [ -1 ]
allow_ragged_batch: true
},
{
name: "input_lengths"
data_type: TYPE_UINT32
dims: [ 1 ]
reshape: { shape: [ ] }
},
{
name: "request_output_len"
data_type: TYPE_UINT32
dims: [ -1 ]
},
{
name: "runtime_top_k"
data_type: TYPE_UINT32
dims: [ 1 ]
reshape: { shape: [ ] }
optional: true
},
{
name: "runtime_top_p"
data_type: TYPE_FP32
dims: [ 1 ]
reshape: { shape: [ ] }
optional: true
},
{
name: "beam_search_diversity_rate"
data_type: TYPE_FP32
dims: [ 1 ]
reshape: { shape: [ ] }
optional: true
},
{
name: "temperature"
data_type: TYPE_FP32
dims: [ 1 ]
reshape: { shape: [ ] }
optional: true
},
{
name: "len_penalty"
data_type: TYPE_FP32
dims: [ 1 ]
reshape: { shape: [ ] }
optional: true
},
{
name: "repetition_penalty"
data_type: TYPE_FP32
dims: [ 1 ]
reshape: { shape: [ ] }
optional: true
},
{
name: "random_seed"
data_type: TYPE_INT32
dims: [ 1 ]
reshape: { shape: [ ] }
optional: true
},
{
name: "is_return_log_probs"
data_type: TYPE_BOOL
dims: [ 1 ]
reshape: { shape: [ ] }
optional: true
},
{
name: "is_return_context_embeddings"
data_type: TYPE_BOOL
dims: [ 1 ]
reshape: { shape: [ ] }
optional: true
},
{
name: "beam_width"
data_type: TYPE_UINT32
dims: [ 1 ]
reshape: { shape: [ ] }
optional: true
},
{
name: "start_id"
data_type: TYPE_UINT32
dims: [ 1 ]
reshape: { shape: [ ] }
optional: true
},
{
name: "end_id"
data_type: TYPE_UINT32
dims: [ 1 ]
reshape: { shape: [ ] }
optional: true
},
{
name: "stop_words_list"
data_type: TYPE_INT32
dims: [ 2, -1 ]
optional: true
},
{
name: "bad_words_list"
data_type: TYPE_INT32
dims: [ 2, -1 ]
optional: true
},
{
name: "prompt_learning_task_name_ids"
data_type: TYPE_UINT32
dims: [ 1 ]
reshape: { shape: [ ] }
optional: true
},
{
name: "request_prompt_embedding"
data_type: TYPE_FP16
dims: [ -1, -1 ]
optional: true
allow_ragged_batch: false
},
{
name: "request_prompt_lengths"
data_type: TYPE_UINT32
dims: [ 1 ]
reshape: { shape: [ ] }
optional: true
},
{
name: "request_prompt_type"
data_type: TYPE_UINT32
dims: [ 1 ]
reshape: { shape: [ ] }
optional: true
},
{
name: "top_p_decay"
data_type: TYPE_FP32
dims: [ 1 ]
reshape: { shape: [ ] }
optional: true
},
{
name: "top_p_min"
data_type: TYPE_FP32
dims: [ 1 ]
reshape: { shape: [ ] }
optional: true
},
{
name: "top_p_reset_ids"
data_type: TYPE_UINT32
dims: [ 1 ]
reshape: { shape: [ ] }
optional: true
}
]
output [
{
name: "output_ids"
data_type: TYPE_UINT32
dims: [ -1, -1 ]
},
{
name: "sequence_length"
data_type: TYPE_UINT32
dims: [ -1 ]
},
{
name: "response_input_lengths"
data_type: TYPE_INT32
dims: [ -1 ]
},
{
name: "cum_log_probs"
data_type: TYPE_FP32
dims: [ -1 ]
},
{
name: "output_log_probs"
data_type: TYPE_FP32
dims: [ -1, -1 ]
},
{
name: "context_embeddings"
data_type: TYPE_FP32
dims: [ -1, -1 ]
}
]
instance_group [
{
count: 1
kind: KIND_CPU
}
]
parameters {
key: "tensor_para_size"
value: {
string_value: "1"
}
}
parameters {
key: "pipeline_para_size"
value: {
string_value: "1"
}
}
parameters {
key: "max_seq_len"
value: {
string_value: "2048"
}
}
parameters {
key: "is_half"
value: {
string_value: "1"
}
}
parameters {
key: "head_num"
value: {
string_value: "32"
}
}
parameters {
key: "size_per_head"
value: {
string_value: "128"
}
}
parameters {
key: "inter_size"
value: {
string_value: "16384"
}
}
parameters {
key: "vocab_size"
value: {
string_value: "50000"
}
}
parameters {
key: "start_id"
value: {
string_value: "0"
}
}
parameters {
key: "end_id"
value: {
string_value: "2"
}
}
parameters {
key: "decoder_layers"
value: {
string_value: "32"
}
}
parameters {
key: "data_type"
value: {
string_value: "fp16"
}
}
parameters {
key: "model_type"
value: {
string_value: "GPT"
}
}
parameters {
key: "model_checkpoint_path"
value: {
string_value: "/model/fastertransformer/1/1-gpu"
}
}
parameters {
key: "int8_mode"
value: {
string_value: "0"
}
}
parameters {
key: "enable_custom_all_reduce"
value: {
string_value: "0"
}
}
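As a sanity check, the geometry parameters above are consistent with a ~6.7B-parameter OPT-style model: hidden size is head_num × size_per_head = 4096, and inter_size is 4 × 4096. A rough count that ignores biases and LayerNorm weights:

```python
# Rough parameter count from the config values above (biases/LayerNorm ignored).
hidden = 32 * 128              # head_num * size_per_head = 4096
inter = 16384                  # inter_size = 4 * hidden
vocab, layers, seq = 50000, 32, 2048

embed = vocab * hidden + seq * hidden   # token + learned position embeddings
attn = 4 * hidden * hidden              # Q, K, V and output projections
ffn = 2 * hidden * inter                # up and down projections
total = embed + layers * (attn + ffn)
print(f"{total / 1e9:.2f}B")            # ~6.66B, i.e. the advertised 6.7b
```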

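model_checkpoint_path points at the converter output mounted into the Triton model repository. Assuming the usual FasterTransformer converter layout (a config.ini with hyperparameters plus flat model.*.bin weight files; this is an assumption about the converter output, not something shown in this diff), the directory can be sanity-checked like this:

```python
# Assumed checkpoint layout (config.ini + model.*.bin); adjust if the converter
# output differs, since this structure is not shown in the diff itself.
from pathlib import Path

ckpt = Path("/model/fastertransformer/1/1-gpu")
print((ckpt / "config.ini").exists())
print(sorted(p.name for p in ckpt.glob("model.*.bin"))[:5])
```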
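Finally, a hypothetical end-to-end request against this config: only input_ids, input_lengths, and request_output_len lack optional: true, so a minimal client sends just those three. The server address and token ids are placeholders:

```python
# Hypothetical minimal request using only the three required inputs.
import numpy as np
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient("localhost:8000")

ids = np.array([[2, 100, 200, 300]], dtype=np.uint32)   # placeholder token ids
lengths = np.array([[ids.shape[1]]], dtype=np.uint32)
out_len = np.array([[32]], dtype=np.uint32)             # generate 32 tokens

inputs = []
for name, arr in [("input_ids", ids),
                  ("input_lengths", lengths),
                  ("request_output_len", out_len)]:
    t = httpclient.InferInput(name, list(arr.shape), "UINT32")
    t.set_data_from_numpy(arr)
    inputs.append(t)

result = client.infer("fastertransformer", inputs)
print(result.as_numpy("output_ids").shape)  # typically (batch, beam_width, seq_len)
```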