From ba0f5b2358f550a06f2c35ac71ebb053c80bbb22 Mon Sep 17 00:00:00 2001 From: LZHgrla Date: Mon, 4 Sep 2023 19:37:48 +0800 Subject: [PATCH 01/25] refactor tools --- xtuner/tools/chat.py | 104 ++++---- xtuner/tools/chat_hf.py | 235 ------------------ .../tools/model_converters/adapter_pth2hf.py | 14 +- .../tools/model_converters/merge_adapter.py | 76 ++---- .../model_converters/merge_adapter_hf.py | 45 ---- 5 files changed, 81 insertions(+), 393 deletions(-) delete mode 100644 xtuner/tools/chat_hf.py delete mode 100644 xtuner/tools/model_converters/merge_adapter_hf.py diff --git a/xtuner/tools/chat.py b/xtuner/tools/chat.py index db3128e65..2af9143a4 100644 --- a/xtuner/tools/chat.py +++ b/xtuner/tools/chat.py @@ -1,35 +1,46 @@ # Copyright (c) OpenMMLab. All rights reserved. import argparse -import os import re import torch -from mmengine.config import Config, DictAction -from transformers import GenerationConfig +from peft import PeftModel +from transformers import (AutoModelForCausalLM, AutoTokenizer, + BitsAndBytesConfig, GenerationConfig) -from xtuner.configs import cfgs_name_path -from xtuner.registry import BUILDER from xtuner.tools.utils import get_chat_utils, update_stop_criteria from xtuner.utils import PROMPT_TEMPLATE +def remove_prefix(state_dict, prefix): + new_state_dict = {} + for key, value in state_dict.items(): + if key.startswith(prefix): + new_key = key[len(prefix):] + new_state_dict[new_key] = value + else: + new_state_dict[key] = value + return new_state_dict + + def parse_args(): - parser = argparse.ArgumentParser( - description='Chat with a pretrained model') + parser = argparse.ArgumentParser(description='Chat with a HF model') parser.add_argument( - 'config', - help='config file name or path. 
Note: Please use the original ' - 'configs, instead of the automatically saved log configs.') - parser.add_argument('--adapter', default=None, help='adapter model') + 'model_name_or_path', help='Hugging Face model name or path') + parser.add_argument('--pretrained', default=None, help='pretrained path') + parser.add_argument('--adapter', default=None, help='adapter name or path') parser.add_argument( '--prompt-template', choices=PROMPT_TEMPLATE.keys(), default=None, help='Specify a prompt option') parser.add_argument( - '--is-deepspeed', - action='store_true', - help='whether the adapter is saved from deepspeed') + '--bits', + type=int, + choices=[4, 8, None], + default=None, + help='LLM bits') + parser.add_argument( + '--bot-name', type=str, default='BOT', help='Name for Bot') parser.add_argument( '--with-plugins', nargs='+', @@ -67,16 +78,6 @@ def parse_args(): type=int, default=0, help='Random seed for reproducible text generation') - parser.add_argument( - '--cfg-options', - nargs='+', - action=DictAction, - help='override some settings in the used config, the key-value pair ' - 'in xxx=yyy format will be merged into config file. If the value to ' - 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' - 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' - 'Note that the quotation marks are necessary and that no white space ' - 'is allowed.') args = parser.parse_args() return args @@ -119,29 +120,36 @@ def main(): torch.manual_seed(args.seed) - # parse config - if not os.path.isfile(args.config): - try: - args.config = cfgs_name_path[args.config] - except KeyError: - raise FileNotFoundError(f'Cannot find {args.config}') - - # load config - cfg = Config.fromfile(args.config) - if args.cfg_options is not None: - cfg.merge_from_dict(args.cfg_options) - - model = BUILDER.build(cfg.model) - # Cast to inference mode - model.llm.gradient_checkpointing_disable() - model.llm.config.use_cache = True - - tokenizer = BUILDER.build(cfg.tokenizer) - + # build model + quantization_config = None + load_in_8bit = False + if args.bits == 4: + quantization_config = BitsAndBytesConfig( + load_in_4bit=True, + load_in_8bit=False, + llm_int8_threshold=6.0, + llm_int8_has_fp16_weight=False, + bnb_4bit_compute_dtype=torch.float16, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type='nf4') + elif args.bits == 8: + load_in_8bit = True + assert args.pretrained is None or args.bits is None + model = AutoModelForCausalLM.from_pretrained( + args.model_name_or_path, + quantization_config=quantization_config, + load_in_8bit=load_in_8bit, + device_map='auto', + trust_remote_code=True) + if args.pretrained is not None: + pretrained_ckpt = torch.load(args.pretrained, map_location='cpu') + pretrained_ckpt = remove_prefix(pretrained_ckpt, 'llm.') + model.load_state_dict(pretrained_ckpt) + print(f'Load pretrained weight from {args.pretrained}') + tokenizer = AutoTokenizer.from_pretrained( + args.model_name_or_path, trust_remote_code=True) if args.adapter is not None: - adapter = torch.load(args.adapter, map_location='cpu') - state_dict_key = 'module' if args.is_deepspeed else 'state_dict' - model.load_state_dict(adapter[state_dict_key], strict=False) + model = PeftModel.from_pretrained(model, args.adapter) print(f'Load adapter 
from {args.adapter}') Streamer, stop_criteria = get_chat_utils(model) @@ -173,10 +181,10 @@ def main(): template = PROMPT_TEMPLATE[args.prompt_template] if 'INSTRUCTION_START' in template and n_turn == 0: prompt_text = template['INSTRUCTION_START'].format( - input=text, round=n_turn + 1, **cfg) + input=text, round=n_turn + 1, bot_name=args.bot_name) else: prompt_text = template['INSTRUCTION'].format( - input=text, round=n_turn + 1, **cfg) + input=text, round=n_turn + 1, bot_name=args.bot_name) if args.prompt_template == 'moss_sft': if not inner_thoughts_open: prompt_text.replace('- Inner thoughts: enabled.', diff --git a/xtuner/tools/chat_hf.py b/xtuner/tools/chat_hf.py deleted file mode 100644 index 7cc81d89a..000000000 --- a/xtuner/tools/chat_hf.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import argparse -import re - -import torch -from peft import PeftModel -from transformers import (AutoModelForCausalLM, AutoTokenizer, - BitsAndBytesConfig, GenerationConfig) - -from xtuner.tools.utils import get_chat_utils, update_stop_criteria -from xtuner.utils import PROMPT_TEMPLATE - - -def parse_args(): - parser = argparse.ArgumentParser(description='Chat with a HF model') - parser.add_argument( - 'model_name_or_path', help='Hugging Face model name or path') - parser.add_argument('--adapter', default=None, help='adapter name or path') - parser.add_argument( - '--prompt-template', - choices=PROMPT_TEMPLATE.keys(), - default=None, - help='Specify a prompt option') - parser.add_argument( - '--bot-name', type=str, default='BOT', help='Name for Bot') - parser.add_argument( - '--with-plugins', - nargs='+', - choices=['calculate', 'solve', 'search'], - help='Specify plugins to use') - parser.add_argument( - '--no-streamer', action='store_true', help='Whether to with streamer') - parser.add_argument('--command-stop-word', default=None, help='Stop key') - parser.add_argument('--answer-stop-word', default=None, help='Stop key') - 
parser.add_argument( - '--max-new-tokens', - type=int, - default=2048, - help='Maximum number of new tokens allowed in generated text') - parser.add_argument( - '--temperature', - type=float, - default=0.1, - help='The value used to modulate the next token probabilities.') - parser.add_argument( - '--top-k', - type=int, - default=40, - help='The number of highest probability vocabulary tokens to ' - 'keep for top-k-filtering.') - parser.add_argument( - '--top-p', - type=float, - default=0.75, - help='If set to float < 1, only the smallest set of most probable ' - 'tokens with probabilities that add up to top_p or higher are ' - 'kept for generation.') - parser.add_argument( - '--seed', - type=int, - default=0, - help='Random seed for reproducible text generation') - args = parser.parse_args() - return args - - -def get_input(): - """Helper function for getting input from users.""" - sentinel = '' # ends when this string is seen - result = None - while result is None: - print('\ndouble enter to end input >>> ', end='') - try: - result = '\n'.join(iter(input, sentinel)) - except UnicodeDecodeError: - print('Invalid characters detected. 
Please enter again.') - return result - - -def main(): - args = parse_args() - - if args.with_plugins is None: - inner_thoughts_open = False - calculate_open = False - solve_open = False - search_open = False - else: - assert args.prompt_template == 'moss_sft' - from plugins import plugins_api - inner_thoughts_open = True - calculate_open = 'calculate' in args.with_plugins - solve_open = 'solve' in args.with_plugins - search_open = 'search' in args.with_plugins - # pre-import for api and model preparation - if calculate_open: - from plugins import calculate # noqa: F401 - if solve_open: - from plugins import solve # noqa: F401 - if search_open: - from plugins import search # noqa: F401 - - torch.manual_seed(args.seed) - - # build model - quantization_config = BitsAndBytesConfig( - load_in_4bit=True, - load_in_8bit=False, - llm_int8_threshold=6.0, - llm_int8_has_fp16_weight=False, - bnb_4bit_compute_dtype=torch.float16, - bnb_4bit_use_double_quant=True, - bnb_4bit_quant_type='nf4') - model = AutoModelForCausalLM.from_pretrained( - args.model_name_or_path, - quantization_config=quantization_config, - trust_remote_code=True) - tokenizer = AutoTokenizer.from_pretrained( - args.model_name_or_path, trust_remote_code=True) - if args.adapter is not None: - model = PeftModel.from_pretrained(model, args.adapter) - print(f'Load adapter from {args.adapter}') - - Streamer, stop_criteria = get_chat_utils(model) - if args.no_streamer: - Streamer = None - - command_stop_cr, answer_stop_cr = update_stop_criteria( - base=stop_criteria, - tokenizer=tokenizer, - command_stop_word=args.command_stop_word, - answer_stop_word=args.answer_stop_word) - - gen_config = GenerationConfig( - max_new_tokens=args.max_new_tokens, - do_sample=args.temperature > 0, - temperature=args.temperature, - top_p=args.top_p, - top_k=args.top_k, - ) - - n_turn = 0 - inputs = '' - while True: - text = get_input() - - if text == 'exit': - exit(0) - if args.prompt_template is not None: - template = 
PROMPT_TEMPLATE[args.prompt_template] - if 'INSTRUCTION_START' in template and n_turn == 0: - prompt_text = template['INSTRUCTION_START'].format( - input=text, round=n_turn + 1, bot_name=args.bot_name) - else: - prompt_text = template['INSTRUCTION'].format( - input=text, round=n_turn + 1, bot_name=args.bot_name) - if args.prompt_template == 'moss_sft': - if not inner_thoughts_open: - prompt_text.replace('- Inner thoughts: enabled.', - '- Inner thoughts: disabled.') - if not calculate_open: - prompt_text.replace( - '- Calculator: enabled. API: Calculate(expression)', - '- Calculator: disabled.') - if not solve_open: - prompt_text.replace( - '- Equation solver: enabled. API: Solve(equation)', - '- Equation solver: disabled.') - if not search_open: - prompt_text.replace( - '- Web search: enabled. API: Search(query)', - '- Web search: disabled.') - - inputs += prompt_text - else: - inputs += text - ids = tokenizer.encode(inputs, return_tensors='pt') - streamer = Streamer(tokenizer) if Streamer is not None else None - if args.with_plugins is not None: - generate_output = model.generate( - inputs=ids.cuda(), - generation_config=gen_config, - streamer=streamer, - stopping_criteria=command_stop_cr).cpu() - generate_output_text = tokenizer.decode( - generate_output[0][len(ids[0]):]) - if streamer is None: - end = '' if generate_output_text[-1] == '\n' else '\n' - print(generate_output_text, end=end) - pattern = r'<\|Commands\|>:(.*?)' - command_text = ', '.join(re.findall(pattern, generate_output_text)) - extent_text = plugins_api( - command_text, - calculate_open=calculate_open, - solve_open=solve_open, - search_open=search_open) - end = '' if extent_text[-1] == '\n' else '\n' - print(extent_text, end=end) - extent_text_ids = tokenizer.encode( - extent_text, return_tensors='pt', add_special_tokens=False) - new_ids = torch.cat((generate_output, extent_text_ids), dim=1) - new_streamer = Streamer( - tokenizer) if Streamer is not None else None - generate_output = 
model.generate( - inputs=new_ids.cuda(), - generation_config=gen_config, - streamer=new_streamer, - stopping_criteria=answer_stop_cr) - if streamer is None: - output_text = tokenizer.decode( - generate_output[0][len(new_ids[0]):]) - end = '' if output_text[-1] == '\n' else '\n' - print(output_text, end=end) - else: - generate_output = model.generate( - inputs=ids.cuda(), - generation_config=gen_config, - streamer=streamer, - stopping_criteria=answer_stop_cr) - if streamer is None: - output_text = tokenizer.decode( - generate_output[0][len(ids[0]):]) - end = '' if output_text[-1] == '\n' else '\n' - print(output_text, end=end) - inputs = tokenizer.decode(generate_output[0]) - n_turn += 1 - if len(generate_output[0]) >= args.max_new_tokens: - print('Remove the memory of history responses, since ' - f'it exceeds the length limitation {args.max_new_tokens}.') - n_turn = 0 - inputs = '' - - -if __name__ == '__main__': - main() diff --git a/xtuner/tools/model_converters/adapter_pth2hf.py b/xtuner/tools/model_converters/adapter_pth2hf.py index 80ae2e209..ce69c90a9 100644 --- a/xtuner/tools/model_converters/adapter_pth2hf.py +++ b/xtuner/tools/model_converters/adapter_pth2hf.py @@ -54,18 +54,12 @@ def main(): if args.cfg_options is not None: cfg.merge_from_dict(args.cfg_options) - # load on cpu - cfg.model.llm.device_map = 'cpu' - if cfg.model.llm.get('quantization_config'): - cfg.model.llm.quantization_config.\ - llm_int8_enable_fp32_cpu_offload = True - model = BUILDER.build(cfg.model) - adapter_checkpoint = torch.load( - args.adapter_checkpoint, map_location='cpu') - state_dict_key = 'module' if args.is_deepspeed else 'state_dict' - model.load_state_dict(adapter_checkpoint[state_dict_key], strict=False) + state_dict = torch.load(args.adapter_checkpoint, map_location='cpu') + if not args.is_deepspeed: + state_dict = state_dict['state_dict'] + model.load_state_dict(state_dict, strict=False) print(f'Load adapter from {args.adapter_checkpoint}') 
mkdir_or_exist(args.save_dir) diff --git a/xtuner/tools/model_converters/merge_adapter.py b/xtuner/tools/model_converters/merge_adapter.py index 7383f23dc..2de6bc23a 100644 --- a/xtuner/tools/model_converters/merge_adapter.py +++ b/xtuner/tools/model_converters/merge_adapter.py @@ -1,74 +1,40 @@ # Copyright (c) OpenMMLab. All rights reserved. import argparse -import os import torch -from mmengine.config import Config, DictAction - -from xtuner.configs import cfgs_name_path -from xtuner.registry import BUILDER +from peft import PeftModel +from transformers import AutoModelForCausalLM, AutoTokenizer def parse_args(): - parser = argparse.ArgumentParser(description='Merge a pth adapter to LLM') - parser.add_argument( - 'config', - help='config file name or path. Note: Please use the original ' - 'configs, instead of the automatically saved log configs.') - parser.add_argument('adapter_checkpoint', help='adapter checkpoint file') + parser = argparse.ArgumentParser( + description='Merge a HuggingFace adapter to LLM') + parser.add_argument('model_name_or_path', help='model name or path') + parser.add_argument('adapter_name_or_path', help='adapter name or path') parser.add_argument( 'save_dir', help='the directory to save the merged model') parser.add_argument('--max-shard-size', type=str, default='2GB') - parser.add_argument( - '--is-deepspeed', - action='store_true', - help='whether the adapter is saved from deepspeed') - parser.add_argument( - '--cfg-options', - nargs='+', - action=DictAction, - help='override some settings in the used config, the key-value pair ' - 'in xxx=yyy format will be merged into config file. If the value to ' - 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' - 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' - 'Note that the quotation marks are necessary and that no white space ' - 'is allowed.') args = parser.parse_args() return args def main(): args = parse_args() - - # parse config - if not os.path.isfile(args.config): - try: - args.config = cfgs_name_path[args.config] - except KeyError: - raise FileNotFoundError(f'Cannot find {args.config}') - - # load config - cfg = Config.fromfile(args.config) - if args.cfg_options is not None: - cfg.merge_from_dict(args.cfg_options) - - # load on cpu, with non-quantized - cfg.model.llm.device_map = 'cpu' - cfg.model.llm.quantization_config = None - cfg.model.llm.low_cpu_mem_usage = True - torch_dtype = cfg.model.llm.get('torch_dtype', torch.float16) - model = BUILDER.build(cfg.model) - tokenizer = BUILDER.build(cfg.tokenizer) - adapter_checkpoint = torch.load( - args.adapter_checkpoint, map_location='cpu') - state_dict_key = 'module' if args.is_deepspeed else 'state_dict' - model.load_state_dict(adapter_checkpoint[state_dict_key], strict=False) - print(f'Load adapter from {args.adapter_checkpoint}') - - model = model.llm - model_merged = model.merge_and_unload() - for param in model.parameters(): - param.data = param.data.to(torch_dtype) + model = AutoModelForCausalLM.from_pretrained( + args.model_name_or_path, + torch_dtype=torch.float16, + low_cpu_mem_usage=True, + device_map='cpu', + trust_remote_code=True) + tokenizer = AutoTokenizer.from_pretrained( + args.model_name_or_path, trust_remote_code=True) + model_unmerged = PeftModel.from_pretrained( + model, + args.adapter_name_or_path, + device_map='cpu', + torch_dtype=torch.float16, + is_trainable=False) + model_merged = model_unmerged.merge_and_unload() model_merged.save_pretrained( args.save_dir, max_shard_size=args.max_shard_size) tokenizer.save_pretrained(args.save_dir) diff --git a/xtuner/tools/model_converters/merge_adapter_hf.py b/xtuner/tools/model_converters/merge_adapter_hf.py deleted file mode 100644 index 2de6bc23a..000000000 --- 
a/xtuner/tools/model_converters/merge_adapter_hf.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import argparse - -import torch -from peft import PeftModel -from transformers import AutoModelForCausalLM, AutoTokenizer - - -def parse_args(): - parser = argparse.ArgumentParser( - description='Merge a HuggingFace adapter to LLM') - parser.add_argument('model_name_or_path', help='model name or path') - parser.add_argument('adapter_name_or_path', help='adapter name or path') - parser.add_argument( - 'save_dir', help='the directory to save the merged model') - parser.add_argument('--max-shard-size', type=str, default='2GB') - args = parser.parse_args() - return args - - -def main(): - args = parse_args() - model = AutoModelForCausalLM.from_pretrained( - args.model_name_or_path, - torch_dtype=torch.float16, - low_cpu_mem_usage=True, - device_map='cpu', - trust_remote_code=True) - tokenizer = AutoTokenizer.from_pretrained( - args.model_name_or_path, trust_remote_code=True) - model_unmerged = PeftModel.from_pretrained( - model, - args.adapter_name_or_path, - device_map='cpu', - torch_dtype=torch.float16, - is_trainable=False) - model_merged = model_unmerged.merge_and_unload() - model_merged.save_pretrained( - args.save_dir, max_shard_size=args.max_shard_size) - tokenizer.save_pretrained(args.save_dir) - print(f'Save to {args.save_dir}') - - -if __name__ == '__main__': - main() From 742b0e0c0385b6918e7ef6a034251e04171ee718 Mon Sep 17 00:00:00 2001 From: LZHgrla Date: Mon, 4 Sep 2023 19:38:10 +0800 Subject: [PATCH 02/25] modify entry_point --- xtuner/entry_point.py | 52 ++++++++----------------------------------- 1 file changed, 9 insertions(+), 43 deletions(-) diff --git a/xtuner/entry_point.py b/xtuner/entry_point.py index 628610d38..c57ff2810 100644 --- a/xtuner/entry_point.py +++ b/xtuner/entry_point.py @@ -8,10 +8,10 @@ from mmengine.logging import print_log import xtuner -from xtuner.tools import chat, chat_hf, copy_cfg, list_cfg, 
test, train +from xtuner.tools import chat, copy_cfg, list_cfg, test, train from xtuner.tools.data_preprocess import arxiv as arxiv_preprocess from xtuner.tools.model_converters import (adapter_pth2hf, merge_adapter, - merge_adapter_hf, split_hf_llm) + split_hf_llm) # Define valid modes MODES = ('list-cfg', 'copy-cfg', 'train', 'test', 'chat', 'convert', @@ -37,17 +37,13 @@ xtuner train $CONFIG 3-2. Fine-tune LLMs by multiple GPUs: NPROC_PER_NODE=$NGPUS NNODES=$NNODES NODE_RANK=$NODE_RANK PORT=$PORT ADDR=$ADDR xtuner dist_train $CONFIG $GPUS - 4-1. Chat with LLMs with HuggingFace's model and adapter: - xtuner chat hf $NAME_OR_PATH_TO_HF_MODEL --adapter $NAME_OR_PATH_TO_HF_ADAPTER --prompt-template $PROMPT_TEMPLATE - 4-2. Chat with LLMs with XTuner's config and adapter: - xtuner chat xtuner $CONFIG --adapter $PATH_TO_PTH_ADAPTER --prompt $PROMPT_TEMPLATE + 4. Chat with LLMs with HuggingFace's model and adapter: + xtuner chat $NAME_OR_PATH_TO_HF_MODEL --adapter $NAME_OR_PATH_TO_HF_ADAPTER --prompt-template $PROMPT_TEMPLATE 5-1. Convert the pth adapter to HuggingFace's adapter: xtuner convert adapter_pth2hf $CONFIG $PATH_TO_PTH_ADAPTER $SAVE_PATH_TO_HF_ADAPTER 5-2. Merge the HuggingFace's adapter to the pretrained LLM: - xtuner convert merge_adapter_hf $NAME_OR_PATH_TO_HF_MODEL $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH - 5-3. Merge the XTuner's adapter to the pretraiend LLM: - xtuner convert merge_adapter $CONFIG $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH - 5-4. Split HuggingFace's LLM to the smallest sharded one: + xtuner convert merge_adapter $NAME_OR_PATH_TO_HF_MODEL $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH + 5-3. Split HuggingFace's LLM to the smallest sharded one: xtuner convert split_hf_llm $NAME_OR_PATH_TO_HF_MODEL $SAVE_PATH 6-1. Preprocess arxiv dataset: xtuner preprocess arxiv $SRC_FILE $DST_FILE --start-date $START_DATE --categories $CATEGORIES @@ -76,11 +72,8 @@ 1. 
Convert the pth adapter to HuggingFace's adapter: xtuner convert adapter_pth2hf $CONFIG $PATH_TO_PTH_ADAPTER $SAVE_PATH_TO_HF_ADAPTER 2. Merge the HuggingFace's adapter to the pretrained LLM: - xtuner convert merge_adapter_hf $NAME_OR_PATH_TO_HF_MODEL $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH - 3. Merge the XTuner's - adapter to the pretraiend LLM: - xtuner convert merge_adapter $CONFIG $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH - 4. Split HuggingFace's LLM to the smallest sharded one: + xtuner convert merge_adapter $NAME_OR_PATH_TO_HF_MODEL $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH + 3. Split HuggingFace's LLM to the smallest sharded one: xtuner convert split_hf_llm $NAME_OR_PATH_TO_HF_MODEL $SAVE_PATH GitHub: https://github.com/InternLM/xtuner @@ -105,27 +98,6 @@ GitHub: https://github.com/InternLM/xtuner """ # noqa: E501 - -CHAT_HELP_MSG = \ - f""" - Arguments received: {str(['xtuner'] + sys.argv[1:])}. xtuner commands use the following syntax: - - xtuner MODE MODE_ARGS ARGS - - Where MODE (required) is one of {MODES} - MODE_ARG (optional) is the argument for specific mode - ARGS (optional) are the arguments for specific command - - Some usages for chat: (See more by using -h for specific command!) - - 1. Chat with LLMs with HuggingFace's model and adapter: - xtuner chat hf $NAME_OR_PATH_TO_HF_MODEL --adapter $NAME_OR_PATH_TO_HF_ADAPTER --prompt-template $PROMPT_TEMPLATE - 2. 
Chat with LLMs with XTuner's config and adapter: - xtuner chat xtuner internlm_7b_qlora_alpaca --adapter $PATH_TO_PTH_ADAPTER --prompt $PROMPT_TEMPLATE - - GitHub: https://github.com/InternLM/xtuner - """ # noqa: E501 - special = { 'help': lambda: print_log(CLI_HELP_MSG, 'current'), 'version': lambda: print_log(xtuner.__version__, 'current') @@ -143,16 +115,10 @@ 'copy-cfg': copy_cfg.__file__, 'train': train.__file__, 'test': test.__file__, - 'chat': { - 'hf': chat_hf.__file__, - 'xtuner': chat.__file__, - '--help': lambda: print_log(CHAT_HELP_MSG, 'current'), - '-h': lambda: print_log(CHAT_HELP_MSG, 'current') - }, + 'chat': chat.__file__, 'convert': { 'adapter_pth2hf': adapter_pth2hf.__file__, 'merge_adapter': merge_adapter.__file__, - 'merge_adapter_hf': merge_adapter_hf.__file__, 'split_hf_llm': split_hf_llm.__file__, '--help': lambda: print_log(CONVERT_HELP_MSG, 'current'), '-h': lambda: print_log(CONVERT_HELP_MSG, 'current') From 94844443ea46af6b5bee7f861e5591553e283379 Mon Sep 17 00:00:00 2001 From: LZHgrla Date: Mon, 4 Sep 2023 19:38:41 +0800 Subject: [PATCH 03/25] modify docs --- README.md | 2 +- README_zh-CN.md | 2 +- docs/en/user_guides/chat.md | 36 +++++++++++++++++----------------- docs/zh_cn/user_guides/chat.md | 36 +++++++++++++++++----------------- 4 files changed, 38 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index 7879ea4d4..135f8b16f 100644 --- a/README.md +++ b/README.md @@ -147,7 +147,7 @@ XTuner provides tools to chat with pretrained / fine-tuned LLMs. 
- For example, we can start the chat with Llama2-7B-Plugins by ```shell - xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer ``` For more examples, please see [chat.md](./docs/en/user_guides/chat.md). diff --git a/README_zh-CN.md b/README_zh-CN.md index bce94d4e2..e2d637354 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -147,7 +147,7 @@ XTuner 提供与大语言模型对话的工具。 - 例如,与基于插件微调获得的 Llama2-7B-Plugins 对话: ```shell - xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer ``` 更多示例,请查阅[文档](./docs/zh_cn/user_guides/chat.md)。 diff --git a/docs/en/user_guides/chat.md b/docs/en/user_guides/chat.md index 2914296f0..9b9e35a87 100644 --- a/docs/en/user_guides/chat.md +++ b/docs/en/user_guides/chat.md @@ -5,49 +5,49 @@ - InternLM-7B, oasst1 ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant ``` - InternLM-7B, Arxiv Gentitle ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title + xtuner chat internlm/internlm-7b --adapter 
xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title ``` - InternLM-7B, Colorist ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist ``` - InternLM-7B, Coder ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code ``` - InternLM-7B, SQL ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql ``` - InternLM-7B, Lawyer ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer ``` - InternLM-7B, Open-Platypus ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca ``` - InternLM-7B, Alpaca-enzh ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca ``` ## Chat with [Llama2](https://github.com/facebookresearch/llama) @@ -58,19 +58,19 @@ ```shell export SERPER_API_KEY="xxx" # Please get the key from https://serper.dev to support google search! 
- xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer ``` - Llama2-7B, Arxiv Gentitle ```shell - xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title ``` - Llama2-7B, Colorist ```shell - xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist ``` ## Chat with [Qwen](https://github.com/QwenLM) @@ -79,25 +79,25 @@ ```shell export SERPER_API_KEY="xxx" # Please get the key from https://serper.dev to support google search! 
- xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" ``` - Qwen-7B, oasst1 ```shell - xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>' + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>' ``` - Qwen-7B, Arxiv Gentitle ```shell - xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>' + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>' ``` - Qwen-7B, Alpaca-enzh ```shell - xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>' + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>' ``` ## Chat with [Baichuan](https://github.com/baichuan-inc) @@ -105,17 +105,17 @@ - Baichuan-7B, oasst1 ```shell - xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant + xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant ``` - Baichuan-7B, Arxiv Gentitle ```shell - xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer + xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer ``` - Baichuan-7B, Alpaca-enzh ```shell - xtuner chat hf 
baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca + xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca ``` diff --git a/docs/zh_cn/user_guides/chat.md b/docs/zh_cn/user_guides/chat.md index 1ae01388b..002cc89c8 100644 --- a/docs/zh_cn/user_guides/chat.md +++ b/docs/zh_cn/user_guides/chat.md @@ -5,49 +5,49 @@ - InternLM-7B, oasst1 ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant ``` - InternLM-7B, Arxiv Gentitle ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title ``` - InternLM-7B, Colorist ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist ``` - InternLM-7B, Coder ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code ``` - InternLM-7B, SQL ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql ``` - InternLM-7B, Lawyer ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer ``` - InternLM-7B, Open-Platypus ```shell - xtuner chat hf internlm/internlm-7b --adapter 
xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca ``` - InternLM-7B, Alpaca-enzh ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca ``` ## 与微调后的 [Llama2](https://github.com/facebookresearch/llama) 对话 @@ -58,19 +58,19 @@ ```shell export SERPER_API_KEY="xxx" # 请从 https://serper.dev 获得API_KEY,以此支持谷歌搜索! - xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer ``` - Llama2-7B, Arxiv Gentitle ```shell - xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title ``` - Llama2-7B, Colorist ```shell - xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist ``` ## 与微调后的 [Qwen](https://github.com/QwenLM) 对话 @@ -79,25 +79,25 @@ ```shell export SERPER_API_KEY="xxx" # 请从 https://serper.dev 获得API_KEY,以此支持谷歌搜索! 
- xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" ``` - Qwen-7B, oasst1 ```shell - xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>' + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>' ``` - Qwen-7B, Arxiv Gentitle ```shell - xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>' + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>' ``` - Qwen-7B, Alpaca-enzh ```shell - xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>' + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>' ``` ## 与微调后的 [Baichuan](https://github.com/baichuan-inc) 对话 @@ -105,17 +105,17 @@ - Baichuan-7B, oasst1 ```shell - xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant + xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant ``` - Baichuan-7B, Arxiv Gentitle ```shell - xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer + xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer ``` - Baichuan-7B, Alpaca-enzh ```shell - xtuner chat hf 
baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca + xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca ``` From f60d74a5855c52efadd6764f70036f571a6d5373 Mon Sep 17 00:00:00 2001 From: LZHgrla Date: Mon, 4 Sep 2023 19:41:36 +0800 Subject: [PATCH 04/25] update docs --- README.md | 12 ++++++------ README_zh-CN.md | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 135f8b16f..d60e46b5a 100644 --- a/README.md +++ b/README.md @@ -180,9 +180,7 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar For more examples, please see [finetune.md](./docs/en/user_guides/finetune.md). -### Deployment - -- **Step 0**, convert the pth adapter to HuggingFace adapter, by +- **Step 2** (optional), convert the pth adapter to HuggingFace adapter, by ```shell xtuner convert adapter_pth2hf \ @@ -191,12 +189,14 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar ${SAVE_PATH_TO_HF_ADAPTER} ``` - or, directly merge the pth adapter to pretrained LLM, by +### Deployment + +- **Step 0**, merge the HuggingFace adapter to pretrained LLM, by ```shell xtuner convert merge_adapter \ - ${CONFIG} \ - ${PATH_TO_PTH_ADAPTER} \ + ${NAME_OR_PATH_TO_LLM} \ + ${PATH_TO_ADAPTER} \ ${SAVE_PATH_TO_MERGED_LLM} \ --max-shard-size 2GB ``` diff --git a/README_zh-CN.md b/README_zh-CN.md index e2d637354..d182d722c 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -177,11 +177,9 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](. NPROC_PER_NODE=${GPU_NUM} xtuner train internlm_7b_qlora_oasst1_e3 ``` - 更多示例,请查阅[文档](./docs/zh_cn/user_guides/finetune.md). 
+ 更多示例,请查阅[文档](./docs/zh_cn/user_guides/finetune.md)。 -### 部署 - -- **步骤 0**,将 pth adapter 转换为 HuggingFace adapter: +- **步骤 2**(可选),将 pth adapter 转换为 HuggingFace adapter: ```shell xtuner convert adapter_pth2hf \ @@ -190,11 +188,13 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](. ${SAVE_PATH_TO_HF_ADAPTER} ``` - 或者,直接将 pth adapter 合并到大语言模型: +### 部署 + +- **步骤 0**,将 HuggingFace adapter 合并到大语言模型: ```shell xtuner convert merge_adapter \ - ${CONFIG} \ + ${NAME_OR_PATH_TO_LLM} \ ${PATH_TO_PTH_ADAPTER} \ ${SAVE_PATH_TO_MERGED_LLM} \ --max-shard-size 2GB From 73adb7679804638d46e473a469de2f5ab45cb984 Mon Sep 17 00:00:00 2001 From: LZHgrla Date: Mon, 4 Sep 2023 19:45:28 +0800 Subject: [PATCH 05/25] fix --- README.md | 2 +- README_zh-CN.md | 2 +- docs/en/user_guides/chat.md | 36 +++++++++++++++++----------------- docs/zh_cn/user_guides/chat.md | 36 +++++++++++++++++----------------- 4 files changed, 38 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index d60e46b5a..2795f4f39 100644 --- a/README.md +++ b/README.md @@ -147,7 +147,7 @@ XTuner provides tools to chat with pretrained / fine-tuned LLMs. - For example, we can start the chat with Llama2-7B-Plugins by ```shell - xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer ``` For more examples, please see [chat.md](./docs/en/user_guides/chat.md). 
diff --git a/README_zh-CN.md b/README_zh-CN.md index d182d722c..eb8b1f7c7 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -147,7 +147,7 @@ XTuner 提供与大语言模型对话的工具。 - 例如,与基于插件微调获得的 Llama2-7B-Plugins 对话: ```shell - xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer ``` 更多示例,请查阅[文档](./docs/zh_cn/user_guides/chat.md)。 diff --git a/docs/en/user_guides/chat.md b/docs/en/user_guides/chat.md index 9b9e35a87..65725f7a3 100644 --- a/docs/en/user_guides/chat.md +++ b/docs/en/user_guides/chat.md @@ -5,49 +5,49 @@ - InternLM-7B, oasst1 ```shell - xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant ``` - InternLM-7B, Arxiv Gentitle ```shell - xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title ``` - InternLM-7B, Colorist ```shell - xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist ``` - InternLM-7B, Coder ```shell - xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code ``` - InternLM-7B, SQL ```shell - xtuner chat internlm/internlm-7b --adapter 
xtuner/internlm-7b-qlora-sql --prompt-template sql + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql ``` - InternLM-7B, Lawyer ```shell - xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer ``` - InternLM-7B, Open-Platypus ```shell - xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca ``` - InternLM-7B, Alpaca-enzh ```shell - xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca ``` ## Chat with [Llama2](https://github.com/facebookresearch/llama) @@ -58,19 +58,19 @@ ```shell export SERPER_API_KEY="xxx" # Please get the key from https://serper.dev to support google search! 
- xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer ``` - Llama2-7B, Arxiv Gentitle ```shell - xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title ``` - Llama2-7B, Colorist ```shell - xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist ``` ## Chat with [Qwen](https://github.com/QwenLM) @@ -79,25 +79,25 @@ ```shell export SERPER_API_KEY="xxx" # Please get the key from https://serper.dev to support google search! 
- xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" ``` - Qwen-7B, oasst1 ```shell - xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>' + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>' ``` - Qwen-7B, Arxiv Gentitle ```shell - xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>' + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>' ``` - Qwen-7B, Alpaca-enzh ```shell - xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>' + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>' ``` ## Chat with [Baichuan](https://github.com/baichuan-inc) @@ -105,17 +105,17 @@ - Baichuan-7B, oasst1 ```shell - xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant + xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant ``` - Baichuan-7B, Arxiv Gentitle ```shell - xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer + xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer ``` - Baichuan-7B, Alpaca-enzh ```shell - xtuner chat 
baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca + xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca ``` diff --git a/docs/zh_cn/user_guides/chat.md b/docs/zh_cn/user_guides/chat.md index 002cc89c8..6fef1684c 100644 --- a/docs/zh_cn/user_guides/chat.md +++ b/docs/zh_cn/user_guides/chat.md @@ -5,49 +5,49 @@ - InternLM-7B, oasst1 ```shell - xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant ``` - InternLM-7B, Arxiv Gentitle ```shell - xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title ``` - InternLM-7B, Colorist ```shell - xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist ``` - InternLM-7B, Coder ```shell - xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code ``` - InternLM-7B, SQL ```shell - xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql ``` - InternLM-7B, Lawyer ```shell - xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer ``` - InternLM-7B, Open-Platypus ```shell - xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus 
--prompt-template alpaca + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca ``` - InternLM-7B, Alpaca-enzh ```shell - xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca ``` ## 与微调后的 [Llama2](https://github.com/facebookresearch/llama) 对话 @@ -58,19 +58,19 @@ ```shell export SERPER_API_KEY="xxx" # 请从 https://serper.dev 获得API_KEY,以此支持谷歌搜索! - xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer ``` - Llama2-7B, Arxiv Gentitle ```shell - xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title ``` - Llama2-7B, Colorist ```shell - xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist ``` ## 与微调后的 [Qwen](https://github.com/QwenLM) 对话 @@ -79,25 +79,25 @@ ```shell export SERPER_API_KEY="xxx" # 请从 https://serper.dev 获得API_KEY,以此支持谷歌搜索! 
- xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" ``` - Qwen-7B, oasst1 ```shell - xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>' + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>' ``` - Qwen-7B, Arxiv Gentitle ```shell - xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>' + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>' ``` - Qwen-7B, Alpaca-enzh ```shell - xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>' + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>' ``` ## 与微调后的 [Baichuan](https://github.com/baichuan-inc) 对话 @@ -105,17 +105,17 @@ - Baichuan-7B, oasst1 ```shell - xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant + xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant ``` - Baichuan-7B, Arxiv Gentitle ```shell - xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer + xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer ``` - Baichuan-7B, Alpaca-enzh ```shell - xtuner chat 
baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca + xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca ``` From 810769b0eb69221e20ebfc21eb278d1e88dda438 Mon Sep 17 00:00:00 2001 From: LZHgrla Date: Mon, 4 Sep 2023 19:47:45 +0800 Subject: [PATCH 06/25] fix --- README.md | 60 ++++++++++++++++++++++++------------------------- README_zh-CN.md | 60 ++++++++++++++++++++++++------------------------- 2 files changed, 60 insertions(+), 60 deletions(-) diff --git a/README.md b/README.md index 2795f4f39..dfbb17205 100644 --- a/README.md +++ b/README.md @@ -123,35 +123,6 @@ XTuner is a toolkit for efficiently fine-tuning LLM, developed by the [MMRazor]( pip install -e '.[all]' ``` -### Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing) - - - - - - - - - - -
Examples of Plugins-based Chat 🔥🔥🔥
- - - - - -
- -XTuner provides tools to chat with pretrained / fine-tuned LLMs. - -- For example, we can start the chat with Llama2-7B-Plugins by - - ```shell - xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer - ``` - -For more examples, please see [chat.md](./docs/en/user_guides/chat.md). - ### Fine-tune [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing) XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepare guides can be found on [dataset_prepare.md](./docs/en/user_guides/dataset_prepare.md). @@ -180,7 +151,7 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar For more examples, please see [finetune.md](./docs/en/user_guides/finetune.md). -- **Step 2** (optional), convert the pth adapter to HuggingFace adapter, by +- **Step 2**, convert the pth adapter to HuggingFace adapter, by ```shell xtuner convert adapter_pth2hf \ @@ -189,6 +160,35 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar ${SAVE_PATH_TO_HF_ADAPTER} ``` +### Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing) + + + + + + + + + + +
Examples of Plugins-based Chat 🔥🔥🔥
+ + + + + +
+ +XTuner provides tools to chat with pretrained / fine-tuned LLMs. + +- For example, we can start the chat with Llama2-7B-Plugins by + + ```shell + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer + ``` + +For more examples, please see [chat.md](./docs/en/user_guides/chat.md). + ### Deployment - **Step 0**, merge the HuggingFace adapter to pretrained LLM, by diff --git a/README_zh-CN.md b/README_zh-CN.md index eb8b1f7c7..5a4373852 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -123,35 +123,6 @@ XTuner 是一个轻量级微调大语言模型的工具库,由 [MMRazor](https pip install -e '.[all]' ``` -### 对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing) - - - - - - - - - - -
基于插件的对话 🔥🔥🔥
- - - - - -
- -XTuner 提供与大语言模型对话的工具。 - -- 例如,与基于插件微调获得的 Llama2-7B-Plugins 对话: - - ```shell - xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer - ``` - -更多示例,请查阅[文档](./docs/zh_cn/user_guides/chat.md)。 - ### 微调 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing) XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](./docs/zh_cn/user_guides/dataset_prepare.md)。 @@ -179,7 +150,7 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](. 更多示例,请查阅[文档](./docs/zh_cn/user_guides/finetune.md)。 -- **步骤 2**(可选),将 pth adapter 转换为 HuggingFace adapter: +- **步骤 2**,将 pth adapter 转换为 HuggingFace adapter: ```shell xtuner convert adapter_pth2hf \ @@ -188,6 +159,35 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](. ${SAVE_PATH_TO_HF_ADAPTER} ``` +### 对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing) + + + + + + + + + + +
基于插件的对话 🔥🔥🔥
+ + + + + +
+ +XTuner 提供与大语言模型对话的工具。 + +- 例如,与基于插件微调获得的 Llama2-7B-Plugins 对话: + + ```shell + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer + ``` + +更多示例,请查阅[文档](./docs/zh_cn/user_guides/chat.md)。 + ### 部署 - **步骤 0**,将 HuggingFace adapter 合并到大语言模型: From e5ce42c0f9dabc78af2a28e75c7a181a926e1826 Mon Sep 17 00:00:00 2001 From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com> Date: Tue, 5 Sep 2023 10:10:12 +0800 Subject: [PATCH 07/25] Update README.md --- README.md | 66 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index dfbb17205..ba236ac34 100644 --- a/README.md +++ b/README.md @@ -26,9 +26,27 @@ XTuner is a toolkit for efficiently fine-tuning LLM, developed by the [MMRazor]( ## 🌟 Demos +- Ready-to-use models and datasets from XTuner API [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing) - QLoRA Fine-tune [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing) - Plugin-based Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing) -- Ready-to-use models and datasets from XTuner API [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing) + + + + + + + + + + +
Examples of Plugins-based Chat 🔥🔥🔥
+ + + + + +
+ ## 🔥 Supports @@ -139,7 +157,13 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar xtuner copy-cfg ${CONFIG_NAME} ${SAVE_DIR} ``` -- **Step 1**, start fine-tuning. For example, we can start the QLoRA fine-tuning of InternLM-7B with oasst1 dataset by +- **Step 1**, start fine-tuning. + + ```shell + xtuner train ${CONFIG_NAME_OR_PATH} + ``` + + For example, we can start the QLoRA fine-tuning of InternLM-7B with oasst1 dataset by ```shell # On a single GPU @@ -151,41 +175,25 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar For more examples, please see [finetune.md](./docs/en/user_guides/finetune.md). -- **Step 2**, convert the pth adapter to HuggingFace adapter, by +- **Step 2**, convert the saved pth model to HuggingFace model, by ```shell - xtuner convert adapter_pth2hf \ - ${CONFIG} \ - ${PATH_TO_PTH_ADAPTER} \ - ${SAVE_PATH_TO_HF_ADAPTER} + xtuner convert pth2hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL} ``` ### Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing) - - - - - - - - - -
Examples of Plugins-based Chat 🔥🔥🔥
- - - - - -
- XTuner provides tools to chat with pretrained / fine-tuned LLMs. -- For example, we can start the chat with Llama2-7B-Plugins by +```shell +xtuner chat ${NAME_OR_PATH_TO_LLM} --adapter ${NAME_OR_PATH_TO_ADAPTER} [optional arguments] +``` - ```shell - xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer - ``` +For example, we can start the chat with Llama2-7b with adapter trained from MOSS-003-SFT by + +```shell +xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer +``` For more examples, please see [chat.md](./docs/en/user_guides/chat.md). @@ -196,7 +204,7 @@ For more examples, please see [chat.md](./docs/en/user_guides/chat.md). ```shell xtuner convert merge_adapter \ ${NAME_OR_PATH_TO_LLM} \ - ${PATH_TO_ADAPTER} \ + ${NAME_OR_PATH_TO_ADAPTER} \ ${SAVE_PATH_TO_MERGED_LLM} \ --max-shard-size 2GB ``` From 515b933b79f93d34617016ae63ca0f3f19276ab2 Mon Sep 17 00:00:00 2001 From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com> Date: Tue, 5 Sep 2023 10:11:27 +0800 Subject: [PATCH 08/25] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ba236ac34..d85124516 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ XTuner is a toolkit for efficiently fine-tuning LLM, developed by the [MMRazor]( 
Examples of Plugins-based Chat 🔥🔥🔥Examples of Plugin-based Chat 🔥🔥🔥
@@ -220,7 +220,7 @@ For more examples, please see [chat.md](./docs/en/user_guides/chat.md). --seed 0 ``` - 🎯 We are woking closely with [LMDeploy](https://github.com/InternLM/lmdeploy), to implement the deployment of **plugins-based chat**! + 🎯 We are woking closely with [LMDeploy](https://github.com/InternLM/lmdeploy), to implement the deployment of **plugin-based chat**! ### Evaluation From cf8088bba6f32ea9c0a10c7fdc2cc39cd838ffa0 Mon Sep 17 00:00:00 2001 From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com> Date: Tue, 5 Sep 2023 10:12:01 +0800 Subject: [PATCH 09/25] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d85124516..e24901117 100644 --- a/README.md +++ b/README.md @@ -175,7 +175,7 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar For more examples, please see [finetune.md](./docs/en/user_guides/finetune.md). -- **Step 2**, convert the saved pth model to HuggingFace model, by +- **Step 2**, convert the saved PTH model to HuggingFace model, by ```shell xtuner convert pth2hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL} From a7c0481d75bcf549dbfb177abb47dfce50d77b85 Mon Sep 17 00:00:00 2001 From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com> Date: Tue, 5 Sep 2023 10:15:12 +0800 Subject: [PATCH 10/25] Update README_zh-CN.md --- README_zh-CN.md | 66 +++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/README_zh-CN.md b/README_zh-CN.md index 5a4373852..8bc6bcc8b 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -26,9 +26,27 @@ XTuner 是一个轻量级微调大语言模型的工具库,由 [MMRazor](https ## 🌟 示例 +- XTuner APIs所提供的开箱即用的模型与数据集 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing) - QLoRA 微调 [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing) - 基于插件的对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing) -- XTuner APIs所提供的开箱即用的模型与数据集 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing) + + + + + + + + + + +
基于插件的对话 🔥🔥🔥
+ + + + + +
+ ## 🔥 支持列表 @@ -139,7 +157,13 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](. xtuner copy-cfg ${CONFIG_NAME} ${SAVE_DIR} ``` -- **步骤 1**,开始微调。例如,我们可以利用 QLoRA 算法在 oasst1 数据集上微调 InternLM-7B: +- **步骤 1**,开始微调。 + + ```shell + xtuner train ${CONFIG_NAME_OR_PATH} + ``` + + 例如,我们可以利用 QLoRA 算法在 oasst1 数据集上微调 InternLM-7B: ```shell # 单卡 @@ -150,41 +174,25 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](. 更多示例,请查阅[文档](./docs/zh_cn/user_guides/finetune.md)。 -- **步骤 2**,将 pth adapter 转换为 HuggingFace adapter: +- **步骤 2**,将保存的 PTH 模型转换为 HuggingFace 模型: ```shell - xtuner convert adapter_pth2hf \ - ${CONFIG} \ - ${PATH_TO_PTH_ADAPTER} \ - ${SAVE_PATH_TO_HF_ADAPTER} + xtuner convert pth2hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL} ``` ### 对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing) - - - - - - - - - -
基于插件的对话 🔥🔥🔥
- - - - - -
- XTuner 提供与大语言模型对话的工具。 -- 例如,与基于插件微调获得的 Llama2-7B-Plugins 对话: +```shell +xtuner chat ${NAME_OR_PATH_TO_LLM} --adapter ${NAME_OR_PATH_TO_ADAPTER} [optional arguments] +``` - ```shell - xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer - ``` +例如,与 Llama2-7b + MOSS-003-SFT adapter 对话: + +```shell +xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer +``` 更多示例,请查阅[文档](./docs/zh_cn/user_guides/chat.md)。 @@ -195,7 +203,7 @@ XTuner 提供与大语言模型对话的工具。 ```shell xtuner convert merge_adapter \ ${NAME_OR_PATH_TO_LLM} \ - ${PATH_TO_PTH_ADAPTER} \ + ${NAME_OR_PATH_TO_ADAPTER} \ ${SAVE_PATH_TO_MERGED_LLM} \ --max-shard-size 2GB ``` From 85da402697a55c16ec678541a61cc01d897cf4ef Mon Sep 17 00:00:00 2001 From: LZHgrla Date: Tue, 5 Sep 2023 10:15:35 +0800 Subject: [PATCH 11/25] fix pre-commit --- README.md | 3 ++- README_zh-CN.md | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e24901117..60b938e1f 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,9 @@ XTuner is a toolkit for efficiently fine-tuning LLM, developed by the [MMRazor]( ## 🌟 Demos - Ready-to-use models and datasets from XTuner API [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing) + - QLoRA Fine-tune [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing) + - Plugin-based Chat [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing) @@ -47,7 +49,6 @@ XTuner is a toolkit for efficiently fine-tuning LLM, developed by the [MMRazor](
- ## 🔥 Supports diff --git a/README_zh-CN.md b/README_zh-CN.md index 8bc6bcc8b..8bf52a263 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -27,9 +27,11 @@ XTuner 是一个轻量级微调大语言模型的工具库,由 [MMRazor](https ## 🌟 示例 - XTuner APIs所提供的开箱即用的模型与数据集 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing) + - QLoRA 微调 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing) + - 基于插件的对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing) - +
@@ -47,7 +49,6 @@ XTuner 是一个轻量级微调大语言模型的工具库,由 [MMRazor](https
基于插件的对话 🔥🔥🔥
- ## 🔥 支持列表 @@ -162,7 +163,7 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](. ```shell xtuner train ${CONFIG_NAME_OR_PATH} ``` - + 例如,我们可以利用 QLoRA 算法在 oasst1 数据集上微调 InternLM-7B: ```shell From df02d715e23656c55ad9ba90cc55c6b9407ee38c Mon Sep 17 00:00:00 2001 From: LZHgrla Date: Tue, 5 Sep 2023 10:19:52 +0800 Subject: [PATCH 12/25] rename converter --- xtuner/entry_point.py | 27 +++++++++---------- .../{merge_adapter.py => merge.py} | 0 .../{adapter_pth2hf.py => pth2hf.py} | 0 .../{split_hf_llm.py => split.py} | 0 4 files changed, 13 insertions(+), 14 deletions(-) rename xtuner/tools/model_converters/{merge_adapter.py => merge.py} (100%) rename xtuner/tools/model_converters/{adapter_pth2hf.py => pth2hf.py} (100%) rename xtuner/tools/model_converters/{split_hf_llm.py => split.py} (100%) diff --git a/xtuner/entry_point.py b/xtuner/entry_point.py index c57ff2810..5a775102d 100644 --- a/xtuner/entry_point.py +++ b/xtuner/entry_point.py @@ -10,8 +10,7 @@ import xtuner from xtuner.tools import chat, copy_cfg, list_cfg, test, train from xtuner.tools.data_preprocess import arxiv as arxiv_preprocess -from xtuner.tools.model_converters import (adapter_pth2hf, merge_adapter, - split_hf_llm) +from xtuner.tools.model_converters import merge, pth2hf, split # Define valid modes MODES = ('list-cfg', 'copy-cfg', 'train', 'test', 'chat', 'convert', @@ -38,13 +37,13 @@ 3-2. Fine-tune LLMs by multiple GPUs: NPROC_PER_NODE=$NGPUS NNODES=$NNODES NODE_RANK=$NODE_RANK PORT=$PORT ADDR=$ADDR xtuner dist_train $CONFIG $GPUS 4. Chat with LLMs with HuggingFace's model and adapter: - xtuner chat $NAME_OR_PATH_TO_HF_MODEL --adapter $NAME_OR_PATH_TO_HF_ADAPTER --prompt-template $PROMPT_TEMPLATE - 5-1. Convert the pth adapter to HuggingFace's adapter: - xtuner convert adapter_pth2hf $CONFIG $PATH_TO_PTH_ADAPTER $SAVE_PATH_TO_HF_ADAPTER + xtuner chat $NAME_OR_PATH_TO_LLM --adapter $NAME_OR_PATH_TO_ADAPTER --prompt-template $PROMPT_TEMPLATE + 5-1. 
Convert the pth model to HuggingFace's model: + xtuner convert pth2hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL 5-2. Merge the HuggingFace's adapter to the pretrained LLM: - xtuner convert merge_adapter $NAME_OR_PATH_TO_HF_MODEL $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH + xtuner convert merge $NAME_OR_PATH_TO_LLM $NAME_OR_PATH_TO_ADAPTER $SAVE_PATH 5-3. Split HuggingFace's LLM to the smallest sharded one: - xtuner convert split_hf_llm $NAME_OR_PATH_TO_HF_MODEL $SAVE_PATH + xtuner convert split $NAME_OR_PATH_TO_LLM $SAVE_PATH 6-1. Preprocess arxiv dataset: xtuner preprocess arxiv $SRC_FILE $DST_FILE --start-date $START_DATE --categories $CATEGORIES @@ -69,12 +68,12 @@ Some usages for convert: (See more by using -h for specific command!) - 1. Convert the pth adapter to HuggingFace's adapter: - xtuner convert adapter_pth2hf $CONFIG $PATH_TO_PTH_ADAPTER $SAVE_PATH_TO_HF_ADAPTER + 1. Convert the pth model to HuggingFace's model: + xtuner convert pth2hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL 2. Merge the HuggingFace's adapter to the pretrained LLM: - xtuner convert merge_adapter $NAME_OR_PATH_TO_HF_MODEL $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH + xtuner convert merge $NAME_OR_PATH_TO_LLM $NAME_OR_PATH_TO_ADAPTER $SAVE_PATH 3. 
Split HuggingFace's LLM to the smallest sharded one: - xtuner convert split_hf_llm $NAME_OR_PATH_TO_HF_MODEL $SAVE_PATH + xtuner convert split $NAME_OR_PATH_TO_LLM $SAVE_PATH GitHub: https://github.com/InternLM/xtuner """ # noqa: E501 @@ -117,9 +116,9 @@ 'test': test.__file__, 'chat': chat.__file__, 'convert': { - 'adapter_pth2hf': adapter_pth2hf.__file__, - 'merge_adapter': merge_adapter.__file__, - 'split_hf_llm': split_hf_llm.__file__, + 'pth2hf': pth2hf.__file__, + 'merge': merge.__file__, + 'split': split.__file__, '--help': lambda: print_log(CONVERT_HELP_MSG, 'current'), '-h': lambda: print_log(CONVERT_HELP_MSG, 'current') }, diff --git a/xtuner/tools/model_converters/merge_adapter.py b/xtuner/tools/model_converters/merge.py similarity index 100% rename from xtuner/tools/model_converters/merge_adapter.py rename to xtuner/tools/model_converters/merge.py diff --git a/xtuner/tools/model_converters/adapter_pth2hf.py b/xtuner/tools/model_converters/pth2hf.py similarity index 100% rename from xtuner/tools/model_converters/adapter_pth2hf.py rename to xtuner/tools/model_converters/pth2hf.py diff --git a/xtuner/tools/model_converters/split_hf_llm.py b/xtuner/tools/model_converters/split.py similarity index 100% rename from xtuner/tools/model_converters/split_hf_llm.py rename to xtuner/tools/model_converters/split.py From b97e9fe7bf7f8a6335b9f4605ddd65d3f0c4670a Mon Sep 17 00:00:00 2001 From: LZHgrla Date: Tue, 5 Sep 2023 10:54:45 +0800 Subject: [PATCH 13/25] update pth2hf --- xtuner/tools/chat.py | 7 ---- xtuner/tools/model_converters/pth2hf.py | 48 +++++++++++++++++-------- 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/xtuner/tools/chat.py b/xtuner/tools/chat.py index 2af9143a4..f3cf452c6 100644 --- a/xtuner/tools/chat.py +++ b/xtuner/tools/chat.py @@ -26,7 +26,6 @@ def parse_args(): parser = argparse.ArgumentParser(description='Chat with a HF model') parser.add_argument( 'model_name_or_path', help='Hugging Face model name or path') - 
parser.add_argument('--pretrained', default=None, help='pretrained path') parser.add_argument('--adapter', default=None, help='adapter name or path') parser.add_argument( '--prompt-template', @@ -134,18 +133,12 @@ def main(): bnb_4bit_quant_type='nf4') elif args.bits == 8: load_in_8bit = True - assert args.pretrained is None or args.bits is None model = AutoModelForCausalLM.from_pretrained( args.model_name_or_path, quantization_config=quantization_config, load_in_8bit=load_in_8bit, device_map='auto', trust_remote_code=True) - if args.pretrained is not None: - pretrained_ckpt = torch.load(args.pretrained, map_location='cpu') - pretrained_ckpt = remove_prefix(pretrained_ckpt, 'llm.') - model.load_state_dict(pretrained_ckpt) - print(f'Load pretrained weight from {args.pretrained}') tokenizer = AutoTokenizer.from_pretrained( args.model_name_or_path, trust_remote_code=True) if args.adapter is not None: diff --git a/xtuner/tools/model_converters/pth2hf.py b/xtuner/tools/model_converters/pth2hf.py index ce69c90a9..32aecad2d 100644 --- a/xtuner/tools/model_converters/pth2hf.py +++ b/xtuner/tools/model_converters/pth2hf.py @@ -5,7 +5,6 @@ import torch from mmengine.config import Config, DictAction -from mmengine.utils import mkdir_or_exist from xtuner.configs import cfgs_name_path from xtuner.registry import BUILDER @@ -13,18 +12,14 @@ def parse_args(): parser = argparse.ArgumentParser( - description='Convert the pth adapter to HuggingFace adapter') + description='Convert the pth model to HuggingFace model') parser.add_argument( 'config', help='config file name or path. 
Note: Please use the original ' 'configs, instead of the automatically saved log configs.') - parser.add_argument('adapter_checkpoint', help='adapter checkpoint file') + parser.add_argument('pth_model', help='pth model file') parser.add_argument( - 'save_dir', help='the directory to save the checkpoint') - parser.add_argument( - '--is-deepspeed', - action='store_true', - help='whether the adapter is saved from deepspeed') + 'save_dir', help='the directory to save HuggingFace model') parser.add_argument( '--cfg-options', nargs='+', @@ -39,6 +34,29 @@ def parse_args(): return args +def guess_load_checkpoint(pth_model): + if os.path.isfile(pth_model): + state_dict = torch.load(pth_model, map_location='cpu') + if 'state_dict' in state_dict: + state_dict = state_dict['state_dict'] + elif os.path.isdir(pth_model): + try: + from deepspeed.utils.zero_to_fp32 import \ + get_fp32_state_dict_from_zero_checkpoint + except ImportError: + raise ImportError( + 'The provided PTH model appears to be a DeepSpeed checkpoint. ' + 'However, DeepSpeed library is not detected in current ' + 'environment. This suggests that DeepSpeed may not be ' + 'installed or is incorrectly configured. 
Please verify your ' + 'setup.') + state_dict = get_fp32_state_dict_from_zero_checkpoint( + os.path.dirname(pth_model), os.path.basename(pth_model)) + else: + raise FileNotFoundError(f'Cannot find {pth_model}') + return state_dict + + def main(): args = parse_args() @@ -56,17 +74,19 @@ def main(): model = BUILDER.build(cfg.model) - state_dict = torch.load(args.adapter_checkpoint, map_location='cpu') - if not args.is_deepspeed: - state_dict = state_dict['state_dict'] + state_dict = guess_load_checkpoint(args.pth_model) model.load_state_dict(state_dict, strict=False) - print(f'Load adapter from {args.adapter_checkpoint}') + print(f'Load PTH model from {args.pth_model}') - mkdir_or_exist(args.save_dir) + print(f'Saving HuggingFace model to {args.save_dir}') model.llm.save_pretrained(args.save_dir) + if 'PeftModel' not in model.llm.__class__.__name__: + print(f'Saving HuggingFace tokenizer to {args.save_dir}') + tokenizer = BUILDER.build(cfg.tokenizer) + tokenizer.save_pretrained(args.save_dir) shutil.copyfile(args.config, os.path.join(args.save_dir, 'xtuner_config.py')) - print(f'Save to {args.save_dir}') + print('All done!') if __name__ == '__main__': From a87eff26a3ccae3743fb5ca763235dd75e0a5161 Mon Sep 17 00:00:00 2001 From: LZHgrla Date: Tue, 5 Sep 2023 10:55:46 +0800 Subject: [PATCH 14/25] rename pth2hf to pth_to_hf --- README.md | 2 +- README_zh-CN.md | 2 +- xtuner/entry_point.py | 8 ++++---- xtuner/tools/model_converters/{pth2hf.py => pth_to_hf.py} | 0 4 files changed, 6 insertions(+), 6 deletions(-) rename xtuner/tools/model_converters/{pth2hf.py => pth_to_hf.py} (100%) diff --git a/README.md b/README.md index 60b938e1f..b04af0ef1 100644 --- a/README.md +++ b/README.md @@ -179,7 +179,7 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. 
Dataset prepar - **Step 2**, convert the saved PTH model to HuggingFace model, by ```shell - xtuner convert pth2hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL} + xtuner convert pth_to_hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL} ``` ### Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing) diff --git a/README_zh-CN.md b/README_zh-CN.md index 8bf52a263..1615959d4 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -178,7 +178,7 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](. - **步骤 2**,将保存的 PTH 模型转换为 HuggingFace 模型: ```shell - xtuner convert pth2hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL} + xtuner convert pth_to_hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL} ``` ### 对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing) diff --git a/xtuner/entry_point.py b/xtuner/entry_point.py index 5a775102d..890210e6d 100644 --- a/xtuner/entry_point.py +++ b/xtuner/entry_point.py @@ -10,7 +10,7 @@ import xtuner from xtuner.tools import chat, copy_cfg, list_cfg, test, train from xtuner.tools.data_preprocess import arxiv as arxiv_preprocess -from xtuner.tools.model_converters import merge, pth2hf, split +from xtuner.tools.model_converters import merge, pth_to_hf, split # Define valid modes MODES = ('list-cfg', 'copy-cfg', 'train', 'test', 'chat', 'convert', @@ -39,7 +39,7 @@ 4. Chat with LLMs with HuggingFace's model and adapter: xtuner chat $NAME_OR_PATH_TO_LLM --adapter $NAME_OR_PATH_TO_ADAPTER --prompt-template $PROMPT_TEMPLATE 5-1. Convert the pth model to HuggingFace's model: - xtuner convert pth2hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL + xtuner convert pth_to_hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL 5-2. 
Merge the HuggingFace's adapter to the pretrained LLM: xtuner convert merge $NAME_OR_PATH_TO_LLM $NAME_OR_PATH_TO_ADAPTER $SAVE_PATH 5-3. Split HuggingFace's LLM to the smallest sharded one: @@ -69,7 +69,7 @@ Some usages for convert: (See more by using -h for specific command!) 1. Convert the pth model to HuggingFace's model: - xtuner convert pth2hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL + xtuner convert pth_to_hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL 2. Merge the HuggingFace's adapter to the pretrained LLM: xtuner convert merge $NAME_OR_PATH_TO_LLM $NAME_OR_PATH_TO_ADAPTER $SAVE_PATH 3. Split HuggingFace's LLM to the smallest sharded one: @@ -116,7 +116,7 @@ 'test': test.__file__, 'chat': chat.__file__, 'convert': { - 'pth2hf': pth2hf.__file__, + 'pth_to_hf': pth_to_hf.__file__, 'merge': merge.__file__, 'split': split.__file__, '--help': lambda: print_log(CONVERT_HELP_MSG, 'current'), diff --git a/xtuner/tools/model_converters/pth2hf.py b/xtuner/tools/model_converters/pth_to_hf.py similarity index 100% rename from xtuner/tools/model_converters/pth2hf.py rename to xtuner/tools/model_converters/pth_to_hf.py From f5db1fe45f17a409f359d855105e803bca02ee29 Mon Sep 17 00:00:00 2001 From: LZHgrla Date: Tue, 5 Sep 2023 11:39:14 +0800 Subject: [PATCH 15/25] add fp32 for pth_to_hf --- xtuner/entry_point.py | 10 +++++----- xtuner/tools/model_converters/merge.py | 7 ++++++- xtuner/tools/model_converters/pth_to_hf.py | 17 ++++++++++++++++- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/xtuner/entry_point.py b/xtuner/entry_point.py index 890210e6d..f36f2cc39 100644 --- a/xtuner/entry_point.py +++ b/xtuner/entry_point.py @@ -36,14 +36,14 @@ xtuner train $CONFIG 3-2. Fine-tune LLMs by multiple GPUs: NPROC_PER_NODE=$NGPUS NNODES=$NNODES NODE_RANK=$NODE_RANK PORT=$PORT ADDR=$ADDR xtuner dist_train $CONFIG $GPUS - 4. 
Chat with LLMs with HuggingFace's model and adapter: - xtuner chat $NAME_OR_PATH_TO_LLM --adapter $NAME_OR_PATH_TO_ADAPTER --prompt-template $PROMPT_TEMPLATE - 5-1. Convert the pth model to HuggingFace's model: + 4-1. Convert the pth model to HuggingFace's model: xtuner convert pth_to_hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL - 5-2. Merge the HuggingFace's adapter to the pretrained LLM: + 4-2. Merge the HuggingFace's adapter to the pretrained LLM: xtuner convert merge $NAME_OR_PATH_TO_LLM $NAME_OR_PATH_TO_ADAPTER $SAVE_PATH - 5-3. Split HuggingFace's LLM to the smallest sharded one: + 4-3. Split HuggingFace's LLM to the smallest sharded one: xtuner convert split $NAME_OR_PATH_TO_LLM $SAVE_PATH + 5. Chat with LLMs with HuggingFace's model and adapter: + xtuner chat $NAME_OR_PATH_TO_LLM --adapter $NAME_OR_PATH_TO_ADAPTER --prompt-template $PROMPT_TEMPLATE 6-1. Preprocess arxiv dataset: xtuner preprocess arxiv $SRC_FILE $DST_FILE --start-date $START_DATE --categories $CATEGORIES diff --git a/xtuner/tools/model_converters/merge.py b/xtuner/tools/model_converters/merge.py index 2de6bc23a..169cea620 100644 --- a/xtuner/tools/model_converters/merge.py +++ b/xtuner/tools/model_converters/merge.py @@ -13,7 +13,12 @@ def parse_args(): parser.add_argument('adapter_name_or_path', help='adapter name or path') parser.add_argument( 'save_dir', help='the directory to save the merged model') - parser.add_argument('--max-shard-size', type=str, default='2GB') + parser.add_argument( + '--max-shard-size', + type=str, + default='2GB', + help='Only applicable for LLM. 
The maximum size for ' + 'each sharded checkpoint.') args = parser.parse_args() return args diff --git a/xtuner/tools/model_converters/pth_to_hf.py b/xtuner/tools/model_converters/pth_to_hf.py index 32aecad2d..ccf15861b 100644 --- a/xtuner/tools/model_converters/pth_to_hf.py +++ b/xtuner/tools/model_converters/pth_to_hf.py @@ -20,6 +20,16 @@ def parse_args(): parser.add_argument('pth_model', help='pth model file') parser.add_argument( 'save_dir', help='the directory to save HuggingFace model') + parser.add_argument( + '--fp32', + action='store_true', + help='Save as fp32. If not set, fp16 will be used by default.') + parser.add_argument( + '--max-shard-size', + type=str, + default='2GB', + help='Only applicable for LLM. The maximum size for ' + 'each sharded checkpoint.') parser.add_argument( '--cfg-options', nargs='+', @@ -78,8 +88,13 @@ def main(): model.load_state_dict(state_dict, strict=False) print(f'Load PTH model from {args.pth_model}') + if not args.fp32: + print('Convert weights to float16') + model.llm.half() + print(f'Saving HuggingFace model to {args.save_dir}') - model.llm.save_pretrained(args.save_dir) + model.llm.save_pretrained( + args.save_dir, max_shard_size=args.max_shard_size) if 'PeftModel' not in model.llm.__class__.__name__: print(f'Saving HuggingFace tokenizer to {args.save_dir}') tokenizer = BUILDER.build(cfg.tokenizer) From 8e3a71eb6f007a065cef3335bfbe3853d504f201 Mon Sep 17 00:00:00 2001 From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com> Date: Tue, 5 Sep 2023 11:49:01 +0800 Subject: [PATCH 16/25] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b04af0ef1..9f4e9e326 100644 --- a/README.md +++ b/README.md @@ -176,7 +176,7 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar For more examples, please see [finetune.md](./docs/en/user_guides/finetune.md). 
-- **Step 2**, convert the saved PTH model to HuggingFace model, by +- **Step 2**, convert the saved PTH model (if using DeepSpeed, it will be a directory) to HuggingFace model, by ```shell xtuner convert pth_to_hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL} From 2cac8f43f09185269173dddcea9815e4fcf878f1 Mon Sep 17 00:00:00 2001 From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com> Date: Tue, 5 Sep 2023 11:50:58 +0800 Subject: [PATCH 17/25] Update README_zh-CN.md --- README_zh-CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_zh-CN.md b/README_zh-CN.md index 1615959d4..d0455541e 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -175,7 +175,7 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](. 更多示例,请查阅[文档](./docs/zh_cn/user_guides/finetune.md)。 -- **步骤 2**,将保存的 PTH 模型转换为 HuggingFace 模型: +- **步骤 2**,将保存的 PTH 模型(如果使用的DeepSpeed,则将会是一个文件夹)转换为 HuggingFace 模型: ```shell xtuner convert pth_to_hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL} From 737e3ad9401b7bc674140afa1d1124775957aa7f Mon Sep 17 00:00:00 2001 From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com> Date: Tue, 5 Sep 2023 12:06:02 +0800 Subject: [PATCH 18/25] Update README_zh-CN.md --- README_zh-CN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README_zh-CN.md b/README_zh-CN.md index d0455541e..4f1969356 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -178,7 +178,7 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](. 
- **步骤 2**,将保存的 PTH 模型(如果使用的DeepSpeed,则将会是一个文件夹)转换为 HuggingFace 模型: ```shell - xtuner convert pth_to_hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL} + xtuner convert pth_to_hf ${CONFIG} ${PTH} ${SAVE_PATH} ``` ### 对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing) @@ -205,7 +205,7 @@ xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003- xtuner convert merge_adapter \ ${NAME_OR_PATH_TO_LLM} \ ${NAME_OR_PATH_TO_ADAPTER} \ - ${SAVE_PATH_TO_MERGED_LLM} \ + ${SAVE_PATH} \ --max-shard-size 2GB ``` From 81373c42bb6703d6f187bd6e6ce26f01ddc03e86 Mon Sep 17 00:00:00 2001 From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com> Date: Tue, 5 Sep 2023 12:07:43 +0800 Subject: [PATCH 19/25] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9f4e9e326..dcc0c29d7 100644 --- a/README.md +++ b/README.md @@ -155,7 +155,7 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar Or, if the provided configs cannot meet the requirements, please copy the provided config to the specified directory and make specific modifications by ```shell - xtuner copy-cfg ${CONFIG_NAME} ${SAVE_DIR} + xtuner copy-cfg ${CONFIG_NAME} ${SAVE_PATH} ``` - **Step 1**, start fine-tuning. @@ -179,7 +179,7 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. 
Dataset prepar - **Step 2**, convert the saved PTH model (if using DeepSpeed, it will be a directory) to HuggingFace model, by ```shell - xtuner convert pth_to_hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL} + xtuner convert pth_to_hf ${CONFIG} ${PTH} ${SAVE_PATH} ``` ### Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing) @@ -206,7 +206,7 @@ For more examples, please see [chat.md](./docs/en/user_guides/chat.md). xtuner convert merge_adapter \ ${NAME_OR_PATH_TO_LLM} \ ${NAME_OR_PATH_TO_ADAPTER} \ - ${SAVE_PATH_TO_MERGED_LLM} \ + ${SAVE_PATH} \ --max-shard-size 2GB ``` From 45627b9baa0a01f86bbbbb6a1daef31a18fe5dfe Mon Sep 17 00:00:00 2001 From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com> Date: Tue, 5 Sep 2023 12:07:45 +0800 Subject: [PATCH 20/25] Update README_zh-CN.md --- README_zh-CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_zh-CN.md b/README_zh-CN.md index 4f1969356..8fe70cf5d 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -155,7 +155,7 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](. 或者,如果所提供的配置文件不能满足使用需求,请导出所提供的配置文件并进行相应更改: ```shell - xtuner copy-cfg ${CONFIG_NAME} ${SAVE_DIR} + xtuner copy-cfg ${CONFIG_NAME} ${SAVE_PATH} ``` - **步骤 1**,开始微调。 From 24f2dfeece461159c2cbe78f4d6a3d2c7bd2a56e Mon Sep 17 00:00:00 2001 From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com> Date: Tue, 5 Sep 2023 12:08:15 +0800 Subject: [PATCH 21/25] Update README_zh-CN.md --- README_zh-CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_zh-CN.md b/README_zh-CN.md index 8fe70cf5d..69229f99c 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -178,7 +178,7 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](. 
- **步骤 2**,将保存的 PTH 模型(如果使用的DeepSpeed,则将会是一个文件夹)转换为 HuggingFace 模型: ```shell - xtuner convert pth_to_hf ${CONFIG} ${PTH} ${SAVE_PATH} + xtuner convert pth_to_hf ${CONFIG_NAME_OR_PATH} ${PTH} ${SAVE_PATH} ``` ### 对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing) From b88024830775f8659b1ee4bdbab4be60c97f42f7 Mon Sep 17 00:00:00 2001 From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com> Date: Tue, 5 Sep 2023 12:08:25 +0800 Subject: [PATCH 22/25] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index dcc0c29d7..5a48eb6e9 100644 --- a/README.md +++ b/README.md @@ -179,7 +179,7 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar - **Step 2**, convert the saved PTH model (if using DeepSpeed, it will be a directory) to HuggingFace model, by ```shell - xtuner convert pth_to_hf ${CONFIG} ${PTH} ${SAVE_PATH} + xtuner convert pth_to_hf ${CONFIG_NAME_OR_PATH} ${PTH} ${SAVE_PATH} ``` ### Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing) From d4ece548aa3ca1b66b7d6d53052bd0d3c914befe Mon Sep 17 00:00:00 2001 From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com> Date: Tue, 5 Sep 2023 15:20:43 +0800 Subject: [PATCH 23/25] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 5a48eb6e9..e8702aa5d 100644 --- a/README.md +++ b/README.md @@ -221,6 +221,8 @@ For more examples, please see [chat.md](./docs/en/user_guides/chat.md). --seed 0 ``` + 🔥 Seeking efficient inference with less GPU memory? Try 4-bit quantization from [LMDeploy](https://github.com/InternLM/lmdeploy)! For more details, see [here](https://github.com/InternLM/lmdeploy/tree/main#quantization). 
+ 🎯 We are woking closely with [LMDeploy](https://github.com/InternLM/lmdeploy), to implement the deployment of **plugin-based chat**! ### Evaluation From 4f8b2ddfa0bbf5ea622a9a7efaf28fcb1b337f44 Mon Sep 17 00:00:00 2001 From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com> Date: Tue, 5 Sep 2023 15:22:10 +0800 Subject: [PATCH 24/25] Update README_zh-CN.md --- README_zh-CN.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README_zh-CN.md b/README_zh-CN.md index 69229f99c..4cf95a943 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -219,6 +219,7 @@ xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003- --top_p 0.95 \ --seed 0 ``` + 🔥 追求速度更快、显存占用更低的推理?欢迎体验 [LMDeploy](https://github.com/InternLM/lmdeploy) 提供的 4-bit 量化!使用指南请见[文档](https://github.com/InternLM/lmdeploy/tree/main#quantization)。 🎯 我们正在与 [LMDeploy](https://github.com/InternLM/lmdeploy) 紧密合作,以实现基于插件对话的部署! From 27d42e58a2f55625bf7bc157fee2da092c2eb9ea Mon Sep 17 00:00:00 2001 From: LZHgrla Date: Tue, 5 Sep 2023 15:22:35 +0800 Subject: [PATCH 25/25] fix pre-commit --- README_zh-CN.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README_zh-CN.md b/README_zh-CN.md index 4cf95a943..5786e1364 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -219,6 +219,7 @@ xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003- --top_p 0.95 \ --seed 0 ``` + 🔥 追求速度更快、显存占用更低的推理?欢迎体验 [LMDeploy](https://github.com/InternLM/lmdeploy) 提供的 4-bit 量化!使用指南请见[文档](https://github.com/InternLM/lmdeploy/tree/main#quantization)。 🎯 我们正在与 [LMDeploy](https://github.com/InternLM/lmdeploy) 紧密合作,以实现基于插件对话的部署!