From ba0f5b2358f550a06f2c35ac71ebb053c80bbb22 Mon Sep 17 00:00:00 2001
From: LZHgrla <linzhihao@pjlab.org.cn>
Date: Mon, 4 Sep 2023 19:37:48 +0800
Subject: [PATCH 01/25] refactor tools

---
 xtuner/tools/chat.py                          | 104 ++++----
 xtuner/tools/chat_hf.py                       | 235 ------------------
 .../tools/model_converters/adapter_pth2hf.py  |  14 +-
 .../tools/model_converters/merge_adapter.py   |  76 ++----
 .../model_converters/merge_adapter_hf.py      |  45 ----
 5 files changed, 81 insertions(+), 393 deletions(-)
 delete mode 100644 xtuner/tools/chat_hf.py
 delete mode 100644 xtuner/tools/model_converters/merge_adapter_hf.py

diff --git a/xtuner/tools/chat.py b/xtuner/tools/chat.py
index db3128e65..2af9143a4 100644
--- a/xtuner/tools/chat.py
+++ b/xtuner/tools/chat.py
@@ -1,35 +1,46 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import argparse
-import os
 import re
 
 import torch
-from mmengine.config import Config, DictAction
-from transformers import GenerationConfig
+from peft import PeftModel
+from transformers import (AutoModelForCausalLM, AutoTokenizer,
+                          BitsAndBytesConfig, GenerationConfig)
 
-from xtuner.configs import cfgs_name_path
-from xtuner.registry import BUILDER
 from xtuner.tools.utils import get_chat_utils, update_stop_criteria
 from xtuner.utils import PROMPT_TEMPLATE
 
 
+def remove_prefix(state_dict, prefix):
+    new_state_dict = {}
+    for key, value in state_dict.items():
+        if key.startswith(prefix):
+            new_key = key[len(prefix):]
+            new_state_dict[new_key] = value
+        else:
+            new_state_dict[key] = value
+    return new_state_dict
+
+
 def parse_args():
-    parser = argparse.ArgumentParser(
-        description='Chat with a pretrained model')
+    parser = argparse.ArgumentParser(description='Chat with a HF model')
     parser.add_argument(
-        'config',
-        help='config file name or path. Note: Please use the original '
-        'configs, instead of the automatically saved log configs.')
-    parser.add_argument('--adapter', default=None, help='adapter model')
+        'model_name_or_path', help='Hugging Face model name or path')
+    parser.add_argument('--pretrained', default=None, help='pretrained path')
+    parser.add_argument('--adapter', default=None, help='adapter name or path')
     parser.add_argument(
         '--prompt-template',
         choices=PROMPT_TEMPLATE.keys(),
         default=None,
         help='Specify a prompt option')
     parser.add_argument(
-        '--is-deepspeed',
-        action='store_true',
-        help='whether the adapter is saved from deepspeed')
+        '--bits',
+        type=int,
+        choices=[4, 8, None],
+        default=None,
+        help='LLM bits')
+    parser.add_argument(
+        '--bot-name', type=str, default='BOT', help='Name for Bot')
     parser.add_argument(
         '--with-plugins',
         nargs='+',
@@ -67,16 +78,6 @@ def parse_args():
         type=int,
         default=0,
         help='Random seed for reproducible text generation')
-    parser.add_argument(
-        '--cfg-options',
-        nargs='+',
-        action=DictAction,
-        help='override some settings in the used config, the key-value pair '
-        'in xxx=yyy format will be merged into config file. If the value to '
-        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
-        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
-        'Note that the quotation marks are necessary and that no white space '
-        'is allowed.')
     args = parser.parse_args()
     return args
 
@@ -119,29 +120,36 @@ def main():
 
     torch.manual_seed(args.seed)
 
-    # parse config
-    if not os.path.isfile(args.config):
-        try:
-            args.config = cfgs_name_path[args.config]
-        except KeyError:
-            raise FileNotFoundError(f'Cannot find {args.config}')
-
-    # load config
-    cfg = Config.fromfile(args.config)
-    if args.cfg_options is not None:
-        cfg.merge_from_dict(args.cfg_options)
-
-    model = BUILDER.build(cfg.model)
-    # Cast to inference mode
-    model.llm.gradient_checkpointing_disable()
-    model.llm.config.use_cache = True
-
-    tokenizer = BUILDER.build(cfg.tokenizer)
-
+    # build model
+    quantization_config = None
+    load_in_8bit = False
+    if args.bits == 4:
+        quantization_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            load_in_8bit=False,
+            llm_int8_threshold=6.0,
+            llm_int8_has_fp16_weight=False,
+            bnb_4bit_compute_dtype=torch.float16,
+            bnb_4bit_use_double_quant=True,
+            bnb_4bit_quant_type='nf4')
+    elif args.bits == 8:
+        load_in_8bit = True
+    assert args.pretrained is None or args.bits is None
+    model = AutoModelForCausalLM.from_pretrained(
+        args.model_name_or_path,
+        quantization_config=quantization_config,
+        load_in_8bit=load_in_8bit,
+        device_map='auto',
+        trust_remote_code=True)
+    if args.pretrained is not None:
+        pretrained_ckpt = torch.load(args.pretrained, map_location='cpu')
+        pretrained_ckpt = remove_prefix(pretrained_ckpt, 'llm.')
+        model.load_state_dict(pretrained_ckpt)
+        print(f'Load pretrained weight from {args.pretrained}')
+    tokenizer = AutoTokenizer.from_pretrained(
+        args.model_name_or_path, trust_remote_code=True)
     if args.adapter is not None:
-        adapter = torch.load(args.adapter, map_location='cpu')
-        state_dict_key = 'module' if args.is_deepspeed else 'state_dict'
-        model.load_state_dict(adapter[state_dict_key], strict=False)
+        model = PeftModel.from_pretrained(model, args.adapter)
         print(f'Load adapter from {args.adapter}')
 
     Streamer, stop_criteria = get_chat_utils(model)
@@ -173,10 +181,10 @@ def main():
             template = PROMPT_TEMPLATE[args.prompt_template]
             if 'INSTRUCTION_START' in template and n_turn == 0:
                 prompt_text = template['INSTRUCTION_START'].format(
-                    input=text, round=n_turn + 1, **cfg)
+                    input=text, round=n_turn + 1, bot_name=args.bot_name)
             else:
                 prompt_text = template['INSTRUCTION'].format(
-                    input=text, round=n_turn + 1, **cfg)
+                    input=text, round=n_turn + 1, bot_name=args.bot_name)
             if args.prompt_template == 'moss_sft':
                 if not inner_thoughts_open:
                     prompt_text.replace('- Inner thoughts: enabled.',
diff --git a/xtuner/tools/chat_hf.py b/xtuner/tools/chat_hf.py
deleted file mode 100644
index 7cc81d89a..000000000
--- a/xtuner/tools/chat_hf.py
+++ /dev/null
@@ -1,235 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-import argparse
-import re
-
-import torch
-from peft import PeftModel
-from transformers import (AutoModelForCausalLM, AutoTokenizer,
-                          BitsAndBytesConfig, GenerationConfig)
-
-from xtuner.tools.utils import get_chat_utils, update_stop_criteria
-from xtuner.utils import PROMPT_TEMPLATE
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(description='Chat with a HF model')
-    parser.add_argument(
-        'model_name_or_path', help='Hugging Face model name or path')
-    parser.add_argument('--adapter', default=None, help='adapter name or path')
-    parser.add_argument(
-        '--prompt-template',
-        choices=PROMPT_TEMPLATE.keys(),
-        default=None,
-        help='Specify a prompt option')
-    parser.add_argument(
-        '--bot-name', type=str, default='BOT', help='Name for Bot')
-    parser.add_argument(
-        '--with-plugins',
-        nargs='+',
-        choices=['calculate', 'solve', 'search'],
-        help='Specify plugins to use')
-    parser.add_argument(
-        '--no-streamer', action='store_true', help='Whether to with streamer')
-    parser.add_argument('--command-stop-word', default=None, help='Stop key')
-    parser.add_argument('--answer-stop-word', default=None, help='Stop key')
-    parser.add_argument(
-        '--max-new-tokens',
-        type=int,
-        default=2048,
-        help='Maximum number of new tokens allowed in generated text')
-    parser.add_argument(
-        '--temperature',
-        type=float,
-        default=0.1,
-        help='The value used to modulate the next token probabilities.')
-    parser.add_argument(
-        '--top-k',
-        type=int,
-        default=40,
-        help='The number of highest probability vocabulary tokens to '
-        'keep for top-k-filtering.')
-    parser.add_argument(
-        '--top-p',
-        type=float,
-        default=0.75,
-        help='If set to float < 1, only the smallest set of most probable '
-        'tokens with probabilities that add up to top_p or higher are '
-        'kept for generation.')
-    parser.add_argument(
-        '--seed',
-        type=int,
-        default=0,
-        help='Random seed for reproducible text generation')
-    args = parser.parse_args()
-    return args
-
-
-def get_input():
-    """Helper function for getting input from users."""
-    sentinel = ''  # ends when this string is seen
-    result = None
-    while result is None:
-        print('\ndouble enter to end input >>> ', end='')
-        try:
-            result = '\n'.join(iter(input, sentinel))
-        except UnicodeDecodeError:
-            print('Invalid characters detected. Please enter again.')
-    return result
-
-
-def main():
-    args = parse_args()
-
-    if args.with_plugins is None:
-        inner_thoughts_open = False
-        calculate_open = False
-        solve_open = False
-        search_open = False
-    else:
-        assert args.prompt_template == 'moss_sft'
-        from plugins import plugins_api
-        inner_thoughts_open = True
-        calculate_open = 'calculate' in args.with_plugins
-        solve_open = 'solve' in args.with_plugins
-        search_open = 'search' in args.with_plugins
-        # pre-import for api and model preparation
-        if calculate_open:
-            from plugins import calculate  # noqa: F401
-        if solve_open:
-            from plugins import solve  # noqa: F401
-        if search_open:
-            from plugins import search  # noqa: F401
-
-    torch.manual_seed(args.seed)
-
-    # build model
-    quantization_config = BitsAndBytesConfig(
-        load_in_4bit=True,
-        load_in_8bit=False,
-        llm_int8_threshold=6.0,
-        llm_int8_has_fp16_weight=False,
-        bnb_4bit_compute_dtype=torch.float16,
-        bnb_4bit_use_double_quant=True,
-        bnb_4bit_quant_type='nf4')
-    model = AutoModelForCausalLM.from_pretrained(
-        args.model_name_or_path,
-        quantization_config=quantization_config,
-        trust_remote_code=True)
-    tokenizer = AutoTokenizer.from_pretrained(
-        args.model_name_or_path, trust_remote_code=True)
-    if args.adapter is not None:
-        model = PeftModel.from_pretrained(model, args.adapter)
-        print(f'Load adapter from {args.adapter}')
-
-    Streamer, stop_criteria = get_chat_utils(model)
-    if args.no_streamer:
-        Streamer = None
-
-    command_stop_cr, answer_stop_cr = update_stop_criteria(
-        base=stop_criteria,
-        tokenizer=tokenizer,
-        command_stop_word=args.command_stop_word,
-        answer_stop_word=args.answer_stop_word)
-
-    gen_config = GenerationConfig(
-        max_new_tokens=args.max_new_tokens,
-        do_sample=args.temperature > 0,
-        temperature=args.temperature,
-        top_p=args.top_p,
-        top_k=args.top_k,
-    )
-
-    n_turn = 0
-    inputs = ''
-    while True:
-        text = get_input()
-
-        if text == 'exit':
-            exit(0)
-        if args.prompt_template is not None:
-            template = PROMPT_TEMPLATE[args.prompt_template]
-            if 'INSTRUCTION_START' in template and n_turn == 0:
-                prompt_text = template['INSTRUCTION_START'].format(
-                    input=text, round=n_turn + 1, bot_name=args.bot_name)
-            else:
-                prompt_text = template['INSTRUCTION'].format(
-                    input=text, round=n_turn + 1, bot_name=args.bot_name)
-            if args.prompt_template == 'moss_sft':
-                if not inner_thoughts_open:
-                    prompt_text.replace('- Inner thoughts: enabled.',
-                                        '- Inner thoughts: disabled.')
-                if not calculate_open:
-                    prompt_text.replace(
-                        '- Calculator: enabled. API: Calculate(expression)',
-                        '- Calculator: disabled.')
-                if not solve_open:
-                    prompt_text.replace(
-                        '- Equation solver: enabled. API: Solve(equation)',
-                        '- Equation solver: disabled.')
-                if not search_open:
-                    prompt_text.replace(
-                        '- Web search: enabled. API: Search(query)',
-                        '- Web search: disabled.')
-
-            inputs += prompt_text
-        else:
-            inputs += text
-        ids = tokenizer.encode(inputs, return_tensors='pt')
-        streamer = Streamer(tokenizer) if Streamer is not None else None
-        if args.with_plugins is not None:
-            generate_output = model.generate(
-                inputs=ids.cuda(),
-                generation_config=gen_config,
-                streamer=streamer,
-                stopping_criteria=command_stop_cr).cpu()
-            generate_output_text = tokenizer.decode(
-                generate_output[0][len(ids[0]):])
-            if streamer is None:
-                end = '' if generate_output_text[-1] == '\n' else '\n'
-                print(generate_output_text, end=end)
-            pattern = r'<\|Commands\|>:(.*?)<eoc>'
-            command_text = ', '.join(re.findall(pattern, generate_output_text))
-            extent_text = plugins_api(
-                command_text,
-                calculate_open=calculate_open,
-                solve_open=solve_open,
-                search_open=search_open)
-            end = '' if extent_text[-1] == '\n' else '\n'
-            print(extent_text, end=end)
-            extent_text_ids = tokenizer.encode(
-                extent_text, return_tensors='pt', add_special_tokens=False)
-            new_ids = torch.cat((generate_output, extent_text_ids), dim=1)
-            new_streamer = Streamer(
-                tokenizer) if Streamer is not None else None
-            generate_output = model.generate(
-                inputs=new_ids.cuda(),
-                generation_config=gen_config,
-                streamer=new_streamer,
-                stopping_criteria=answer_stop_cr)
-            if streamer is None:
-                output_text = tokenizer.decode(
-                    generate_output[0][len(new_ids[0]):])
-                end = '' if output_text[-1] == '\n' else '\n'
-                print(output_text, end=end)
-        else:
-            generate_output = model.generate(
-                inputs=ids.cuda(),
-                generation_config=gen_config,
-                streamer=streamer,
-                stopping_criteria=answer_stop_cr)
-            if streamer is None:
-                output_text = tokenizer.decode(
-                    generate_output[0][len(ids[0]):])
-                end = '' if output_text[-1] == '\n' else '\n'
-                print(output_text, end=end)
-        inputs = tokenizer.decode(generate_output[0])
-        n_turn += 1
-        if len(generate_output[0]) >= args.max_new_tokens:
-            print('Remove the memory of history responses, since '
-                  f'it exceeds the length limitation {args.max_new_tokens}.')
-            n_turn = 0
-            inputs = ''
-
-
-if __name__ == '__main__':
-    main()
diff --git a/xtuner/tools/model_converters/adapter_pth2hf.py b/xtuner/tools/model_converters/adapter_pth2hf.py
index 80ae2e209..ce69c90a9 100644
--- a/xtuner/tools/model_converters/adapter_pth2hf.py
+++ b/xtuner/tools/model_converters/adapter_pth2hf.py
@@ -54,18 +54,12 @@ def main():
     if args.cfg_options is not None:
         cfg.merge_from_dict(args.cfg_options)
 
-    # load on cpu
-    cfg.model.llm.device_map = 'cpu'
-    if cfg.model.llm.get('quantization_config'):
-        cfg.model.llm.quantization_config.\
-            llm_int8_enable_fp32_cpu_offload = True
-
     model = BUILDER.build(cfg.model)
 
-    adapter_checkpoint = torch.load(
-        args.adapter_checkpoint, map_location='cpu')
-    state_dict_key = 'module' if args.is_deepspeed else 'state_dict'
-    model.load_state_dict(adapter_checkpoint[state_dict_key], strict=False)
+    state_dict = torch.load(args.adapter_checkpoint, map_location='cpu')
+    if not args.is_deepspeed:
+        state_dict = state_dict['state_dict']
+    model.load_state_dict(state_dict, strict=False)
     print(f'Load adapter from {args.adapter_checkpoint}')
 
     mkdir_or_exist(args.save_dir)
diff --git a/xtuner/tools/model_converters/merge_adapter.py b/xtuner/tools/model_converters/merge_adapter.py
index 7383f23dc..2de6bc23a 100644
--- a/xtuner/tools/model_converters/merge_adapter.py
+++ b/xtuner/tools/model_converters/merge_adapter.py
@@ -1,74 +1,40 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import argparse
-import os
 
 import torch
-from mmengine.config import Config, DictAction
-
-from xtuner.configs import cfgs_name_path
-from xtuner.registry import BUILDER
+from peft import PeftModel
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
 def parse_args():
-    parser = argparse.ArgumentParser(description='Merge a pth adapter to LLM')
-    parser.add_argument(
-        'config',
-        help='config file name or path. Note: Please use the original '
-        'configs, instead of the automatically saved log configs.')
-    parser.add_argument('adapter_checkpoint', help='adapter checkpoint file')
+    parser = argparse.ArgumentParser(
+        description='Merge a HuggingFace adapter to LLM')
+    parser.add_argument('model_name_or_path', help='model name or path')
+    parser.add_argument('adapter_name_or_path', help='adapter name or path')
     parser.add_argument(
         'save_dir', help='the directory to save the merged model')
     parser.add_argument('--max-shard-size', type=str, default='2GB')
-    parser.add_argument(
-        '--is-deepspeed',
-        action='store_true',
-        help='whether the adapter is saved from deepspeed')
-    parser.add_argument(
-        '--cfg-options',
-        nargs='+',
-        action=DictAction,
-        help='override some settings in the used config, the key-value pair '
-        'in xxx=yyy format will be merged into config file. If the value to '
-        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
-        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
-        'Note that the quotation marks are necessary and that no white space '
-        'is allowed.')
     args = parser.parse_args()
     return args
 
 
 def main():
     args = parse_args()
-
-    # parse config
-    if not os.path.isfile(args.config):
-        try:
-            args.config = cfgs_name_path[args.config]
-        except KeyError:
-            raise FileNotFoundError(f'Cannot find {args.config}')
-
-    # load config
-    cfg = Config.fromfile(args.config)
-    if args.cfg_options is not None:
-        cfg.merge_from_dict(args.cfg_options)
-
-    # load on cpu, with non-quantized
-    cfg.model.llm.device_map = 'cpu'
-    cfg.model.llm.quantization_config = None
-    cfg.model.llm.low_cpu_mem_usage = True
-    torch_dtype = cfg.model.llm.get('torch_dtype', torch.float16)
-    model = BUILDER.build(cfg.model)
-    tokenizer = BUILDER.build(cfg.tokenizer)
-    adapter_checkpoint = torch.load(
-        args.adapter_checkpoint, map_location='cpu')
-    state_dict_key = 'module' if args.is_deepspeed else 'state_dict'
-    model.load_state_dict(adapter_checkpoint[state_dict_key], strict=False)
-    print(f'Load adapter from {args.adapter_checkpoint}')
-
-    model = model.llm
-    model_merged = model.merge_and_unload()
-    for param in model.parameters():
-        param.data = param.data.to(torch_dtype)
+    model = AutoModelForCausalLM.from_pretrained(
+        args.model_name_or_path,
+        torch_dtype=torch.float16,
+        low_cpu_mem_usage=True,
+        device_map='cpu',
+        trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(
+        args.model_name_or_path, trust_remote_code=True)
+    model_unmerged = PeftModel.from_pretrained(
+        model,
+        args.adapter_name_or_path,
+        device_map='cpu',
+        torch_dtype=torch.float16,
+        is_trainable=False)
+    model_merged = model_unmerged.merge_and_unload()
     model_merged.save_pretrained(
         args.save_dir, max_shard_size=args.max_shard_size)
     tokenizer.save_pretrained(args.save_dir)
diff --git a/xtuner/tools/model_converters/merge_adapter_hf.py b/xtuner/tools/model_converters/merge_adapter_hf.py
deleted file mode 100644
index 2de6bc23a..000000000
--- a/xtuner/tools/model_converters/merge_adapter_hf.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-import argparse
-
-import torch
-from peft import PeftModel
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(
-        description='Merge a HuggingFace adapter to LLM')
-    parser.add_argument('model_name_or_path', help='model name or path')
-    parser.add_argument('adapter_name_or_path', help='adapter name or path')
-    parser.add_argument(
-        'save_dir', help='the directory to save the merged model')
-    parser.add_argument('--max-shard-size', type=str, default='2GB')
-    args = parser.parse_args()
-    return args
-
-
-def main():
-    args = parse_args()
-    model = AutoModelForCausalLM.from_pretrained(
-        args.model_name_or_path,
-        torch_dtype=torch.float16,
-        low_cpu_mem_usage=True,
-        device_map='cpu',
-        trust_remote_code=True)
-    tokenizer = AutoTokenizer.from_pretrained(
-        args.model_name_or_path, trust_remote_code=True)
-    model_unmerged = PeftModel.from_pretrained(
-        model,
-        args.adapter_name_or_path,
-        device_map='cpu',
-        torch_dtype=torch.float16,
-        is_trainable=False)
-    model_merged = model_unmerged.merge_and_unload()
-    model_merged.save_pretrained(
-        args.save_dir, max_shard_size=args.max_shard_size)
-    tokenizer.save_pretrained(args.save_dir)
-    print(f'Save to {args.save_dir}')
-
-
-if __name__ == '__main__':
-    main()

From 742b0e0c0385b6918e7ef6a034251e04171ee718 Mon Sep 17 00:00:00 2001
From: LZHgrla <linzhihao@pjlab.org.cn>
Date: Mon, 4 Sep 2023 19:38:10 +0800
Subject: [PATCH 02/25] modify entry_point

---
 xtuner/entry_point.py | 52 ++++++++-----------------------------------
 1 file changed, 9 insertions(+), 43 deletions(-)

diff --git a/xtuner/entry_point.py b/xtuner/entry_point.py
index 628610d38..c57ff2810 100644
--- a/xtuner/entry_point.py
+++ b/xtuner/entry_point.py
@@ -8,10 +8,10 @@
 from mmengine.logging import print_log
 
 import xtuner
-from xtuner.tools import chat, chat_hf, copy_cfg, list_cfg, test, train
+from xtuner.tools import chat, copy_cfg, list_cfg, test, train
 from xtuner.tools.data_preprocess import arxiv as arxiv_preprocess
 from xtuner.tools.model_converters import (adapter_pth2hf, merge_adapter,
-                                           merge_adapter_hf, split_hf_llm)
+                                           split_hf_llm)
 
 # Define valid modes
 MODES = ('list-cfg', 'copy-cfg', 'train', 'test', 'chat', 'convert',
@@ -37,17 +37,13 @@
             xtuner train $CONFIG
         3-2. Fine-tune LLMs by multiple GPUs:
             NPROC_PER_NODE=$NGPUS NNODES=$NNODES NODE_RANK=$NODE_RANK PORT=$PORT ADDR=$ADDR xtuner dist_train $CONFIG $GPUS
-        4-1. Chat with LLMs with HuggingFace's model and adapter:
-            xtuner chat hf $NAME_OR_PATH_TO_HF_MODEL --adapter $NAME_OR_PATH_TO_HF_ADAPTER --prompt-template $PROMPT_TEMPLATE
-        4-2. Chat with LLMs with XTuner's config and adapter:
-            xtuner chat xtuner $CONFIG --adapter $PATH_TO_PTH_ADAPTER --prompt $PROMPT_TEMPLATE
+        4. Chat with LLMs with HuggingFace's model and adapter:
+            xtuner chat $NAME_OR_PATH_TO_HF_MODEL --adapter $NAME_OR_PATH_TO_HF_ADAPTER --prompt-template $PROMPT_TEMPLATE
         5-1. Convert the pth adapter to HuggingFace's adapter:
             xtuner convert adapter_pth2hf $CONFIG $PATH_TO_PTH_ADAPTER $SAVE_PATH_TO_HF_ADAPTER
         5-2. Merge the HuggingFace's adapter to the pretrained LLM:
-            xtuner convert merge_adapter_hf $NAME_OR_PATH_TO_HF_MODEL $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH
-        5-3. Merge the XTuner's adapter to the pretraiend LLM:
-            xtuner convert merge_adapter $CONFIG $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH
-        5-4. Split HuggingFace's LLM to the smallest sharded one:
+            xtuner convert merge_adapter $NAME_OR_PATH_TO_HF_MODEL $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH
+        5-3. Split HuggingFace's LLM to the smallest sharded one:
             xtuner convert split_hf_llm $NAME_OR_PATH_TO_HF_MODEL $SAVE_PATH
         6-1. Preprocess arxiv dataset:
             xtuner preprocess arxiv $SRC_FILE $DST_FILE --start-date $START_DATE --categories $CATEGORIES
@@ -76,11 +72,8 @@
         1. Convert the pth adapter to HuggingFace's adapter:
             xtuner convert adapter_pth2hf $CONFIG $PATH_TO_PTH_ADAPTER $SAVE_PATH_TO_HF_ADAPTER
         2. Merge the HuggingFace's adapter to the pretrained LLM:
-            xtuner convert merge_adapter_hf $NAME_OR_PATH_TO_HF_MODEL $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH
-        3. Merge the XTuner's
-        adapter to the pretraiend LLM:
-            xtuner convert merge_adapter $CONFIG $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH
-        4. Split HuggingFace's LLM to the smallest sharded one:
+            xtuner convert merge_adapter $NAME_OR_PATH_TO_HF_MODEL $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH
+        3. Split HuggingFace's LLM to the smallest sharded one:
             xtuner convert split_hf_llm $NAME_OR_PATH_TO_HF_MODEL $SAVE_PATH
 
     GitHub: https://github.com/InternLM/xtuner
@@ -105,27 +98,6 @@
     GitHub: https://github.com/InternLM/xtuner
     """  # noqa: E501
 
-
-CHAT_HELP_MSG = \
-    f"""
-    Arguments received: {str(['xtuner'] + sys.argv[1:])}. xtuner commands use the following syntax:
-
-        xtuner MODE MODE_ARGS ARGS
-
-        Where   MODE (required) is one of {MODES}
-                MODE_ARG (optional) is the argument for specific mode
-                ARGS (optional) are the arguments for specific command
-
-    Some usages for chat: (See more by using -h for specific command!)
-
-        1. Chat with LLMs with HuggingFace's model and adapter:
-            xtuner chat hf $NAME_OR_PATH_TO_HF_MODEL --adapter $NAME_OR_PATH_TO_HF_ADAPTER --prompt-template $PROMPT_TEMPLATE
-        2. Chat with LLMs with XTuner's config and adapter:
-            xtuner chat xtuner internlm_7b_qlora_alpaca --adapter $PATH_TO_PTH_ADAPTER --prompt $PROMPT_TEMPLATE
-
-    GitHub: https://github.com/InternLM/xtuner
-    """  # noqa: E501
-
 special = {
     'help': lambda: print_log(CLI_HELP_MSG, 'current'),
     'version': lambda: print_log(xtuner.__version__, 'current')
@@ -143,16 +115,10 @@
     'copy-cfg': copy_cfg.__file__,
     'train': train.__file__,
     'test': test.__file__,
-    'chat': {
-        'hf': chat_hf.__file__,
-        'xtuner': chat.__file__,
-        '--help': lambda: print_log(CHAT_HELP_MSG, 'current'),
-        '-h': lambda: print_log(CHAT_HELP_MSG, 'current')
-    },
+    'chat': chat.__file__,
     'convert': {
         'adapter_pth2hf': adapter_pth2hf.__file__,
         'merge_adapter': merge_adapter.__file__,
-        'merge_adapter_hf': merge_adapter_hf.__file__,
         'split_hf_llm': split_hf_llm.__file__,
         '--help': lambda: print_log(CONVERT_HELP_MSG, 'current'),
         '-h': lambda: print_log(CONVERT_HELP_MSG, 'current')

From 94844443ea46af6b5bee7f861e5591553e283379 Mon Sep 17 00:00:00 2001
From: LZHgrla <linzhihao@pjlab.org.cn>
Date: Mon, 4 Sep 2023 19:38:41 +0800
Subject: [PATCH 03/25] modify docs

---
 README.md                      |  2 +-
 README_zh-CN.md                |  2 +-
 docs/en/user_guides/chat.md    | 36 +++++++++++++++++-----------------
 docs/zh_cn/user_guides/chat.md | 36 +++++++++++++++++-----------------
 4 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/README.md b/README.md
index 7879ea4d4..135f8b16f 100644
--- a/README.md
+++ b/README.md
@@ -147,7 +147,7 @@ XTuner provides tools to chat with pretrained / fine-tuned LLMs.
 - For example, we can start the chat with Llama2-7B-Plugins by
 
   ```shell
-  xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
+  xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
   ```
 
 For more examples, please see [chat.md](./docs/en/user_guides/chat.md).
diff --git a/README_zh-CN.md b/README_zh-CN.md
index bce94d4e2..e2d637354 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -147,7 +147,7 @@ XTuner 提供与大语言模型对话的工具。
 - 例如，与基于插件微调获得的 Llama2-7B-Plugins 对话：
 
   ```shell
-  xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
+  xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
   ```
 
 更多示例，请查阅[文档](./docs/zh_cn/user_guides/chat.md)。
diff --git a/docs/en/user_guides/chat.md b/docs/en/user_guides/chat.md
index 2914296f0..9b9e35a87 100644
--- a/docs/en/user_guides/chat.md
+++ b/docs/en/user_guides/chat.md
@@ -5,49 +5,49 @@
 - InternLM-7B, oasst1
 
   ```shell
-  xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant
   ```
 
 - InternLM-7B, Arxiv Gentitle
 
   ```shell
-  xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title
   ```
 
 - InternLM-7B, Colorist
 
   ```shell
-  xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist
   ```
 
 - InternLM-7B, Coder
 
   ```shell
-  xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code
   ```
 
 - InternLM-7B, SQL
 
   ```shell
-  xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql
   ```
 
 - InternLM-7B, Lawyer
 
   ```shell
-  xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer
   ```
 
 - InternLM-7B, Open-Platypus
 
   ```shell
-  xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca
   ```
 
 - InternLM-7B, Alpaca-enzh
 
   ```shell
-  xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca
   ```
 
 ## Chat with [Llama2](https://github.com/facebookresearch/llama)
@@ -58,19 +58,19 @@
 
   ```shell
   export SERPER_API_KEY="xxx"  # Please get the key from https://serper.dev to support google search!
-  xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
+    xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
   ```
 
 - Llama2-7B, Arxiv Gentitle
 
   ```shell
-  xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title
+    xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title
   ```
 
 - Llama2-7B, Colorist
 
   ```shell
-  xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist
+    xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist
   ```
 
 ## Chat with [Qwen](https://github.com/QwenLM)
@@ -79,25 +79,25 @@
 
   ```shell
   export SERPER_API_KEY="xxx"  # Please get the key from https://serper.dev to support google search!
-  xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>"
+    xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>"
   ```
 
 - Qwen-7B, oasst1
 
   ```shell
-  xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>'
+    xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>'
   ```
 
 - Qwen-7B, Arxiv Gentitle
 
   ```shell
-  xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>'
+    xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>'
   ```
 
 - Qwen-7B, Alpaca-enzh
 
   ```shell
-  xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>'
+    xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>'
   ```
 
 ## Chat with [Baichuan](https://github.com/baichuan-inc)
@@ -105,17 +105,17 @@
 - Baichuan-7B, oasst1
 
   ```shell
-  xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant
+    xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant
   ```
 
 - Baichuan-7B, Arxiv Gentitle
 
   ```shell
-  xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer
+    xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer
   ```
 
 - Baichuan-7B, Alpaca-enzh
 
   ```shell
-  xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca
+    xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca
   ```
diff --git a/docs/zh_cn/user_guides/chat.md b/docs/zh_cn/user_guides/chat.md
index 1ae01388b..002cc89c8 100644
--- a/docs/zh_cn/user_guides/chat.md
+++ b/docs/zh_cn/user_guides/chat.md
@@ -5,49 +5,49 @@
 - InternLM-7B, oasst1
 
   ```shell
-  xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant
+    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant
   ```
 
 - InternLM-7B, Arxiv Gentitle
 
   ```shell
-  xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title
+    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title
   ```
 
 - InternLM-7B, Colorist
 
   ```shell
-  xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist
+    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist
   ```
 
 - InternLM-7B, Coder
 
   ```shell
-  xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code
+    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code
   ```
 
 - InternLM-7B, SQL
 
   ```shell
-  xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql
+    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql
   ```
 
 - InternLM-7B, Lawyer
 
   ```shell
-  xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer
+    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer
   ```
 
 - InternLM-7B, Open-Platypus
 
   ```shell
-  xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca
+    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca
   ```
 
 - InternLM-7B, Alpaca-enzh
 
   ```shell
-  xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca
+    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca
   ```
 
 ## 与微调后的 [Llama2](https://github.com/facebookresearch/llama) 对话
@@ -58,19 +58,19 @@
 
   ```shell
   export SERPER_API_KEY="xxx"  # 请从 https://serper.dev 获得API_KEY，以此支持谷歌搜索！
-  xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
+    xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
   ```
 
 - Llama2-7B, Arxiv Gentitle
 
   ```shell
-  xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title
+    xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title
   ```
 
 - Llama2-7B, Colorist
 
   ```shell
-  xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist
+    xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist
   ```
 
 ## 与微调后的 [Qwen](https://github.com/QwenLM) 对话
@@ -79,25 +79,25 @@
 
   ```shell
   export SERPER_API_KEY="xxx"  # 请从 https://serper.dev 获得API_KEY，以此支持谷歌搜索！
-  xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>"
+    xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>"
   ```
 
 - Qwen-7B, oasst1
 
   ```shell
-  xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>'
+    xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>'
   ```
 
 - Qwen-7B, Arxiv Gentitle
 
   ```shell
-  xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>'
+    xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>'
   ```
 
 - Qwen-7B, Alpaca-enzh
 
   ```shell
-  xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>'
+    xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>'
   ```
 
 ## 与微调后的 [Baichuan](https://github.com/baichuan-inc) 对话
@@ -105,17 +105,17 @@
 - Baichuan-7B, oasst1
 
   ```shell
-  xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant
+    xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant
   ```
 
 - Baichuan-7B, Arxiv Gentitle
 
   ```shell
-  xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer
+    xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer
   ```
 
 - Baichuan-7B, Alpaca-enzh
 
   ```shell
-  xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca
+    xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca
   ```

From f60d74a5855c52efadd6764f70036f571a6d5373 Mon Sep 17 00:00:00 2001
From: LZHgrla <linzhihao@pjlab.org.cn>
Date: Mon, 4 Sep 2023 19:41:36 +0800
Subject: [PATCH 04/25] update docs

---
 README.md       | 12 ++++++------
 README_zh-CN.md | 12 ++++++------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 135f8b16f..d60e46b5a 100644
--- a/README.md
+++ b/README.md
@@ -180,9 +180,7 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar
 
   For more examples, please see [finetune.md](./docs/en/user_guides/finetune.md).
 
-### Deployment
-
-- **Step 0**, convert the pth adapter to HuggingFace adapter, by
+- **Step 2** (optional), convert the pth adapter to HuggingFace adapter, by
 
   ```shell
   xtuner convert adapter_pth2hf \
@@ -191,12 +189,14 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar
       ${SAVE_PATH_TO_HF_ADAPTER}
   ```
 
-  or, directly merge the pth adapter to pretrained LLM, by
+### Deployment
+
+- **Step 0**, merge the HuggingFace adapter to pretrained LLM, by
 
   ```shell
   xtuner convert merge_adapter \
-      ${CONFIG} \
-      ${PATH_TO_PTH_ADAPTER} \
+      ${NAME_OR_PATH_TO_LLM} \
+      ${PATH_TO_ADAPTER} \
       ${SAVE_PATH_TO_MERGED_LLM} \
       --max-shard-size 2GB
   ```
diff --git a/README_zh-CN.md b/README_zh-CN.md
index e2d637354..d182d722c 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -177,11 +177,9 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](.
   NPROC_PER_NODE=${GPU_NUM} xtuner train internlm_7b_qlora_oasst1_e3
   ```
 
-  更多示例，请查阅[文档](./docs/zh_cn/user_guides/finetune.md).
+  更多示例，请查阅[文档](./docs/zh_cn/user_guides/finetune.md)。
 
-### 部署
-
-- **步骤 0**，将 pth adapter 转换为 HuggingFace adapter：
+- **步骤 2**（可选），将 pth adapter 转换为 HuggingFace adapter：
 
   ```shell
   xtuner convert adapter_pth2hf \
@@ -190,11 +188,13 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](.
       ${SAVE_PATH_TO_HF_ADAPTER}
   ```
 
-  或者，直接将 pth adapter 合并到大语言模型：
+### 部署
+
+- **步骤 0**，将 HuggingFace adapter 合并到大语言模型：
 
   ```shell
   xtuner convert merge_adapter \
-      ${CONFIG} \
+      ${NAME_OR_PATH_TO_LLM} \
-      ${PATH_TO_PTH_ADAPTER} \
+      ${PATH_TO_ADAPTER} \
       ${SAVE_PATH_TO_MERGED_LLM} \
       --max-shard-size 2GB

From 73adb7679804638d46e473a469de2f5ab45cb984 Mon Sep 17 00:00:00 2001
From: LZHgrla <linzhihao@pjlab.org.cn>
Date: Mon, 4 Sep 2023 19:45:28 +0800
Subject: [PATCH 05/25] fix command indentation in README and chat docs

---
 README.md                      |  2 +-
 README_zh-CN.md                |  2 +-
 docs/en/user_guides/chat.md    | 36 +++++++++++++++++-----------------
 docs/zh_cn/user_guides/chat.md | 36 +++++++++++++++++-----------------
 4 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/README.md b/README.md
index d60e46b5a..2795f4f39 100644
--- a/README.md
+++ b/README.md
@@ -147,7 +147,7 @@ XTuner provides tools to chat with pretrained / fine-tuned LLMs.
 - For example, we can start the chat with Llama2-7B-Plugins by
 
   ```shell
-    xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
+  xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
   ```
 
 For more examples, please see [chat.md](./docs/en/user_guides/chat.md).
diff --git a/README_zh-CN.md b/README_zh-CN.md
index d182d722c..eb8b1f7c7 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -147,7 +147,7 @@ XTuner 提供与大语言模型对话的工具。
 - 例如，与基于插件微调获得的 Llama2-7B-Plugins 对话：
 
   ```shell
-    xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
+  xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
   ```
 
 更多示例，请查阅[文档](./docs/zh_cn/user_guides/chat.md)。
diff --git a/docs/en/user_guides/chat.md b/docs/en/user_guides/chat.md
index 9b9e35a87..65725f7a3 100644
--- a/docs/en/user_guides/chat.md
+++ b/docs/en/user_guides/chat.md
@@ -5,49 +5,49 @@
 - InternLM-7B, oasst1
 
   ```shell
-    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant
   ```
 
 - InternLM-7B, Arxiv Gentitle
 
   ```shell
-    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title
   ```
 
 - InternLM-7B, Colorist
 
   ```shell
-    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist
   ```
 
 - InternLM-7B, Coder
 
   ```shell
-    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code
   ```
 
 - InternLM-7B, SQL
 
   ```shell
-    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql
   ```
 
 - InternLM-7B, Lawyer
 
   ```shell
-    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer
   ```
 
 - InternLM-7B, Open-Platypus
 
   ```shell
-    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca
   ```
 
 - InternLM-7B, Alpaca-enzh
 
   ```shell
-    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca
   ```
 
 ## Chat with [Llama2](https://github.com/facebookresearch/llama)
@@ -58,19 +58,19 @@
 
   ```shell
   export SERPER_API_KEY="xxx"  # Please get the key from https://serper.dev to support google search!
-    xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
+  xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
   ```
 
 - Llama2-7B, Arxiv Gentitle
 
   ```shell
-    xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title
+  xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title
   ```
 
 - Llama2-7B, Colorist
 
   ```shell
-    xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist
+  xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist
   ```
 
 ## Chat with [Qwen](https://github.com/QwenLM)
@@ -79,25 +79,25 @@
 
   ```shell
   export SERPER_API_KEY="xxx"  # Please get the key from https://serper.dev to support google search!
-    xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>"
+  xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>"
   ```
 
 - Qwen-7B, oasst1
 
   ```shell
-    xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>'
+  xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>'
   ```
 
 - Qwen-7B, Arxiv Gentitle
 
   ```shell
-    xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>'
+  xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>'
   ```
 
 - Qwen-7B, Alpaca-enzh
 
   ```shell
-    xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>'
+  xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>'
   ```
 
 ## Chat with [Baichuan](https://github.com/baichuan-inc)
@@ -105,17 +105,17 @@
 - Baichuan-7B, oasst1
 
   ```shell
-    xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant
+  xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant
   ```
 
 - Baichuan-7B, Arxiv Gentitle
 
   ```shell
-    xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer
+  xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer
   ```
 
 - Baichuan-7B, Alpaca-enzh
 
   ```shell
-    xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca
+  xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca
   ```
diff --git a/docs/zh_cn/user_guides/chat.md b/docs/zh_cn/user_guides/chat.md
index 002cc89c8..6fef1684c 100644
--- a/docs/zh_cn/user_guides/chat.md
+++ b/docs/zh_cn/user_guides/chat.md
@@ -5,49 +5,49 @@
 - InternLM-7B, oasst1
 
   ```shell
-    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant
   ```
 
 - InternLM-7B, Arxiv Gentitle
 
   ```shell
-    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title
   ```
 
 - InternLM-7B, Colorist
 
   ```shell
-    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist
   ```
 
 - InternLM-7B, Coder
 
   ```shell
-    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code
   ```
 
 - InternLM-7B, SQL
 
   ```shell
-    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql
   ```
 
 - InternLM-7B, Lawyer
 
   ```shell
-    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer
   ```
 
 - InternLM-7B, Open-Platypus
 
   ```shell
-    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca
   ```
 
 - InternLM-7B, Alpaca-enzh
 
   ```shell
-    xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca
+  xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca
   ```
 
 ## 与微调后的 [Llama2](https://github.com/facebookresearch/llama) 对话
@@ -58,19 +58,19 @@
 
   ```shell
   export SERPER_API_KEY="xxx"  # 请从 https://serper.dev 获得API_KEY，以此支持谷歌搜索！
-    xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
+  xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
   ```
 
 - Llama2-7B, Arxiv Gentitle
 
   ```shell
-    xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title
+  xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title
   ```
 
 - Llama2-7B, Colorist
 
   ```shell
-    xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist
+  xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist
   ```
 
 ## 与微调后的 [Qwen](https://github.com/QwenLM) 对话
@@ -79,25 +79,25 @@
 
   ```shell
   export SERPER_API_KEY="xxx"  # 请从 https://serper.dev 获得API_KEY，以此支持谷歌搜索！
-    xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>"
+  xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>"
   ```
 
 - Qwen-7B, oasst1
 
   ```shell
-    xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>'
+  xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>'
   ```
 
 - Qwen-7B, Arxiv Gentitle
 
   ```shell
-    xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>'
+  xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>'
   ```
 
 - Qwen-7B, Alpaca-enzh
 
   ```shell
-    xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>'
+  xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>'
   ```
 
 ## 与微调后的 [Baichuan](https://github.com/baichuan-inc) 对话
@@ -105,17 +105,17 @@
 - Baichuan-7B, oasst1
 
   ```shell
-    xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant
+  xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant
   ```
 
 - Baichuan-7B, Arxiv Gentitle
 
   ```shell
-    xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer
+  xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer
   ```
 
 - Baichuan-7B, Alpaca-enzh
 
   ```shell
-    xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca
+  xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca
   ```

From 810769b0eb69221e20ebfc21eb278d1e88dda438 Mon Sep 17 00:00:00 2001
From: LZHgrla <linzhihao@pjlab.org.cn>
Date: Mon, 4 Sep 2023 19:47:45 +0800
Subject: [PATCH 06/25] move Chat section after Fine-tune in READMEs

---
 README.md       | 60 ++++++++++++++++++++++++-------------------------
 README_zh-CN.md | 60 ++++++++++++++++++++++++-------------------------
 2 files changed, 60 insertions(+), 60 deletions(-)

diff --git a/README.md b/README.md
index 2795f4f39..dfbb17205 100644
--- a/README.md
+++ b/README.md
@@ -123,35 +123,6 @@ XTuner is a toolkit for efficiently fine-tuning LLM, developed by the [MMRazor](
   pip install -e '.[all]'
   ```
 
-### Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
-
-<table>
-<tr>
-  <th colspan="3" align="center">Examples of Plugins-based Chat 🔥🔥🔥</th>
-</tr>
-<tr>
-<td>
-<a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/7c429d98-7630-4539-8aff-c89094826f8c"></a>
-</td>
-<td>
-<a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/05d02906-5a82-45bc-b4e3-2cc32d473b2c"></a>
-</td>
-<td>
-<a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/80395303-997a-47f2-b7d2-d585034df683"></a>
-</td>
-</tr>
-</table>
-
-XTuner provides tools to chat with pretrained / fine-tuned LLMs.
-
-- For example, we can start the chat with Llama2-7B-Plugins by
-
-  ```shell
-  xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
-  ```
-
-For more examples, please see [chat.md](./docs/en/user_guides/chat.md).
-
 ### Fine-tune [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing)
 
 XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepare guides can be found on [dataset_prepare.md](./docs/en/user_guides/dataset_prepare.md).
@@ -180,7 +151,7 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar
 
   For more examples, please see [finetune.md](./docs/en/user_guides/finetune.md).
 
-- **Step 2** (optional), convert the pth adapter to HuggingFace adapter, by
+- **Step 2**, convert the pth adapter to HuggingFace adapter, by
 
   ```shell
   xtuner convert adapter_pth2hf \
@@ -189,6 +160,35 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar
       ${SAVE_PATH_TO_HF_ADAPTER}
   ```
 
+### Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
+
+<table>
+<tr>
+  <th colspan="3" align="center">Examples of Plugins-based Chat 🔥🔥🔥</th>
+</tr>
+<tr>
+<td>
+<a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/7c429d98-7630-4539-8aff-c89094826f8c"></a>
+</td>
+<td>
+<a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/05d02906-5a82-45bc-b4e3-2cc32d473b2c"></a>
+</td>
+<td>
+<a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/80395303-997a-47f2-b7d2-d585034df683"></a>
+</td>
+</tr>
+</table>
+
+XTuner provides tools to chat with pretrained / fine-tuned LLMs.
+
+- For example, we can start the chat with Llama2-7B-Plugins by
+
+  ```shell
+  xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
+  ```
+
+For more examples, please see [chat.md](./docs/en/user_guides/chat.md).
+
 ### Deployment
 
 - **Step 0**, merge the HuggingFace adapter to pretrained LLM, by
diff --git a/README_zh-CN.md b/README_zh-CN.md
index eb8b1f7c7..5a4373852 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -123,35 +123,6 @@ XTuner 是一个轻量级微调大语言模型的工具库，由 [MMRazor](https
   pip install -e '.[all]'
   ```
 
-### 对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
-
-<table>
-<tr>
-  <th colspan="3" align="center">基于插件的对话 🔥🔥🔥</th>
-</tr>
-<tr>
-<td>
-<a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/7c429d98-7630-4539-8aff-c89094826f8c"></a>
-</td>
-<td>
-<a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/05d02906-5a82-45bc-b4e3-2cc32d473b2c"></a>
-</td>
-<td>
-<a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/80395303-997a-47f2-b7d2-d585034df683"></a>
-</td>
-</tr>
-</table>
-
-XTuner 提供与大语言模型对话的工具。
-
-- 例如，与基于插件微调获得的 Llama2-7B-Plugins 对话：
-
-  ```shell
-  xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
-  ```
-
-更多示例，请查阅[文档](./docs/zh_cn/user_guides/chat.md)。
-
 ### 微调 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing)
 
 XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](./docs/zh_cn/user_guides/dataset_prepare.md)。
@@ -179,7 +150,7 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](.
 
   更多示例，请查阅[文档](./docs/zh_cn/user_guides/finetune.md)。
 
-- **步骤 2**（可选），将 pth adapter 转换为 HuggingFace adapter：
+- **步骤 2**，将 pth adapter 转换为 HuggingFace adapter：
 
   ```shell
   xtuner convert adapter_pth2hf \
@@ -188,6 +159,35 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](.
       ${SAVE_PATH_TO_HF_ADAPTER}
   ```
 
+### 对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
+
+<table>
+<tr>
+  <th colspan="3" align="center">基于插件的对话 🔥🔥🔥</th>
+</tr>
+<tr>
+<td>
+<a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/7c429d98-7630-4539-8aff-c89094826f8c"></a>
+</td>
+<td>
+<a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/05d02906-5a82-45bc-b4e3-2cc32d473b2c"></a>
+</td>
+<td>
+<a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/80395303-997a-47f2-b7d2-d585034df683"></a>
+</td>
+</tr>
+</table>
+
+XTuner 提供与大语言模型对话的工具。
+
+- 例如，与基于插件微调获得的 Llama2-7B-Plugins 对话：
+
+  ```shell
+  xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
+  ```
+
+更多示例，请查阅[文档](./docs/zh_cn/user_guides/chat.md)。
+
 ### 部署
 
 - **步骤 0**，将 HuggingFace adapter 合并到大语言模型：

From e5ce42c0f9dabc78af2a28e75c7a181a926e1826 Mon Sep 17 00:00:00 2001
From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com>
Date: Tue, 5 Sep 2023 10:10:12 +0800
Subject: [PATCH 07/25] Update README.md

---
 README.md | 66 +++++++++++++++++++++++++++++++------------------------
 1 file changed, 37 insertions(+), 29 deletions(-)

diff --git a/README.md b/README.md
index dfbb17205..ba236ac34 100644
--- a/README.md
+++ b/README.md
@@ -26,9 +26,27 @@ XTuner is a toolkit for efficiently fine-tuning LLM, developed by the [MMRazor](
 
 ## 🌟 Demos
 
+- Ready-to-use models and datasets from XTuner API [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing)
 - QLoRA Fine-tune [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing)
 - Plugin-based Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
-- Ready-to-use models and datasets from XTuner API [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing)
+
+  <table>
+  <tr>
+    <th colspan="3" align="center">Examples of Plugins-based Chat 🔥🔥🔥</th>
+  </tr>
+  <tr>
+  <td>
+  <a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/7c429d98-7630-4539-8aff-c89094826f8c"></a>
+  </td>
+  <td>
+  <a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/05d02906-5a82-45bc-b4e3-2cc32d473b2c"></a>
+  </td>
+  <td>
+  <a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/80395303-997a-47f2-b7d2-d585034df683"></a>
+  </td>
+  </tr>
+  </table>
+
 
 ## 🔥 Supports
 
@@ -139,7 +157,13 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar
   xtuner copy-cfg ${CONFIG_NAME} ${SAVE_DIR}
   ```
 
-- **Step 1**, start fine-tuning. For example, we can start the QLoRA fine-tuning of InternLM-7B with oasst1 dataset by
+- **Step 1**, start fine-tuning.
+
+  ```shell
+  xtuner train ${CONFIG_NAME_OR_PATH}
+  ```
+
+  For example, we can start the QLoRA fine-tuning of InternLM-7B with oasst1 dataset by
 
   ```shell
   # On a single GPU
@@ -151,41 +175,25 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar
 
   For more examples, please see [finetune.md](./docs/en/user_guides/finetune.md).
 
-- **Step 2**, convert the pth adapter to HuggingFace adapter, by
+- **Step 2**, convert the saved pth model to HuggingFace model, by
 
   ```shell
-  xtuner convert adapter_pth2hf \
-      ${CONFIG} \
-      ${PATH_TO_PTH_ADAPTER} \
-      ${SAVE_PATH_TO_HF_ADAPTER}
+  xtuner convert pth2hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL}
   ```
 
 ### Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
 
-<table>
-<tr>
-  <th colspan="3" align="center">Examples of Plugins-based Chat 🔥🔥🔥</th>
-</tr>
-<tr>
-<td>
-<a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/7c429d98-7630-4539-8aff-c89094826f8c"></a>
-</td>
-<td>
-<a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/05d02906-5a82-45bc-b4e3-2cc32d473b2c"></a>
-</td>
-<td>
-<a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/80395303-997a-47f2-b7d2-d585034df683"></a>
-</td>
-</tr>
-</table>
-
 XTuner provides tools to chat with pretrained / fine-tuned LLMs.
 
-- For example, we can start the chat with Llama2-7B-Plugins by
+```shell
+xtuner chat ${NAME_OR_PATH_TO_LLM} --adapter ${NAME_OR_PATH_TO_ADAPTER} [optional arguments]
+```
 
-  ```shell
-  xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
-  ```
+For example, we can start the chat with Llama2-7b with adapter trained from MOSS-003-SFT by
+
+```shell
+xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
+```
 
 For more examples, please see [chat.md](./docs/en/user_guides/chat.md).
 
@@ -196,7 +204,7 @@ For more examples, please see [chat.md](./docs/en/user_guides/chat.md).
   ```shell
   xtuner convert merge_adapter \
       ${NAME_OR_PATH_TO_LLM} \
-      ${PATH_TO_ADAPTER} \
+      ${NAME_OR_PATH_TO_ADAPTER} \
       ${SAVE_PATH_TO_MERGED_LLM} \
       --max-shard-size 2GB
   ```

From 515b933b79f93d34617016ae63ca0f3f19276ab2 Mon Sep 17 00:00:00 2001
From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com>
Date: Tue, 5 Sep 2023 10:11:27 +0800
Subject: [PATCH 08/25] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index ba236ac34..d85124516 100644
--- a/README.md
+++ b/README.md
@@ -32,7 +32,7 @@ XTuner is a toolkit for efficiently fine-tuning LLM, developed by the [MMRazor](
 
   <table>
   <tr>
-    <th colspan="3" align="center">Examples of Plugins-based Chat 🔥🔥🔥</th>
+    <th colspan="3" align="center">Examples of Plugin-based Chat 🔥🔥🔥</th>
   </tr>
   <tr>
   <td>
@@ -220,7 +220,7 @@ For more examples, please see [chat.md](./docs/en/user_guides/chat.md).
       --seed 0
   ```
 
-  🎯 We are woking closely with [LMDeploy](https://github.com/InternLM/lmdeploy), to implement the deployment of **plugins-based chat**!
+  🎯 We are working closely with [LMDeploy](https://github.com/InternLM/lmdeploy), to implement the deployment of **plugin-based chat**!
 
 ### Evaluation
 

From cf8088bba6f32ea9c0a10c7fdc2cc39cd838ffa0 Mon Sep 17 00:00:00 2001
From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com>
Date: Tue, 5 Sep 2023 10:12:01 +0800
Subject: [PATCH 09/25] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d85124516..e24901117 100644
--- a/README.md
+++ b/README.md
@@ -175,7 +175,7 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar
 
   For more examples, please see [finetune.md](./docs/en/user_guides/finetune.md).
 
-- **Step 2**, convert the saved pth model to HuggingFace model, by
+- **Step 2**, convert the saved PTH model to HuggingFace model, by
 
   ```shell
   xtuner convert pth2hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL}

From a7c0481d75bcf549dbfb177abb47dfce50d77b85 Mon Sep 17 00:00:00 2001
From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com>
Date: Tue, 5 Sep 2023 10:15:12 +0800
Subject: [PATCH 10/25] Update README_zh-CN.md

---
 README_zh-CN.md | 66 +++++++++++++++++++++++++++----------------------
 1 file changed, 37 insertions(+), 29 deletions(-)

diff --git a/README_zh-CN.md b/README_zh-CN.md
index 5a4373852..8bc6bcc8b 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -26,9 +26,27 @@ XTuner 是一个轻量级微调大语言模型的工具库，由 [MMRazor](https
 
 ## 🌟 示例
 
+- XTuner APIs所提供的开箱即用的模型与数据集 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing)
 - QLoRA 微调 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing)
 - 基于插件的对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
-- XTuner APIs所提供的开箱即用的模型与数据集 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing)
+    
+  <table>
+  <tr>
+    <th colspan="3" align="center">基于插件的对话 🔥🔥🔥</th>
+  </tr>
+  <tr>
+  <td>
+  <a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/7c429d98-7630-4539-8aff-c89094826f8c"></a>
+  </td>
+  <td>
+  <a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/05d02906-5a82-45bc-b4e3-2cc32d473b2c"></a>
+  </td>
+  <td>
+  <a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/80395303-997a-47f2-b7d2-d585034df683"></a>
+  </td>
+  </tr>
+  </table>
+
 
 ## 🔥 支持列表
 
@@ -139,7 +157,13 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](.
   xtuner copy-cfg ${CONFIG_NAME} ${SAVE_DIR}
   ```
 
-- **步骤 1**，开始微调。例如，我们可以利用 QLoRA 算法在 oasst1 数据集上微调 InternLM-7B：
+- **步骤 1**，开始微调。
+
+  ```shell
+  xtuner train ${CONFIG_NAME_OR_PATH}
+  ```
+  
+  例如，我们可以利用 QLoRA 算法在 oasst1 数据集上微调 InternLM-7B：
 
   ```shell
   # 单卡
@@ -150,41 +174,25 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](.
 
   更多示例，请查阅[文档](./docs/zh_cn/user_guides/finetune.md)。
 
-- **步骤 2**，将 pth adapter 转换为 HuggingFace adapter：
+- **步骤 2**，将保存的 PTH 模型转换为 HuggingFace 模型：
 
   ```shell
-  xtuner convert adapter_pth2hf \
-      ${CONFIG} \
-      ${PATH_TO_PTH_ADAPTER} \
-      ${SAVE_PATH_TO_HF_ADAPTER}
+  xtuner convert pth2hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL}
   ```
 
 ### 对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
 
-<table>
-<tr>
-  <th colspan="3" align="center">基于插件的对话 🔥🔥🔥</th>
-</tr>
-<tr>
-<td>
-<a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/7c429d98-7630-4539-8aff-c89094826f8c"></a>
-</td>
-<td>
-<a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/05d02906-5a82-45bc-b4e3-2cc32d473b2c"></a>
-</td>
-<td>
-<a><img src="https://github.com/InternLM/lmdeploy/assets/36994684/80395303-997a-47f2-b7d2-d585034df683"></a>
-</td>
-</tr>
-</table>
-
 XTuner 提供与大语言模型对话的工具。
 
-- 例如，与基于插件微调获得的 Llama2-7B-Plugins 对话：
+```shell
+xtuner chat ${NAME_OR_PATH_TO_LLM} --adapter ${NAME_OR_PATH_TO_ADAPTER} [optional arguments]
+```
 
-  ```shell
-  xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
-  ```
+例如，与 Llama2-7b + MOSS-003-SFT adapter 对话：
+
+```shell
+xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
+```
 
 更多示例，请查阅[文档](./docs/zh_cn/user_guides/chat.md)。
 
@@ -195,7 +203,7 @@ XTuner 提供与大语言模型对话的工具。
   ```shell
   xtuner convert merge_adapter \
       ${NAME_OR_PATH_TO_LLM} \
-      ${PATH_TO_PTH_ADAPTER} \
+      ${NAME_OR_PATH_TO_ADAPTER} \
       ${SAVE_PATH_TO_MERGED_LLM} \
       --max-shard-size 2GB
   ```

From 85da402697a55c16ec678541a61cc01d897cf4ef Mon Sep 17 00:00:00 2001
From: LZHgrla <linzhihao@pjlab.org.cn>
Date: Tue, 5 Sep 2023 10:15:35 +0800
Subject: [PATCH 11/25] fix pre-commit

---
 README.md       | 3 ++-
 README_zh-CN.md | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index e24901117..60b938e1f 100644
--- a/README.md
+++ b/README.md
@@ -27,7 +27,9 @@ XTuner is a toolkit for efficiently fine-tuning LLM, developed by the [MMRazor](
 ## 🌟 Demos
 
 - Ready-to-use models and datasets from XTuner API [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing)
+
 - QLoRA Fine-tune [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing)
+
 - Plugin-based Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
 
   <table>
@@ -47,7 +49,6 @@ XTuner is a toolkit for efficiently fine-tuning LLM, developed by the [MMRazor](
   </tr>
   </table>
 
-
 ## 🔥 Supports
 
 <table>
diff --git a/README_zh-CN.md b/README_zh-CN.md
index 8bc6bcc8b..8bf52a263 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -27,9 +27,11 @@ XTuner 是一个轻量级微调大语言模型的工具库，由 [MMRazor](https
 ## 🌟 示例
 
 - XTuner APIs所提供的开箱即用的模型与数据集 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing)
+
 - QLoRA 微调 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing)
+
 - 基于插件的对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
-    
+
   <table>
   <tr>
     <th colspan="3" align="center">基于插件的对话 🔥🔥🔥</th>
@@ -47,7 +49,6 @@ XTuner 是一个轻量级微调大语言模型的工具库，由 [MMRazor](https
   </tr>
   </table>
 
-
 ## 🔥 支持列表
 
 <table>
@@ -162,7 +163,7 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](.
   ```shell
   xtuner train ${CONFIG_NAME_OR_PATH}
   ```
-  
+
   例如，我们可以利用 QLoRA 算法在 oasst1 数据集上微调 InternLM-7B：
 
   ```shell

From df02d715e23656c55ad9ba90cc55c6b9407ee38c Mon Sep 17 00:00:00 2001
From: LZHgrla <linzhihao@pjlab.org.cn>
Date: Tue, 5 Sep 2023 10:19:52 +0800
Subject: [PATCH 12/25] rename converter

---
 xtuner/entry_point.py                         | 27 +++++++++----------
 .../{merge_adapter.py => merge.py}            |  0
 .../{adapter_pth2hf.py => pth2hf.py}          |  0
 .../{split_hf_llm.py => split.py}             |  0
 4 files changed, 13 insertions(+), 14 deletions(-)
 rename xtuner/tools/model_converters/{merge_adapter.py => merge.py} (100%)
 rename xtuner/tools/model_converters/{adapter_pth2hf.py => pth2hf.py} (100%)
 rename xtuner/tools/model_converters/{split_hf_llm.py => split.py} (100%)

diff --git a/xtuner/entry_point.py b/xtuner/entry_point.py
index c57ff2810..5a775102d 100644
--- a/xtuner/entry_point.py
+++ b/xtuner/entry_point.py
@@ -10,8 +10,7 @@
 import xtuner
 from xtuner.tools import chat, copy_cfg, list_cfg, test, train
 from xtuner.tools.data_preprocess import arxiv as arxiv_preprocess
-from xtuner.tools.model_converters import (adapter_pth2hf, merge_adapter,
-                                           split_hf_llm)
+from xtuner.tools.model_converters import merge, pth2hf, split
 
 # Define valid modes
 MODES = ('list-cfg', 'copy-cfg', 'train', 'test', 'chat', 'convert',
@@ -38,13 +37,13 @@
         3-2. Fine-tune LLMs by multiple GPUs:
             NPROC_PER_NODE=$NGPUS NNODES=$NNODES NODE_RANK=$NODE_RANK PORT=$PORT ADDR=$ADDR xtuner dist_train $CONFIG $GPUS
         4. Chat with LLMs with HuggingFace's model and adapter:
-            xtuner chat $NAME_OR_PATH_TO_HF_MODEL --adapter $NAME_OR_PATH_TO_HF_ADAPTER --prompt-template $PROMPT_TEMPLATE
-        5-1. Convert the pth adapter to HuggingFace's adapter:
-            xtuner convert adapter_pth2hf $CONFIG $PATH_TO_PTH_ADAPTER $SAVE_PATH_TO_HF_ADAPTER
+            xtuner chat $NAME_OR_PATH_TO_LLM --adapter $NAME_OR_PATH_TO_ADAPTER --prompt-template $PROMPT_TEMPLATE
+        5-1. Convert the pth model to HuggingFace's model:
+            xtuner convert pth2hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL
         5-2. Merge the HuggingFace's adapter to the pretrained LLM:
-            xtuner convert merge_adapter $NAME_OR_PATH_TO_HF_MODEL $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH
+            xtuner convert merge $NAME_OR_PATH_TO_LLM $NAME_OR_PATH_TO_ADAPTER $SAVE_PATH
         5-3. Split HuggingFace's LLM to the smallest sharded one:
-            xtuner convert split_hf_llm $NAME_OR_PATH_TO_HF_MODEL $SAVE_PATH
+            xtuner convert split $NAME_OR_PATH_TO_LLM $SAVE_PATH
         6-1. Preprocess arxiv dataset:
             xtuner preprocess arxiv $SRC_FILE $DST_FILE --start-date $START_DATE --categories $CATEGORIES
 
@@ -69,12 +68,12 @@
 
     Some usages for convert: (See more by using -h for specific command!)
 
-        1. Convert the pth adapter to HuggingFace's adapter:
-            xtuner convert adapter_pth2hf $CONFIG $PATH_TO_PTH_ADAPTER $SAVE_PATH_TO_HF_ADAPTER
+        1. Convert the pth model to HuggingFace's model:
+            xtuner convert pth2hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL
         2. Merge the HuggingFace's adapter to the pretrained LLM:
-            xtuner convert merge_adapter $NAME_OR_PATH_TO_HF_MODEL $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH
+            xtuner convert merge $NAME_OR_PATH_TO_LLM $NAME_OR_PATH_TO_ADAPTER $SAVE_PATH
         3. Split HuggingFace's LLM to the smallest sharded one:
-            xtuner convert split_hf_llm $NAME_OR_PATH_TO_HF_MODEL $SAVE_PATH
+            xtuner convert split $NAME_OR_PATH_TO_LLM $SAVE_PATH
 
     GitHub: https://github.com/InternLM/xtuner
     """  # noqa: E501
@@ -117,9 +116,9 @@
     'test': test.__file__,
     'chat': chat.__file__,
     'convert': {
-        'adapter_pth2hf': adapter_pth2hf.__file__,
-        'merge_adapter': merge_adapter.__file__,
-        'split_hf_llm': split_hf_llm.__file__,
+        'pth2hf': pth2hf.__file__,
+        'merge': merge.__file__,
+        'split': split.__file__,
         '--help': lambda: print_log(CONVERT_HELP_MSG, 'current'),
         '-h': lambda: print_log(CONVERT_HELP_MSG, 'current')
     },
diff --git a/xtuner/tools/model_converters/merge_adapter.py b/xtuner/tools/model_converters/merge.py
similarity index 100%
rename from xtuner/tools/model_converters/merge_adapter.py
rename to xtuner/tools/model_converters/merge.py
diff --git a/xtuner/tools/model_converters/adapter_pth2hf.py b/xtuner/tools/model_converters/pth2hf.py
similarity index 100%
rename from xtuner/tools/model_converters/adapter_pth2hf.py
rename to xtuner/tools/model_converters/pth2hf.py
diff --git a/xtuner/tools/model_converters/split_hf_llm.py b/xtuner/tools/model_converters/split.py
similarity index 100%
rename from xtuner/tools/model_converters/split_hf_llm.py
rename to xtuner/tools/model_converters/split.py

From b97e9fe7bf7f8a6335b9f4605ddd65d3f0c4670a Mon Sep 17 00:00:00 2001
From: LZHgrla <linzhihao@pjlab.org.cn>
Date: Tue, 5 Sep 2023 10:54:45 +0800
Subject: [PATCH 13/25] update pth2hf

---
 xtuner/tools/chat.py                    |  7 ----
 xtuner/tools/model_converters/pth2hf.py | 48 +++++++++++++++++--------
 2 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/xtuner/tools/chat.py b/xtuner/tools/chat.py
index 2af9143a4..f3cf452c6 100644
--- a/xtuner/tools/chat.py
+++ b/xtuner/tools/chat.py
@@ -26,7 +26,6 @@ def parse_args():
     parser = argparse.ArgumentParser(description='Chat with a HF model')
     parser.add_argument(
         'model_name_or_path', help='Hugging Face model name or path')
-    parser.add_argument('--pretrained', default=None, help='pretrained path')
     parser.add_argument('--adapter', default=None, help='adapter name or path')
     parser.add_argument(
         '--prompt-template',
@@ -134,18 +133,12 @@ def main():
             bnb_4bit_quant_type='nf4')
     elif args.bits == 8:
         load_in_8bit = True
-    assert args.pretrained is None or args.bits is None
     model = AutoModelForCausalLM.from_pretrained(
         args.model_name_or_path,
         quantization_config=quantization_config,
         load_in_8bit=load_in_8bit,
         device_map='auto',
         trust_remote_code=True)
-    if args.pretrained is not None:
-        pretrained_ckpt = torch.load(args.pretrained, map_location='cpu')
-        pretrained_ckpt = remove_prefix(pretrained_ckpt, 'llm.')
-        model.load_state_dict(pretrained_ckpt)
-        print(f'Load pretrained weight from {args.pretrained}')
     tokenizer = AutoTokenizer.from_pretrained(
         args.model_name_or_path, trust_remote_code=True)
     if args.adapter is not None:
diff --git a/xtuner/tools/model_converters/pth2hf.py b/xtuner/tools/model_converters/pth2hf.py
index ce69c90a9..32aecad2d 100644
--- a/xtuner/tools/model_converters/pth2hf.py
+++ b/xtuner/tools/model_converters/pth2hf.py
@@ -5,7 +5,6 @@
 
 import torch
 from mmengine.config import Config, DictAction
-from mmengine.utils import mkdir_or_exist
 
 from xtuner.configs import cfgs_name_path
 from xtuner.registry import BUILDER
@@ -13,18 +12,14 @@
 
 def parse_args():
     parser = argparse.ArgumentParser(
-        description='Convert the pth adapter to HuggingFace adapter')
+        description='Convert the pth model to HuggingFace model')
     parser.add_argument(
         'config',
         help='config file name or path. Note: Please use the original '
         'configs, instead of the automatically saved log configs.')
-    parser.add_argument('adapter_checkpoint', help='adapter checkpoint file')
+    parser.add_argument('pth_model', help='pth model file')
     parser.add_argument(
-        'save_dir', help='the directory to save the checkpoint')
-    parser.add_argument(
-        '--is-deepspeed',
-        action='store_true',
-        help='whether the adapter is saved from deepspeed')
+        'save_dir', help='the directory to save HuggingFace model')
     parser.add_argument(
         '--cfg-options',
         nargs='+',
@@ -39,6 +34,29 @@ def parse_args():
     return args
 
 
+def guess_load_checkpoint(pth_model):
+    if os.path.isfile(pth_model):
+        state_dict = torch.load(pth_model, map_location='cpu')
+        if 'state_dict' in state_dict:
+            state_dict = state_dict['state_dict']
+    elif os.path.isdir(pth_model):
+        try:
+            from deepspeed.utils.zero_to_fp32 import \
+                get_fp32_state_dict_from_zero_checkpoint
+        except ImportError:
+            raise ImportError(
+                'The provided PTH model appears to be a DeepSpeed checkpoint. '
+                'However, DeepSpeed library is not detected in current '
+                'environment. This suggests that DeepSpeed may not be '
+                'installed or is incorrectly configured. Please verify your '
+                'setup.')
+        state_dict = get_fp32_state_dict_from_zero_checkpoint(
+            os.path.dirname(pth_model), os.path.basename(pth_model))
+    else:
+        raise FileNotFoundError(f'Cannot find {pth_model}')
+    return state_dict
+
+
 def main():
     args = parse_args()
 
@@ -56,17 +74,19 @@ def main():
 
     model = BUILDER.build(cfg.model)
 
-    state_dict = torch.load(args.adapter_checkpoint, map_location='cpu')
-    if not args.is_deepspeed:
-        state_dict = state_dict['state_dict']
+    state_dict = guess_load_checkpoint(args.pth_model)
     model.load_state_dict(state_dict, strict=False)
-    print(f'Load adapter from {args.adapter_checkpoint}')
+    print(f'Load PTH model from {args.pth_model}')
 
-    mkdir_or_exist(args.save_dir)
+    print(f'Saving HuggingFace model to {args.save_dir}')
     model.llm.save_pretrained(args.save_dir)
+    if 'PeftModel' not in model.llm.__class__.__name__:
+        print(f'Saving HuggingFace tokenizer to {args.save_dir}')
+        tokenizer = BUILDER.build(cfg.tokenizer)
+        tokenizer.save_pretrained(args.save_dir)
     shutil.copyfile(args.config, os.path.join(args.save_dir,
                                               'xtuner_config.py'))
-    print(f'Save to {args.save_dir}')
+    print('All done!')
 
 
 if __name__ == '__main__':

From a87eff26a3ccae3743fb5ca763235dd75e0a5161 Mon Sep 17 00:00:00 2001
From: LZHgrla <linzhihao@pjlab.org.cn>
Date: Tue, 5 Sep 2023 10:55:46 +0800
Subject: [PATCH 14/25] rename pth2hf to pth_to_hf

---
 README.md                                                 | 2 +-
 README_zh-CN.md                                           | 2 +-
 xtuner/entry_point.py                                     | 8 ++++----
 xtuner/tools/model_converters/{pth2hf.py => pth_to_hf.py} | 0
 4 files changed, 6 insertions(+), 6 deletions(-)
 rename xtuner/tools/model_converters/{pth2hf.py => pth_to_hf.py} (100%)

diff --git a/README.md b/README.md
index 60b938e1f..b04af0ef1 100644
--- a/README.md
+++ b/README.md
@@ -179,7 +179,7 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar
 - **Step 2**, convert the saved PTH model to HuggingFace model, by
 
   ```shell
-  xtuner convert pth2hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL}
+  xtuner convert pth_to_hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL}
   ```
 
 ### Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
diff --git a/README_zh-CN.md b/README_zh-CN.md
index 8bf52a263..1615959d4 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -178,7 +178,7 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](.
 - **步骤 2**，将保存的 PTH 模型转换为 HuggingFace 模型：
 
   ```shell
-  xtuner convert pth2hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL}
+  xtuner convert pth_to_hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL}
   ```
 
 ### 对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
diff --git a/xtuner/entry_point.py b/xtuner/entry_point.py
index 5a775102d..890210e6d 100644
--- a/xtuner/entry_point.py
+++ b/xtuner/entry_point.py
@@ -10,7 +10,7 @@
 import xtuner
 from xtuner.tools import chat, copy_cfg, list_cfg, test, train
 from xtuner.tools.data_preprocess import arxiv as arxiv_preprocess
-from xtuner.tools.model_converters import merge, pth2hf, split
+from xtuner.tools.model_converters import merge, pth_to_hf, split
 
 # Define valid modes
 MODES = ('list-cfg', 'copy-cfg', 'train', 'test', 'chat', 'convert',
@@ -39,7 +39,7 @@
         4. Chat with LLMs with HuggingFace's model and adapter:
             xtuner chat $NAME_OR_PATH_TO_LLM --adapter $NAME_OR_PATH_TO_ADAPTER --prompt-template $PROMPT_TEMPLATE
         5-1. Convert the pth model to HuggingFace's model:
-            xtuner convert pth2hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL
+            xtuner convert pth_to_hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL
         5-2. Merge the HuggingFace's adapter to the pretrained LLM:
             xtuner convert merge $NAME_OR_PATH_TO_LLM $NAME_OR_PATH_TO_ADAPTER $SAVE_PATH
         5-3. Split HuggingFace's LLM to the smallest sharded one:
@@ -69,7 +69,7 @@
     Some usages for convert: (See more by using -h for specific command!)
 
         1. Convert the pth model to HuggingFace's model:
-            xtuner convert pth2hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL
+            xtuner convert pth_to_hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL
         2. Merge the HuggingFace's adapter to the pretrained LLM:
             xtuner convert merge $NAME_OR_PATH_TO_LLM $NAME_OR_PATH_TO_ADAPTER $SAVE_PATH
         3. Split HuggingFace's LLM to the smallest sharded one:
@@ -116,7 +116,7 @@
     'test': test.__file__,
     'chat': chat.__file__,
     'convert': {
-        'pth2hf': pth2hf.__file__,
+        'pth_to_hf': pth_to_hf.__file__,
         'merge': merge.__file__,
         'split': split.__file__,
         '--help': lambda: print_log(CONVERT_HELP_MSG, 'current'),
diff --git a/xtuner/tools/model_converters/pth2hf.py b/xtuner/tools/model_converters/pth_to_hf.py
similarity index 100%
rename from xtuner/tools/model_converters/pth2hf.py
rename to xtuner/tools/model_converters/pth_to_hf.py

From f5db1fe45f17a409f359d855105e803bca02ee29 Mon Sep 17 00:00:00 2001
From: LZHgrla <linzhihao@pjlab.org.cn>
Date: Tue, 5 Sep 2023 11:39:14 +0800
Subject: [PATCH 15/25] add fp32 for pth_to_hf

---
 xtuner/entry_point.py                      | 10 +++++-----
 xtuner/tools/model_converters/merge.py     |  7 ++++++-
 xtuner/tools/model_converters/pth_to_hf.py | 17 ++++++++++++++++-
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/xtuner/entry_point.py b/xtuner/entry_point.py
index 890210e6d..f36f2cc39 100644
--- a/xtuner/entry_point.py
+++ b/xtuner/entry_point.py
@@ -36,14 +36,14 @@
             xtuner train $CONFIG
         3-2. Fine-tune LLMs by multiple GPUs:
             NPROC_PER_NODE=$NGPUS NNODES=$NNODES NODE_RANK=$NODE_RANK PORT=$PORT ADDR=$ADDR xtuner dist_train $CONFIG $GPUS
-        4. Chat with LLMs with HuggingFace's model and adapter:
-            xtuner chat $NAME_OR_PATH_TO_LLM --adapter $NAME_OR_PATH_TO_ADAPTER --prompt-template $PROMPT_TEMPLATE
-        5-1. Convert the pth model to HuggingFace's model:
+        4-1. Convert the pth model to HuggingFace's model:
             xtuner convert pth_to_hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL
-        5-2. Merge the HuggingFace's adapter to the pretrained LLM:
+        4-2. Merge the HuggingFace's adapter to the pretrained LLM:
             xtuner convert merge $NAME_OR_PATH_TO_LLM $NAME_OR_PATH_TO_ADAPTER $SAVE_PATH
-        5-3. Split HuggingFace's LLM to the smallest sharded one:
+        4-3. Split HuggingFace's LLM to the smallest sharded one:
             xtuner convert split $NAME_OR_PATH_TO_LLM $SAVE_PATH
+        5. Chat with LLMs with HuggingFace's model and adapter:
+            xtuner chat $NAME_OR_PATH_TO_LLM --adapter $NAME_OR_PATH_TO_ADAPTER --prompt-template $PROMPT_TEMPLATE
         6-1. Preprocess arxiv dataset:
             xtuner preprocess arxiv $SRC_FILE $DST_FILE --start-date $START_DATE --categories $CATEGORIES
 
diff --git a/xtuner/tools/model_converters/merge.py b/xtuner/tools/model_converters/merge.py
index 2de6bc23a..169cea620 100644
--- a/xtuner/tools/model_converters/merge.py
+++ b/xtuner/tools/model_converters/merge.py
@@ -13,7 +13,12 @@ def parse_args():
     parser.add_argument('adapter_name_or_path', help='adapter name or path')
     parser.add_argument(
         'save_dir', help='the directory to save the merged model')
-    parser.add_argument('--max-shard-size', type=str, default='2GB')
+    parser.add_argument(
+        '--max-shard-size',
+        type=str,
+        default='2GB',
+        help='Only applicable for LLM. The maximum size for '
+        'each sharded checkpoint.')
     args = parser.parse_args()
     return args
 
diff --git a/xtuner/tools/model_converters/pth_to_hf.py b/xtuner/tools/model_converters/pth_to_hf.py
index 32aecad2d..ccf15861b 100644
--- a/xtuner/tools/model_converters/pth_to_hf.py
+++ b/xtuner/tools/model_converters/pth_to_hf.py
@@ -20,6 +20,16 @@ def parse_args():
     parser.add_argument('pth_model', help='pth model file')
     parser.add_argument(
         'save_dir', help='the directory to save HuggingFace model')
+    parser.add_argument(
+        '--fp32',
+        action='store_true',
+        help='Save as fp32. If not set, fp16 will be used by default.')
+    parser.add_argument(
+        '--max-shard-size',
+        type=str,
+        default='2GB',
+        help='Only applicable for LLM. The maximum size for '
+        'each sharded checkpoint.')
     parser.add_argument(
         '--cfg-options',
         nargs='+',
@@ -78,8 +88,13 @@ def main():
     model.load_state_dict(state_dict, strict=False)
     print(f'Load PTH model from {args.pth_model}')
 
+    if not args.fp32:
+        print('Convert weights to float16')
+        model.llm.half()
+
     print(f'Saving HuggingFace model to {args.save_dir}')
-    model.llm.save_pretrained(args.save_dir)
+    model.llm.save_pretrained(
+        args.save_dir, max_shard_size=args.max_shard_size)
     if 'PeftModel' not in model.llm.__class__.__name__:
         print(f'Saving HuggingFace tokenizer to {args.save_dir}')
         tokenizer = BUILDER.build(cfg.tokenizer)

From 8e3a71eb6f007a065cef3335bfbe3853d504f201 Mon Sep 17 00:00:00 2001
From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com>
Date: Tue, 5 Sep 2023 11:49:01 +0800
Subject: [PATCH 16/25] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b04af0ef1..9f4e9e326 100644
--- a/README.md
+++ b/README.md
@@ -176,7 +176,7 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar
 
   For more examples, please see [finetune.md](./docs/en/user_guides/finetune.md).
 
-- **Step 2**, convert the saved PTH model to HuggingFace model, by
+- **Step 2**, convert the saved PTH model (if using DeepSpeed, it will be a directory) to HuggingFace model, by
 
   ```shell
   xtuner convert pth_to_hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL}

From 2cac8f43f09185269173dddcea9815e4fcf878f1 Mon Sep 17 00:00:00 2001
From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com>
Date: Tue, 5 Sep 2023 11:50:58 +0800
Subject: [PATCH 17/25] Update README_zh-CN.md

---
 README_zh-CN.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README_zh-CN.md b/README_zh-CN.md
index 1615959d4..d0455541e 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -175,7 +175,7 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](.
 
   更多示例，请查阅[文档](./docs/zh_cn/user_guides/finetune.md)。
 
-- **步骤 2**，将保存的 PTH 模型转换为 HuggingFace 模型：
+- **步骤 2**，将保存的 PTH 模型（如果使用的DeepSpeed，则将会是一个文件夹）转换为 HuggingFace 模型：
 
   ```shell
   xtuner convert pth_to_hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL}

From 737e3ad9401b7bc674140afa1d1124775957aa7f Mon Sep 17 00:00:00 2001
From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com>
Date: Tue, 5 Sep 2023 12:06:02 +0800
Subject: [PATCH 18/25] Update README_zh-CN.md

---
 README_zh-CN.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README_zh-CN.md b/README_zh-CN.md
index d0455541e..4f1969356 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -178,7 +178,7 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](.
 - **步骤 2**，将保存的 PTH 模型（如果使用的DeepSpeed，则将会是一个文件夹）转换为 HuggingFace 模型：
 
   ```shell
-  xtuner convert pth_to_hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL}
+  xtuner convert pth_to_hf ${CONFIG} ${PTH} ${SAVE_PATH}
   ```
 
 ### 对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
@@ -205,7 +205,7 @@ xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-
   xtuner convert merge_adapter \
       ${NAME_OR_PATH_TO_LLM} \
       ${NAME_OR_PATH_TO_ADAPTER} \
-      ${SAVE_PATH_TO_MERGED_LLM} \
+      ${SAVE_PATH} \
       --max-shard-size 2GB
   ```
 

From 81373c42bb6703d6f187bd6e6ce26f01ddc03e86 Mon Sep 17 00:00:00 2001
From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com>
Date: Tue, 5 Sep 2023 12:07:43 +0800
Subject: [PATCH 19/25] Update README.md

---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 9f4e9e326..dcc0c29d7 100644
--- a/README.md
+++ b/README.md
@@ -155,7 +155,7 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar
   Or, if the provided configs cannot meet the requirements, please copy the provided config to the specified directory and make specific modifications by
 
   ```shell
-  xtuner copy-cfg ${CONFIG_NAME} ${SAVE_DIR}
+  xtuner copy-cfg ${CONFIG_NAME} ${SAVE_PATH}
   ```
 
 - **Step 1**, start fine-tuning.
@@ -179,7 +179,7 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar
 - **Step 2**, convert the saved PTH model (if using DeepSpeed, it will be a directory) to HuggingFace model, by
 
   ```shell
-  xtuner convert pth_to_hf ${CONFIG} ${PATH_TO_PTH_MODEL} ${SAVE_PATH_TO_HF_MODEL}
+  xtuner convert pth_to_hf ${CONFIG} ${PTH} ${SAVE_PATH}
   ```
 
 ### Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
@@ -206,7 +206,7 @@ For more examples, please see [chat.md](./docs/en/user_guides/chat.md).
   xtuner convert merge_adapter \
       ${NAME_OR_PATH_TO_LLM} \
       ${NAME_OR_PATH_TO_ADAPTER} \
-      ${SAVE_PATH_TO_MERGED_LLM} \
+      ${SAVE_PATH} \
       --max-shard-size 2GB
   ```
 

From 45627b9baa0a01f86bbbbb6a1daef31a18fe5dfe Mon Sep 17 00:00:00 2001
From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com>
Date: Tue, 5 Sep 2023 12:07:45 +0800
Subject: [PATCH 20/25] Update README_zh-CN.md

---
 README_zh-CN.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README_zh-CN.md b/README_zh-CN.md
index 4f1969356..8fe70cf5d 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -155,7 +155,7 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](.
   或者，如果所提供的配置文件不能满足使用需求，请导出所提供的配置文件并进行相应更改：
 
   ```shell
-  xtuner copy-cfg ${CONFIG_NAME} ${SAVE_DIR}
+  xtuner copy-cfg ${CONFIG_NAME} ${SAVE_PATH}
   ```
 
 - **步骤 1**，开始微调。

From 24f2dfeece461159c2cbe78f4d6a3d2c7bd2a56e Mon Sep 17 00:00:00 2001
From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com>
Date: Tue, 5 Sep 2023 12:08:15 +0800
Subject: [PATCH 21/25] Update README_zh-CN.md

---
 README_zh-CN.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README_zh-CN.md b/README_zh-CN.md
index 8fe70cf5d..69229f99c 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -178,7 +178,7 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](.
 - **步骤 2**，将保存的 PTH 模型（如果使用的DeepSpeed，则将会是一个文件夹）转换为 HuggingFace 模型：
 
   ```shell
-  xtuner convert pth_to_hf ${CONFIG} ${PTH} ${SAVE_PATH}
+  xtuner convert pth_to_hf ${CONFIG_NAME_OR_PATH} ${PTH} ${SAVE_PATH}
   ```
 
 ### 对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)

From b88024830775f8659b1ee4bdbab4be60c97f42f7 Mon Sep 17 00:00:00 2001
From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com>
Date: Tue, 5 Sep 2023 12:08:25 +0800
Subject: [PATCH 22/25] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index dcc0c29d7..5a48eb6e9 100644
--- a/README.md
+++ b/README.md
@@ -179,7 +179,7 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar
 - **Step 2**, convert the saved PTH model (if using DeepSpeed, it will be a directory) to HuggingFace model, by
 
   ```shell
-  xtuner convert pth_to_hf ${CONFIG} ${PTH} ${SAVE_PATH}
+  xtuner convert pth_to_hf ${CONFIG_NAME_OR_PATH} ${PTH} ${SAVE_PATH}
   ```
 
 ### Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)

From d4ece548aa3ca1b66b7d6d53052bd0d3c914befe Mon Sep 17 00:00:00 2001
From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com>
Date: Tue, 5 Sep 2023 15:20:43 +0800
Subject: [PATCH 23/25] Update README.md

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 5a48eb6e9..e8702aa5d 100644
--- a/README.md
+++ b/README.md
@@ -221,6 +221,8 @@ For more examples, please see [chat.md](./docs/en/user_guides/chat.md).
       --seed 0
   ```
 
+  🔥 Seeking efficient inference with less GPU memory? Try 4-bit quantization from [LMDeploy](https://github.com/InternLM/lmdeploy)! For more details, see [here](https://github.com/InternLM/lmdeploy/tree/main#quantization).
+
   🎯 We are woking closely with [LMDeploy](https://github.com/InternLM/lmdeploy), to implement the deployment of **plugin-based chat**!
 
 ### Evaluation

From 4f8b2ddfa0bbf5ea622a9a7efaf28fcb1b337f44 Mon Sep 17 00:00:00 2001
From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com>
Date: Tue, 5 Sep 2023 15:22:10 +0800
Subject: [PATCH 24/25] Update README_zh-CN.md

---
 README_zh-CN.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README_zh-CN.md b/README_zh-CN.md
index 69229f99c..4cf95a943 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -219,6 +219,7 @@ xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-
       --top_p 0.95 \
       --seed 0
   ```
+  🔥 追求速度更快、显存占用更低的推理？欢迎体验 [LMDeploy](https://github.com/InternLM/lmdeploy) 提供的 4-bit 量化！使用指南请见[文档](https://github.com/InternLM/lmdeploy/tree/main#quantization)。
 
   🎯 我们正在与 [LMDeploy](https://github.com/InternLM/lmdeploy) 紧密合作，以实现基于插件对话的部署！
 

From 27d42e58a2f55625bf7bc157fee2da092c2eb9ea Mon Sep 17 00:00:00 2001
From: LZHgrla <linzhihao@pjlab.org.cn>
Date: Tue, 5 Sep 2023 15:22:35 +0800
Subject: [PATCH 25/25] fix pre-commit

---
 README_zh-CN.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README_zh-CN.md b/README_zh-CN.md
index 4cf95a943..5786e1364 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -219,6 +219,7 @@ xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-
       --top_p 0.95 \
       --seed 0
   ```
+
   🔥 追求速度更快、显存占用更低的推理？欢迎体验 [LMDeploy](https://github.com/InternLM/lmdeploy) 提供的 4-bit 量化！使用指南请见[文档](https://github.com/InternLM/lmdeploy/tree/main#quantization)。
 
   🎯 我们正在与 [LMDeploy](https://github.com/InternLM/lmdeploy) 紧密合作，以实现基于插件对话的部署！